Skip to content
My Python Toolbox
###### Subsetting list
# my_list[0::3] # print every 3rd element starting at 0
# my_list[1::2] # print every 2nd element starting at 1
###### SETS - non-repeating
# s = set() # note: {} creates an empty dict, not a set; use {1, 2, 3} for a non-empty set literal
# set() .union(), .difference(), .intersection(), .symmetric_difference()
###### TUPLES - immutable
# t = ()
###### ITERATORS
# iter(iterable) -> iterator
# print(*iterator) # prints all remaining elements and exhausts the iterator
###### ITERABLE: ZIP
# zip() -> joins lists or tuples into an object that matches firsts, seconds, thirds, etc..
# since zip creates a value pair, when "unzipping" with * you can use:
# value1, value2 = zip(*my_zip)
# rs_dict = dict(zipped_lists)
###### LIST COMPREHENSIONS
# squares = [i**2 for i in range(10)]
# Create a 5 x 5 matrix using a list of lists: matrix
# matrix = [[col for col in range(5)] for row in range(5)]
# [num ** 2 if num % 2 == 0 else 0 for num in range(20)]
# >>>[0, 0, 4, 0, 16, 0, 36, 0, 64, 0, 100, 0, 144, 0, 196, 0, 256, 0, 324, 0]
# Using list comprehensions to filter a nested dictionary
# Use a for loop to iterate over the squirrels in Tompkins Square Park:
#for squirrel in squirrels_by_park["Tompkins Square Park"]:
# Safely print the activities of each squirrel or None
# print(squirrel.get("activities"))
# Print the list of 'Cinnamon' primary_fur_color squirrels in Union Square Park
#print([squirrel for squirrel in squirrels_by_park["Union Square Park"] if "Cinnamon" in squirrel["primary_fur_color"]])
# nums_list2 = [*range(1,12,2)] unpacks built-in iterable range and defines the list in one go, all odd from 1 to 11
###### DICT COMPREHENSIONS
# Create dict comprehension: new_fellowship
# new_fellowship = {member: len(member) for member in fellowship}
###### GENERATORS
# lannister = ['cersei', 'jaime', 'tywin', 'tyrion', 'joffrey']
# lengths = (len(person) for person in lannister)
###### GENERATOR FUNCTION
# def get_lengths(input_list):
# """Generator function that yields the
# length of the strings in input_list."""
#
# # Yield the length of a string
# for person in input_list:
# yield len(person)
###### DATA FRAMES filtering
# df_pop_ceb = df_urb_pop[df_urb_pop['CountryCode'] == 'CEB']
# assert salary_by_year.isna().sum().sum() == 0
# assert salary_by_year.isnull().any().any() == False #first any does per column, next does across all
################### WRITING EFFICIENT PYTHON CODE ###################
###### TESTING AND OPTIMIZATION
# %timeit -r(runs) -n(loops) <one-liner>
# %%timeit -r -n <beginning of multiple lines>
# <indexed line of loop or conditional>
# <indexed line of loop or conditional>
# pip install line_profiler
# >>>%load_ext line_profiler
# >>>%lprun -f (function no '()') (function call)
# >>>?%lprun
# pip install memory_profiler
# >>>%load_ext memory_profiler
# from your_file (no .py extension) import your_function
# >>>%mprun -f (function no '()') (function call) MUST BE IN FILE and imported
# >>>?%mprun
###### COLLECTIONS
# from collections import Counter
# Counter([some list or dict comprehension for filtering]) --> dictionary {key(original_key): value(count)}
###### ITERTOOLS
# from itertools import combinations
# Collect all possible combinations of 4 Pokémon directly into a list
# combos_4 = [*combinations(pokemon, 4)]
#top_3 = sorted(poke_list_np, key=lambda x: x[1], reverse=True)[:3]
###### PANDAS EFFICIENCIES (DATAFRAMES and series)
# df.info() can give similar results to R's str() "structure" method
# rangers_df.describe().transpose() similar to R's summary()
# len(DATAFRAME) -> # of rows
# .iterrows() -> tuple(index, pandas series)
# for row_tuple in team_wins_df.iterrows():
# print(row_tuple[1]['Team'])
# .itertuples() -> namedtuple with fields accessible using attribute lookup, i.e.:
# for row_namedtuple in team_wins_df.itertuples():
# print(row_namedtuple)
# print(row_namedtuple.Index)
# print(row_namedtuple.Team) etc...
# df.apply(function, iterable?)
# df.apply(lambda x: function, iterable?)
# df['column'].values -> a Numpy array capable of broadcasting
# win_perc_preds_np = predict_win_perc(baseball_df['RS'].values, baseball_df['RA'].values)
# baseball_df['WP_preds'] = win_perc_preds_np
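# import inspect, then call inspect.getdoc(func) to get a function's docstring; for its arguments use inspect.signature(func) (inspect.getargs expects a code object, not a function).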
# sphinx and pydoc automatically generate online documentation for you based on your docstrings.
# Use the "stock('NVDA')" context manager
#with stock('NVDA') as nvda:
# Open "NVDA.txt" for writing as f_out
# with open("NVDA.txt", "w") as f_out:
# for _ in range(10): #########################################
# value = nvda.price()
# print('Logging ${:.2f} for NVDA'.format(value))
# f_out.write('{:.2f}\n'.format(value))
######################### DECORATORS #########################
#from functools import wraps ###################################
#def print_before_and_after(func):
# @wraps(func) --> preserves the wrapped function's metadata ##################
# def wrapper(*args, **kwargs):
# print('Before {}'.format(func.__name__))
# # Call the function being decorated with *args
# func(*args, **kwargs)
# print('After {}'.format(func.__name__)) ###############################
# Return the nested function
# return wrapper
#For decorators to accept args, you create a func that returns a decorator:
#def returns(return_type):
# def decorator(func):
# def wrapper(*args, **kwargs):
# result = func(*args, **kwargs)
# assert type(result) == return_type
# return result
# return wrapper
# return decorator
# help(function)
Explore Datasets
Use the DataFrame imported in the first cell to explore the data and practice your skills!
- Write a function that takes a timestamp (see column `timestamp_ms`) and returns the text of any tweet published at that timestamp. Additionally, make it so that users can pass column names as flexible arguments (`*args`) so that the function can print out any other columns users want to see.
- In a `filter()` call, write a lambda function to return tweets created on a Tuesday. Tip: look at the first three characters of the `created_at` column.
- Make sure to add error handling on the functions you've created!