Skip to content
fonctions utiles
1-fonction pour afficher une courbe, titre et les ticks de la même couleur
import matplotlib.pyplot as plt
# Define a function called plot_timeseries
def plot_timeseries(axes, x, y, color, xlabel, ylabel):
# Plot the inputs x,y in the provided color
axes.plot(x, y, color=color)
# Set the x-axis label
axes.set_xlabel(xlabel)
# Set the y-axis label
axes.set_ylabel(ylabel, color=color)
# Set the colors tick params for y-axis
axes.tick_params('y', colors=color)Fonction pour découper en morceau.
# Define count_entries()
def count_entries(csv_file,c_size,colname):
"""Return a dictionary with counts of
occurrences as value for each key."""
# Initialize an empty dictionary: counts_dict
counts_dict = {}
# Iterate over the file chunk by chunk
for chunk in pd.read_csv(csv_file,chunksize=c_size):
# Iterate over the column in DataFrame
for entry in chunk[colname]:
if entry in counts_dict.keys():
counts_dict[entry] += 1
else:
counts_dict[entry] = 1
# Return counts_dict
return counts_dict
# Call count_entries(): result_counts
result_counts = count_entries('tweets.csv',10,'lang')
# Print result_counts
print(result_counts)transformer listes en dict
# Define lists2dict()
def lists2dict(list1, list2):
"""Return a dictionary where list1 provides
the keys and list2 provides the values."""
# Zip lists: zipped_lists
zipped_lists = zip(list1, list2)
# Create a dictionary: rs_dict
rs_dict = dict(zipped_lists)
# Return the dictionary
return rs_dict
# Call lists2dict: rs_fxn
rs_fxn = lists2dict(feature_names,row_vals)rechercher un mot dans un fichier texte
import re
def word_in_text(word, text):
word = word.lower()
text = text.lower()
match = re.search(word, text)
if match:
return True
return FalseFor plotting it is easiest to use the plot method that is built into DataFrames. To get two lines into the same plot we need to use a trick you might not have seen before. If df1 and df2 are two DataFrames you can plot their data together like this:
By capturing the ax object and giving it as an argument in the plot statement we get both lines in the same plot.
ax = df1.plot(x="col_a", y="col_b",
label="df1")
df2.plot(x="col_a", y="col_b",
label="df2", ax=ax, ylabel="Y Axis Label")Plot training et test accuracies knn
# Add a title
plt.title("NN: Varying Number of Neighbors")
# Plot training accuracies
plt.plot(neighbors, train_accuracies.values(), label="Training Accuracy")
# Plot test accuracies
plt.plot(neighbors, test_accuracies.values(), label="Testing Accuracy")
plt.legend()
plt.xlabel("Number of Neighbors")
plt.ylabel("Accuracy")
# Display the plot
plt.show()Evaluer le meilleur modèle
models = {"Linear Regression": LinearRegression(), "Ridge": Ridge(alpha=0.1), "Lasso": Lasso(alpha=0.1)}
results = []
# Loop through the models' values
for model in models.values():
kf = KFold(n_splits=6, random_state=42, shuffle=True)
# Perform cross-validation
cv_scores = cross_val_score(model, X_train, y_train, cv=kf)
# Append the results
results.append(cv_scores)
# Create a box plot of the results
plt.boxplot(results, labels=models.keys())
plt.show()