Skip to content
fonctions utiles
1-fonction pour afficher une courbe, titre et les ticks de la même couleur
import matplotlib.pyplot as plt
# Define a function called plot_timeseries
def plot_timeseries(axes, x, y, color, xlabel, ylabel):
# Plot the inputs x,y in the provided color
axes.plot(x, y, color=color)
# Set the x-axis label
axes.set_xlabel(xlabel)
# Set the y-axis label
axes.set_ylabel(ylabel, color=color)
# Set the colors tick params for y-axis
axes.tick_params('y', colors=color)
Fonction pour découper en morceau.
# Define count_entries()
def count_entries(csv_file,c_size,colname):
"""Return a dictionary with counts of
occurrences as value for each key."""
# Initialize an empty dictionary: counts_dict
counts_dict = {}
# Iterate over the file chunk by chunk
for chunk in pd.read_csv(csv_file,chunksize=c_size):
# Iterate over the column in DataFrame
for entry in chunk[colname]:
if entry in counts_dict.keys():
counts_dict[entry] += 1
else:
counts_dict[entry] = 1
# Return counts_dict
return counts_dict
# Call count_entries(): result_counts
result_counts = count_entries('tweets.csv',10,'lang')
# Print result_counts
print(result_counts)
transformer listes en dict
# Define lists2dict()
def lists2dict(list1, list2):
"""Return a dictionary where list1 provides
the keys and list2 provides the values."""
# Zip lists: zipped_lists
zipped_lists = zip(list1, list2)
# Create a dictionary: rs_dict
rs_dict = dict(zipped_lists)
# Return the dictionary
return rs_dict
# Call lists2dict: rs_fxn
rs_fxn = lists2dict(feature_names,row_vals)
rechercher un mot dans un fichier texte
import re
def word_in_text(word, text):
word = word.lower()
text = text.lower()
match = re.search(word, text)
if match:
return True
return False
For plotting it is easiest to use the plot method that is built into DataFrames. To get two lines into the same plot we need to use a trick you might not have seen before. If df1 and df2 are two DataFrames you can plot their data together like this:
By capturing the ax object and giving it as an argument in the plot statement we get both lines in the same plot.
ax = df1.plot(x="col_a", y="col_b",
label="df1")
df2.plot(x="col_a", y="col_b",
label="df2", ax=ax, ylabel="Y Axis Label")
Plot training et test accuracies knn
# Add a title
plt.title("NN: Varying Number of Neighbors")
# Plot training accuracies
plt.plot(neighbors, train_accuracies.values(), label="Training Accuracy")
# Plot test accuracies
plt.plot(neighbors, test_accuracies.values(), label="Testing Accuracy")
plt.legend()
plt.xlabel("Number of Neighbors")
plt.ylabel("Accuracy")
# Display the plot
plt.show()
Evaluer le meilleur modèle
models = {"Linear Regression": LinearRegression(), "Ridge": Ridge(alpha=0.1), "Lasso": Lasso(alpha=0.1)}
results = []
# Loop through the models' values
for model in models.values():
kf = KFold(n_splits=6, random_state=42, shuffle=True)
# Perform cross-validation
cv_scores = cross_val_score(model, X_train, y_train, cv=kf)
# Append the results
results.append(cv_scores)
# Create a box plot of the results
plt.boxplot(results, labels=models.keys())
plt.show()