Coding — DataLab

import sklearn.datasets

from sklearn.svm import LinearSVC

# Load the wine dataset; the code below reads its feature matrix from
# `.data` and its class labels from `.target`.
wine = sklearn.datasets.load_wine()

# Linear support-vector classifier with default hyper-parameters.
# NOTE(review): the features are passed in unscaled; LinearSVC may emit a
# ConvergenceWarning on such data -- consider standardising first.
svm = LinearSVC()

svm.fit(wine.data, wine.target)
# Print the score so it is not silently discarded. It is computed on the
# same samples the model was fitted on, i.e. it is a *training* score and
# says nothing about generalisation.
print(svm.score(wine.data, wine.target))
print(wine.data.shape)
# Number of samples (rows of the feature matrix).
len(wine.data)

import sklearn.datasets

# Fresh copy of the wine dataset; only its target (label) array is used here.
wine = sklearn.datasets.load_wine()
array = wine["target"]
# Length of the first axis, i.e. how many labels there are.
len(array)

Underfitting = the model is too simple and has low training accuracy; it is not flexible enough to approximate f (the true function, which fits unseen data with very low error). Overfitting = the model is too complex and has low test accuracy; it fits the noise in the training set. Generalization error = Bias^2 + Variance + irreducible error. Bias = how far, on average, the model is from the true function f. Variance = how inconsistent the model is across different training sets (high-variance models lead to overfitting). Model complexity = the flexibility of the model to approximate the true function f; e.g. increasing the maximum tree depth increases complexity, which can lead to overfitting. High-variance model: CV error > training-set error; the model overfits the training set, so decrease model complexity. High-bias model: CV error ≈ training-set error >> desired error; the model underfits the training set, so increase model complexity or gather more relevant features.

# Three equally long tuples that are combined element-wise below.
a = ("John", "Charles", "Mike")
b = ("Jenny", "Christy", "Monica")
c = ("Hola", "Chao", "Adios")
# zip() pairs up a[i], b[i], c[i] lazily; the resulting iterator can be
# consumed only once.
x = zip(a, b, c)

# Materialise the iterator exactly once and reuse the tuple: calling
# tuple(x) a second time would run on the exhausted iterator and yield ().
triples = tuple(x)
print(triples)
print(type(triples))

import numpy as np
# Single input value shared by all four expressions below.
value = 1.09887868721
exp_value = np.exp(value)

# e**value, then the same quantity shifted by one.
print(exp_value)
print(1 + exp_value)
# log(exp(value)), then log(1 + exp(value)).
print(np.log(exp_value))
print(np.log(1 + exp_value))

from sklearn.tree import DecisionTreeClassifier
# NOTE(review): X_train, X_test, y_train, y_test and MSE are not defined in
# the visible part of this file -- presumably they come from an earlier
# cell; verify before running this on its own.
# fit() hands back the fitted estimator, so construction and training are chained.
model = DecisionTreeClassifier().fit(X_train, y_train)
y_pred = model.predict(X_test)
# Square root of the MSE(...) value.
# NOTE(review): the model is a classifier, so this is an error between
# label values -- confirm that a regressor or an accuracy metric was not intended.
RMSE = MSE(y_test, y_pred) ** 0.5

import numpy as np
# Evenly spaced grid starting at 0.05 with step 0.05 and an exclusive upper
# bound of 1.05; the array is only evaluated here, not stored.
# NOTE(review): with a float step the element count depends on rounding;
# np.linspace is the usual alternative when an exact count matters.
np.arange(start=0.05, stop=1.05, step=0.05)

import pandas as pd
import matplotlib.pyplot as plt
# Small demo frame with a single categorical column.
df = pd.DataFrame({'labels': ['A', 'A', 'B', 'B', 'B', 'A']})
# Lookup table: label -> colour name.
colors = {'A': 'red', 'B': 'blue'}
# Translate every label to its colour once and keep the result.
# Series.map with a dict is the idiomatic lookup; a label missing from
# `colors` would become NaN instead of raising KeyError.
label_colors = df['labels'].map(colors)
# Single lookup of the colour for label 'A'.
asd = colors['A']
# `df` itself is left unmodified (no colour column is added).
df

import random
# Draw five distinct integers from 0..9 (sampling without replacement);
# the resulting list is only evaluated here, not stored.
random.sample(range(10), k=5)

import numpy as np
# One-row matrix: ndmin=2 prepends the leading axis, giving shape (1, 5).
array = np.array([-3, 4, 7, 8, 9], ndmin=2)
print(array.shape)
# First (and only) row of the matrix.
array[0]