Skip to content

Start Exploring the Dataset

# importing
import pandas as pd

# Load the Iris measurements; the CSV is expected in the working directory.
df = pd.read_csv('Iris.csv')

# Quick look at the data: the first 20 rows and the (rows, columns) shape.
print(df.head(20))
print(df.shape)

# Correlate only the numeric columns. 'Species' holds string labels, and
# since pandas 2.0 DataFrame.corr() no longer drops non-numeric columns
# silently (it raises ValueError), so they are filtered out explicitly.
print(df.select_dtypes(include='number').corr())

# Per-species mean of the four measurements, ordered by mean sepal length.
# groupby(...).mean() already returns a DataFrame, so no extra wrapping
# in pd.DataFrame(...) is needed.
media = (
    df.groupby('Species')[
        ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
    ]
    .mean()
    .sort_values('SepalLengthCm')
    .reset_index()
)

print(media)

Relationship between SepalLengthCm and PetalLengthCm

# Distinct species labels present in the data (the result is not stored).
set(df['Species'])
import matplotlib.pyplot as plt

# One marker colour per species; each row's label is looked up in this table.
colors = {
    'Iris-setosa': 'Blue',
    'Iris-versicolor': 'Green',
    'Iris-virginica': 'red',
}
SpecieColor = df['Species'].map(colors)

# Sepal length on the x axis, petal length on the y axis, coloured by species.
plt.scatter(
    df['SepalLengthCm'],
    df['PetalLengthCm'],
    color=SpecieColor,
)
plt.title('Relacion entre Sepal and Petal Lenght in cm')
plt.xlabel('Sepal Lenght (cm)')
plt.ylabel('Petal Lenght (cm)')
plt.show()

# Dump the sepal-length column to stdout.
print(df['SepalLengthCm'])

# Clear the current figure, then overlay semi-transparent histograms of the
# sepal and petal lengths so both distributions stay visible.
plt.clf()
for column in ('SepalLengthCm', 'PetalLengthCm'):
    plt.hist(df[column], alpha=0.5)
plt.show()