# Top 10 Stock Analysis
# Analysis of Stock Market Data

# Importing necessary packages
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

# Loading the data
# Load data from the csv file into the working DataFrame `df`.
# index_col=None keeps pandas' default integer index instead of using a
# CSV column as the index.
df = pd.read_csv('stock_data.csv', index_col=None)

# Understand the variables
# Understand your variables: build one summary row per column of `df`
# holding the column name, its nunique() count, and the list of its
# distinct values.
summary_rows = [
    [var, df[var].nunique(), df[var].unique().tolist()]
    for var in df.columns
]
variables = pd.DataFrame(
    summary_rows,
    columns=['Variable', 'Number of unique values', 'Values'],
)

# Join with the variables dataframe. The explanation file is indexed by its
# first column (presumably the variable name -- confirm against
# variable_explanation.csv) so it lines up with the 'Variable' index.
var_dict = pd.read_csv('variable_explanation.csv', index_col=0)
# NOTE: `display` is assumed to be supplied by the notebook environment.
display(variables.set_index('Variable').join(var_dict))

# Exploratory Analysis of the Data
# NOTE: `display` is assumed to be supplied by the notebook environment.

# Descriptive statistics
display(df.describe())

# Checking first few rows of the data
display(df.head(10))

# Data types. df.info() prints its report itself and returns None, so it is
# called directly rather than wrapped in display().
df.info()

# Converting Date from object type to datetime type. errors='coerce' turns
# any value that does not match '%Y-%m-%d' into NaT instead of raising, so
# such rows show up in the null check below.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d', errors='coerce')
df.info()

# Checking null values (count per column)
display(df.isna().sum())

# Checking for duplicates (number of fully duplicated rows)
display(df.duplicated().sum())