Skip to content

Analysis of Stock Market Data

Importing necessary packages

import numpy as np 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

Loading the data

# Read the stock records from the CSV file into a DataFrame.
# index_col=None: no file column is used as the row index, so rows get
# the default integer index.
df = pd.read_csv(
    'stock_data.csv',
    index_col=None,
)

Understanding the variables

# Summarise every column of df: its name, the number of distinct values it
# holds, and the list of those distinct values.
# The rows are collected first and the frame is built in one call rather
# than enlarged row by row with .loc, which is slow and leaves the count
# column with object dtype.
variables = pd.DataFrame(
    [
        (var, df[var].nunique(), df[var].unique().tolist())
        for var in df.columns
    ],
    columns=['Variable', 'Number of unique values', 'Values'],
)

# Load the per-variable explanations; the first CSV column becomes the index
# so it can be matched against the variable names below.
var_dict = pd.read_csv('variable_explanation.csv', index_col=0)

# Join the explanations onto the summary by variable name (DataFrame.join
# defaults to a left join, so every column of df is kept). Left as the final
# bare expression so a notebook cell renders the joined table.
variables.set_index('Variable').join(var_dict)

Exploratory Analysis of the Data

# Descriptive statistics. Wrapped in display() because a bare expression
# that is not the last statement of a cell is evaluated and then discarded.
display(df.describe())

# First ten rows of the data.
display(df.head(10))

# Column dtypes and non-null counts. DataFrame.info() prints its report
# itself and returns None, so it must not be passed to display() (that
# would emit a stray "None").
df.info()

# Convert Date from object dtype to datetime. Values that do not match
# '%Y-%m-%d' are coerced to NaT instead of raising, so they show up in the
# null-value check below.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d', errors='coerce')
df.info()

# Number of missing values per column (includes any dates coerced to NaT).
display(df.isna().sum())

# Number of fully duplicated rows.
display(df.duplicated().sum())