Skip to content
#!python3 -m pip install --upgrade pip
Hidden output
#!pip install tensorflow==2.17.0
Hidden output
# Import TensorFlow and print the version that was actually loaded, so it can
# be compared against the version pinned in the install command above.
import tensorflow as tf
print(tf.__version__)

Start

# Read the cleaned dataset from CSV. The 'Trade Date' column is parsed as
# dates and becomes the index of the resulting frame.
import pandas as pd

df = pd.read_csv(
    'Data_cleaned_Dataset.csv',
    index_col='Trade Date',
    parse_dates=['Trade Date'],
)

# Optional column subset, currently disabled:
#df=df[['Electricity: Wtd Avg Price $/MWh','Electricity: Daily Volume MWh','Natural Gas: Henry Hub Natural Gas Spot Price (Dollars per Million Btu)','pjm_load sum in MW (daily)','temperature mean in C (daily): US','Weekday']]


# Preview the first rows (the result is only shown in an interactive session)
df.head()
import matplotlib.pyplot as plt
import numpy as np

# Plot net generated electricity and mean temperature against the date index
# on one figure, each series on its own y-axis (left: electricity, right:
# temperature).

# Temperature series, used exactly as stored in the frame. No resampling is
# applied here, despite the "monthly" variable name.
df_monthly_temp = df['temperature mean in C (daily): US']

# Net generated electricity series (the column whose header is marked "(Monthly)")
df_monthly_electricity = df['Net_generated electricity in United States : all fuels (utility-scale) in thousand MWH (Monthly)']

fig, ax1 = plt.subplots()

# Left y-axis: net generated electricity on a linear scale
ax1.set_xlabel('Date')
ax1.set_ylabel('Net Generated Electricity (thousand MWH)', color='tab:blue')
line1, = ax1.plot(df_monthly_electricity.index, df_monthly_electricity, color='tab:blue', label='Net Generated Electricity')
ax1.tick_params(axis='y', labelcolor='tab:blue')

# Right y-axis sharing the same x-axis: temperature
ax2 = ax1.twinx()
ax2.set_ylabel('Temperature Mean (C)', color='tab:red')
line2, = ax2.plot(df_monthly_temp.index, df_monthly_temp, color='tab:red', label='Temperature Mean')
ax2.tick_params(axis='y', labelcolor='tab:red')

plt.title('Net Generated Electricity and Temperature Mean')

# Legend below the plot. The original call had an empty argument slot
# ("loc='upper center', , ncol=2"), which is a syntax error. The legend's
# top-centre is anchored near the bottom of the figure (figure coordinates),
# and the bottom margin is enlarged so it is not clipped or drawn over the
# x-axis labels.
fig.subplots_adjust(bottom=0.22)
fig.legend(handles=[line1, line2], loc='upper center', bbox_to_anchor=(0.5, 0.08), ncol=2)

plt.show()
# Bare expression: selects the net generated electricity column for inspection.
# Its value is only displayed in an interactive/notebook session.
df['Net_generated electricity in United States : all fuels (utility-scale) in thousand MWH (Monthly)']
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np  # Import numpy

# Draw a lower-triangle heatmap of the pairwise correlations between weekday,
# electricity price, natural gas price and PJM load.

# Encode 'Weekday' as integer category codes so it can enter the correlation.
# NOTE(review): the codes follow the category order pandas infers from the
# labels, which may not be calendar order - confirm before interpreting the
# weekday correlations.
df['Weekday_num'] = df['Weekday'].astype('category').cat.codes

# Shorten the long CSV headers. This renames the columns of `df` in place, so
# later code must use the short names.
df.rename(columns={
    'Electricity: Wtd Avg Price $/MWh': 'Electricity_Price',
    'Natural Gas: Henry Hub Natural Gas Spot Price (Dollars per Million Btu)': 'Natural_Gas_Price',
    'pjm_load sum in MW (daily)': 'PJM_Load'
}, inplace=True)

# Columns that take part in the correlation
columns_of_interest = [
    'Weekday_num',
    'Electricity_Price',
    'Natural_Gas_Price',
    'PJM_Load'
]

correlation_matrix = df[columns_of_interest].corr()

# Boolean mask that hides the diagonal and the upper triangle. It is built
# from the matrix shape rather than from the correlation values: masking with
# np.triu(correlation_matrix) relies on the values being non-zero, so a
# correlation of exactly 0 in the upper triangle would stay visible.
upper_triangle_mask = np.triu(np.ones(correlation_matrix.shape, dtype=bool))

# Plot the correlation matrix with the viridis colour map
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='viridis', fmt='.2f', linewidths=0.5, mask=upper_triangle_mask)
plt.title('Correlation Matrix')
plt.xticks(rotation=0)  # Keep x-axis labels horizontal
plt.show()
# Plot net generated electricity and mean temperature on one shared,
# log-scaled y-axis.

# The original code indexed df['Net_generated_electricity'], but nothing in
# the visible code creates a column with that name; the data is read elsewhere
# under its full CSV header. Use the short name if it exists and fall back to
# the full header otherwise, instead of raising KeyError.
generation_col = 'Net_generated_electricity'
if generation_col not in df.columns:
    generation_col = 'Net_generated electricity in United States : all fuels (utility-scale) in thousand MWH (Monthly)'

plt.figure(figsize=(12, 6))

# Net generated electricity
plt.plot(df.index, df[generation_col], label='Net Generated Electricity (thousand MWH)', color='blue')

# Temperature mean
plt.plot(df.index, df['temperature mean in C (daily): US'], label='Temperature Mean (C)', color='red')

# Logarithmic y-axis.
# NOTE(review): a log axis cannot represent values <= 0, so any zero or
# sub-zero temperatures will not be shown faithfully - confirm that this is
# acceptable for the temperature series.
plt.yscale('log')

# Labels, title and legend
plt.xlabel('Date')
plt.ylabel('Value (Log Scale)')
plt.title('Net Generated Electricity and Temperature Mean with Logarithmic Scaling')
plt.legend()

plt.show()

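Instead of interpolating the electricity price, I will drop the rows where it is null; gaps in the natural gas price are still filled by interpolation.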

# Clean the electricity and natural gas price columns:
#   1. drop rows whose electricity price is missing,
#   2. fill gaps in the natural gas price by interpolation,
#   3. replace zero electricity prices with the mean of the non-zero prices.
#
# The columns may already have been renamed in place earlier in the file
# ('Electricity_Price' / 'Natural_Gas_Price'). Resolve whichever name is
# present so this step does not raise KeyError after the rename.
price_col = 'Electricity: Wtd Avg Price $/MWh'
if price_col not in df.columns:
    price_col = 'Electricity_Price'

gas_col = 'Natural Gas: Henry Hub Natural Gas Spot Price (Dollars per Million Btu)'
if gas_col not in df.columns:
    gas_col = 'Natural_Gas_Price'

# Rows without an electricity price are removed rather than filled in
df.dropna(subset=[price_col], inplace=True)

# DataFrame.interpolate() has no `subset` argument (the original call raised
# TypeError), so interpolate the natural gas column alone and assign it back.
df[gas_col] = df[gas_col].interpolate()

# Total count of remaining missing values (displayed in interactive sessions only)
df.isna().sum().sum()

# Replace zero electricity prices with the mean of the non-zero prices
mean_non_zero = df.loc[df[price_col] != 0, price_col].mean()
df.loc[df[price_col] == 0, price_col] = mean_non_zero

# Inspect the minimum price after the replacement, then the cleaned frame
df[price_col].min()
df