Skip to content

Time Series Analysis in Python

Predicting NY Average Temperatures 1872 - 2046

Imports

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import (
    plot_acf, 
    plot_pacf,
    plot_predict,
)
from statsmodels.tsa.arima.model import ARIMA

Load temp_NY dataframe, provided by NOAA

  • We'll set the index to the DATE column
  • Display the head() and tail() of the data
# Read the NOAA average-temperature CSV, using its DATE column as the index.
temp_NY = pd.read_csv("datasets/NOAA_TAVG.csv", index_col="DATE")

# Preview the first and last rows of the frame.
(temp_NY.head(), temp_NY.tail())

Convert DATE (index) to_datetime(), %Y format

# Parse the index labels with the year-only format "%Y" so the index
# holds datetimes instead of the raw values read from the CSV.
temp_NY.index = pd.to_datetime(temp_NY.index, format="%Y")

# Preview the first rows with the converted index.
temp_NY.head()

Display the min and max dates

# Earliest and latest dates present in the index, as a (min, max) pair.
(temp_NY.index.min(), temp_NY.index.max())

Plot the average temperatures

# Plot the columns of temp_NY against its index.
temp_NY.plot()
# plt.title is a function and must be called; assigning a string to it would
# leave the plot untitled and overwrite pyplot's title function.
plt.title("Average Temperature in New York (1870-2016)")
plt.xlabel("Year")
plt.ylabel("Temperature (F)")
plt.show()

Apply the adfuller test and return the P-value

# Run the augmented Dickey-Fuller test on the TAVG column; the second
# element of the returned result is the p-value.
result = adfuller(temp_NY["TAVG"])
adf_p_value = round(result[1], 3)
print(f"P-value for the ADF test is {adf_p_value}")

Show change in avg temperature

# Row-to-row change in temp_NY; the first row has no predecessor, so its
# difference is NaN and is dropped along with any other rows containing NaN.
chg_temp = temp_NY.diff().dropna()