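# NOTE: exported from a notebook; the "!pip" line below is IPython shell syntax and only runs inside Jupyter/IPython.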
!pip install arch
#Importing project libraries
#Data Manipulation
import pandas as pd
import numpy as np
#Data Viz
import matplotlib.pyplot as plt
import seaborn as sns
#Optimization & Tests
from scipy.optimize import minimize
from scipy.stats import kstest, norm, probplot, anderson
# Econometric Var
from arch import arch_model
# Ensemble methods libraries:
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
from xgboost import XGBRegressor
#dataframe
# Load the raw price table. The code below relies on a "date" column;
# the remaining columns are presumably one price series per cryptocurrency.
df = pd.read_csv("crypto_prices.csv")
df.head()  # only displays output when run as a notebook cell

# Parse "date" once, before splitting, so both subsets inherit the datetime dtype.
df["date"] = pd.to_datetime(df["date"])

# Chronological train/test split at 2021-07-01. Explicit copies are taken so
# that later column assignments on the subsets do not raise
# SettingWithCopyWarning; no second to_datetime pass is needed because the
# column is already datetime at this point.
df_train = df[df['date'] < '2021-07-01'].copy()
df_test = df[df['date'] >= '2021-07-01'].copy()

# Only the full frame gets "date" as its index; df_train and df_test keep
# "date" as a regular column because the plots below address it with x='date'.
df.set_index("date", inplace=True)
print(df.columns)
# One panel per column of the date-indexed frame
df.plot(subplots=True, figsize=(10, 10))
plt.show()

# Bitcoin price over time, drawn separately for the training and test periods
bitcoin_views = (
    (df_train, 'Bitcoin Prices - Training Data'),
    (df_test, 'Bitcoin Prices - Test Data'),
)
for price_frame, chart_title in bitcoin_views:
    price_frame.plot(x='date', y='bitcoin')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title(chart_title)
    plt.show()
# Overlay every coin's price for the test period in a single chart.
# DataFrame.plot opens its own figure when no `ax` is passed, so the size is
# given through `figsize` here; a separate plt.figure(...) call beforehand
# would only leave an extra, empty figure behind.
test_price_columns = [
    'bitcoin', 'bitcoin-cash', 'ethereum', 'ethereum-classic', 'litecoin',
    'monero', 'ripple', 'stellar', 'cardano',
]
df_test.plot(x='date', y=test_price_columns, figsize=(12, 6))
plt.xlabel('Date')
plt.ylabel('Prices')
plt.title('Crypto Prices - Testing Data')
plt.show()
# Period-over-period percentage returns for the training window.
# "date" is not re-parsed here: it was converted to datetime before the
# train/test split, and re-assigning it on the filtered frame is what raises
# SettingWithCopyWarning.
df_train_returns = df_train.drop(columns=['date']).pct_change()
df_train_returns['date'] = df_train['date']
# The first row has no previous price to compare against, so drop it.
df_train_returns = df_train_returns.iloc[1:]

# Distribution of returns, one histogram per price column
df_train_returns.drop(columns=['date']).hist(bins=100, figsize=(15, 10))
plt.tight_layout()
plt.show()
# Pairwise correlation of the training returns (every column except "date")
train_return_columns = df_train_returns.columns.drop('date')
correlation_matrix = df_train_returns[train_return_columns].corr()

# All training return series on one set of axes
returns_ax = df_train_returns.plot(x='date', y=train_return_columns, figsize=(12, 6))
returns_ax.set_xlabel('Date')
returns_ax.set_ylabel('Returns')
returns_ax.set_title('Evolution of Returns')
returns_ax.legend(loc='upper left')
plt.show()
# Annotated heatmap of the training-return correlations
heatmap_options = dict(annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
plt.figure(figsize=(10, 8))
corr_ax = sns.heatmap(correlation_matrix, **heatmap_options)
corr_ax.set_title('Correlation Matrix - Training Data')
plt.show()
# Covariance matrix of the training returns, shown as an annotated heatmap.
covariance_matrix = df_train_returns.drop(columns=['date']).cov()
plt.figure(figsize=(10, 8))
# Four decimals instead of two: covariances of percentage returns are
# presumably well below 0.01, so ".2f" would print most cells as 0.00.
# NOTE(review): adjust the precision if the data turns out to be on a
# different scale.
sns.heatmap(covariance_matrix, annot=True, cmap='coolwarm', fmt=".4f", linewidths=0.5)
plt.title('Covariance Matrix - Training Data')
plt.show()
# Test-period returns: percentage change of every non-date column, with the
# dates re-attached and the first row (no prior price to compare against) removed
test_prices = df_test.drop(columns=['date'])
df_test_returns = (
    test_prices.pct_change()
    .assign(date=pd.to_datetime(df_test['date']))
    .iloc[1:]
)

# Distribution of returns, one histogram per price column
test_return_values = df_test_returns.drop(columns=['date'])
test_return_values.hist(bins=100, figsize=(15, 10))
plt.tight_layout()
plt.show()
# All test-period return series on one set of axes
test_return_columns = df_test_returns.columns.drop('date')
test_ax = df_test_returns.plot(x='date', y=test_return_columns, figsize=(12, 6))
test_ax.set(xlabel='Date', ylabel='Returns', title='Evolution of Returns')
test_ax.legend(loc='upper left')
plt.show()
# Print the frame summary before the date conversion
df_test_returns.info()
parsed_dates = pd.to_datetime(df_test_returns['date'])
df_test_returns['date'] = parsed_dates
# Print the summary again after the conversion
df_test_returns.info()
# Discard every row that still contains a missing value
df_test_returns = df_test_returns.dropna(axis=0, how='any')