# (Web-export residue, not part of the script: "Skip to content")
import requests

# URLs of the NetCDF files
urls = [
    "https://www.ncei.noaa.gov/thredds/fileServer/cdr/ozone-zonal-mean-esrl/Ozone-ESRL-CDR_v01r01_altitude_s19790707_e20140707_c20140808.nc",
    "https://www.ncei.noaa.gov/thredds/fileServer/cdr/ozone-zonal-mean-esrl/Ozone-ESRL-CDR_v01r01_pressurelevel_s19790707_e20140707_c20140808.nc",
]

# Filenames for the downloaded files (paired with `urls` by position)
file_names = ["ozone_altitude_data.nc", "ozone_pressurelevel_data.nc"]

# Download the files.
# stream=True writes the payload to disk chunk by chunk instead of holding the
# whole file in memory, and the timeout keeps a stalled connection from
# hanging the script forever. A non-200 answer is reported and skipped.
for url, file_name in zip(urls, file_names):
    with requests.get(url, stream=True, timeout=60) as response:
        if response.status_code == 200:
            with open(file_name, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
            print(f"File downloaded successfully: {file_name}")
        else:
            print(f"Failed to download the file from {url}. HTTP status code: {response.status_code}")
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Read the altitude and pressure-level tables from their CSV exports.
# NOTE(review): nothing visible in this file writes these .csv files (the
# download step saves .nc files) - confirm where the conversion happens.
altitude_df, pressurelevel_df = (
    pd.read_csv(csv_path)
    for csv_path in ('ozone_altitude_data.csv', 'ozone_pressurelevel_data.csv')
)

# Function to clean data
def clean_data(df):
    """Drop incomplete rows, then drop rows that hold an IQR outlier.

    Args:
        df: DataFrame to clean. It is not modified.

    Returns:
        A DataFrame without any row that has a missing value or that has,
        in at least one numeric column, a value outside
        ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]``. The quartiles are computed per
        column after the missing-value drop. Non-numeric columns are kept in
        the result but take no part in the outlier test.
    """
    # Handle missing values
    df = df.dropna()

    # Quartiles only make sense for numbers; asking for them on text columns
    # fails on recent pandas, so the outlier test looks at numeric columns only.
    numeric = df.select_dtypes(include='number')
    if numeric.empty:
        # No numeric columns (or no rows left): nothing to test.
        return df

    # Handle outliers with the IQR rule
    q1 = numeric.quantile(0.25)
    q3 = numeric.quantile(0.75)
    iqr = q3 - q1
    is_outlier = (numeric < q1 - 1.5 * iqr) | (numeric > q3 + 1.5 * iqr)

    # A row is removed as soon as one of its numeric values is out of range.
    return df[~is_outlier.any(axis=1)]

# Run both raw tables through the same cleaning pass
altitude_df_cleaned, pressurelevel_df_cleaned = map(
    clean_data, (altitude_df, pressurelevel_df)
)

# Function to normalize and standardize data
def normalize_and_standardize(df):
    """Min-max scale *df*, then z-score the scaled values.

    Both scalers are fitted on the data passed in. The result is a new
    DataFrame carrying the column labels of *df* and a default integer index.
    """
    column_labels = df.columns

    # Stage 1: min-max scaling
    unit_range = MinMaxScaler().fit_transform(df)
    normalized = pd.DataFrame(unit_range, columns=column_labels)

    # Stage 2: z-score scaling of the stage-1 output
    z_scores = StandardScaler().fit_transform(normalized)
    return pd.DataFrame(z_scores, columns=column_labels)

# Scale each cleaned table ...
altitude_df_final, pressurelevel_df_final = (
    normalize_and_standardize(cleaned)
    for cleaned in (altitude_df_cleaned, pressurelevel_df_cleaned)
)

# ... then write each result to its own CSV, leaving the index out
for scaled_df, out_path in (
    (altitude_df_final, 'ozone_altitude_data_cleaned.csv'),
    (pressurelevel_df_final, 'ozone_pressurelevel_data_cleaned.csv'),
):
    scaled_df.to_csv(out_path, index=False)

# Provide download links for the cleaned data
import IPython.display as display

# One notebook file link per cleaned CSV
print("Download cleaned and normalized data:")
for cleaned_csv in ('ozone_altitude_data_cleaned.csv', 'ozone_pressurelevel_data_cleaned.csv'):
    display.display(display.FileLink(cleaned_csv))
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the cleaned data
altitude_df = pd.read_csv('ozone_altitude_data_cleaned.csv')
pressurelevel_df = pd.read_csv('ozone_pressurelevel_data_cleaned.csv')
eda_frames = (altitude_df, pressurelevel_df)

# Summary statistics, altitude table first
summary_headings = ("Altitude Data Summary:", "\nPressure Level Data Summary:")
for heading, frame in zip(summary_headings, eda_frames):
    print(heading)
    print(frame.describe())

# Visualizations
# One grid of per-column histograms for each table
histogram_titles = ('Altitude Data Distributions', 'Pressure Level Data Distributions')
for plot_title, frame in zip(histogram_titles, eda_frames):
    frame.hist(figsize=(10, 8))
    plt.suptitle(plot_title)
    plt.show()

# One pairplot for each table to see relationships between columns
pairplot_titles = ('Altitude Data Relationships', 'Pressure Level Data Relationships')
for plot_title, frame in zip(pairplot_titles, eda_frames):
    sns.pairplot(frame)
    plt.suptitle(plot_title)
    plt.show()
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# Load the cleaned data
altitude_df = pd.read_csv('ozone_altitude_data_cleaned.csv')
pressurelevel_df = pd.read_csv('ozone_pressurelevel_data_cleaned.csv')

# Column used both as the numerator of the engineered ratio and as the
# target for the correlation-based feature selection below.
TARGET_COLUMN = 'NOAA_Climate_Data_Record_of_Zonal_Mean_Ozone_in_Mixing_Ratio_Tier1_4'

# Feature Engineering
# Example: Creating a new feature as a ratio of two existing features
altitude_df['ozone_ratio'] = altitude_df[TARGET_COLUMN] / altitude_df['z']
pressurelevel_df['ozone_ratio'] = pressurelevel_df[TARGET_COLUMN] / pressurelevel_df['z']

# Handle missing values (if any).
# A zero in 'z' turns the ratio into +/-inf (or NaN for 0/0). fillna() does
# not touch infinities and StandardScaler refuses them, so they are first
# converted to NaN and then imputed with the column mean like any other gap.
_non_finite = [float('inf'), float('-inf')]
altitude_df.replace(_non_finite, float('nan'), inplace=True)
pressurelevel_df.replace(_non_finite, float('nan'), inplace=True)
altitude_df.fillna(altitude_df.mean(), inplace=True)
pressurelevel_df.fillna(pressurelevel_df.mean(), inplace=True)

# Normalize the data (the scaler is re-fitted for each table)
scaler = StandardScaler()
altitude_df_scaled = pd.DataFrame(scaler.fit_transform(altitude_df), columns=altitude_df.columns)
pressurelevel_df_scaled = pd.DataFrame(scaler.fit_transform(pressurelevel_df), columns=pressurelevel_df.columns)

# Feature Selection
# Correlation matrix
corr_altitude = altitude_df_scaled.corr()
corr_pressurelevel = pressurelevel_df_scaled.corr()

# Plotting the correlation matrix
plt.figure(figsize=(12, 8))
sns.heatmap(corr_altitude, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix - Altitude Data')
plt.show()

plt.figure(figsize=(12, 8))
sns.heatmap(corr_pressurelevel, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix - Pressure Level Data')
plt.show()

# Selecting features with high correlation to the target variable
# (absolute correlation above 0.5).
# NOTE(review): the target column correlates perfectly with itself, so it is
# always part of the selection; 'ozone_ratio' is derived from the target too.
target_corr_altitude = corr_altitude[TARGET_COLUMN]
target_corr_pressurelevel = corr_pressurelevel[TARGET_COLUMN]
selected_features_altitude = target_corr_altitude[target_corr_altitude.abs() > 0.5].index.tolist()
selected_features_pressurelevel = target_corr_pressurelevel[target_corr_pressurelevel.abs() > 0.5].index.tolist()

print('Selected Features for Altitude Data:', selected_features_altitude)
print('Selected Features for Pressure Level Data:', selected_features_pressurelevel)
import plotly.express as px
import pandas as pd
from IPython.display import display, HTML

# Read both ozone tables (altitude first, then pressure level)
ozone_altitude_data, ozone_pressurelevel_data = map(
    pd.read_csv, ('ozone_altitude_data.csv', 'ozone_pressurelevel_data.csv')
)

# Function to create a line plot
def create_line_plot(df, title):
    """Return a Plotly line figure of the Tier1_4 ozone column of *df* over 'time'."""
    ozone_column = 'NOAA_Climate_Data_Record_of_Zonal_Mean_Ozone_in_Mixing_Ratio_Tier1_4'
    return px.line(df, x='time', y=ozone_column, title=title)

# Build one figure per dataset, then show them in the same order
fig_alt = create_line_plot(ozone_altitude_data, 'Ozone Levels Over Time (Altitude Data)')
fig_press = create_line_plot(ozone_pressurelevel_data, 'Ozone Levels Over Time (Pressure Level Data)')
for figure in (fig_alt, fig_press):
    figure.show()

# Recommendations and alerts: fixed illustrative text, not derived from the data
recommendations_alt = ["Increase monitoring frequency", "Deploy additional sensors"]
alerts_alt = ["High ozone levels detected", "Sensor malfunction"]
recommendations_press = ["Calibrate pressure sensors", "Review data collection methods"]
alerts_press = ["Pressure anomaly detected", "Data inconsistency"]


def _bullet_list(items):
    """Render *items* as an HTML unordered list, one <li> per entry."""
    return '<ul>' + ''.join(f'<li>{item}</li>' for item in items) + '</ul>'


recommendations_html_alt = _bullet_list(recommendations_alt)
alerts_html_alt = _bullet_list(alerts_alt)
recommendations_html_press = _bullet_list(recommendations_press)
alerts_html_press = _bullet_list(alerts_press)

# Page layout: altitude section first, pressure-level section second
html_content = f'''
<h1>Ozone Data Dashboard</h1>
<h2>Altitude Data</h2>
<h3>Recommendations</h3>
{recommendations_html_alt}
<h3>Alerts</h3>
{alerts_html_alt}
<h2>Pressure Level Data</h2>
<h3>Recommendations</h3>
{recommendations_html_press}
<h3>Alerts</h3>
{alerts_html_press}
'''

# Render the assembled page in the notebook output
display(HTML(html_content))
import plotly.express as px
import pandas as pd
from ipywidgets import interact, widgets
from IPython.display import display, HTML

# Read both ozone tables again for the interactive view
ozone_altitude_data, ozone_pressurelevel_data = (
    pd.read_csv(csv_path)
    for csv_path in ('ozone_altitude_data.csv', 'ozone_pressurelevel_data.csv')
)

# Dropdown key -> DataFrame
datasets = dict(altitude=ozone_altitude_data, pressure=ozone_pressurelevel_data)

# Dropdown label -> column name in the tables
variables = dict(zip(
    ('Ozone Levels', 'Latitude', 'Altitude/Pressure'),
    ('NOAA_Climate_Data_Record_of_Zonal_Mean_Ozone_in_Mixing_Ratio_Tier1_4', 'lat', 'z'),
))

# Function to update the plot based on selected dataset and variable
def update_plot(dataset, variable):
    """Show the line plot and the notes for one dataset/variable choice.

    Args:
        dataset: Key into the module-level ``datasets`` mapping.
        variable: Key into the module-level ``variables`` mapping.

    The function returns nothing; it shows a Plotly figure and displays an
    HTML fragment with the recommendations and alerts for the dataset.
    """
    frame = datasets[dataset]
    column = variables[variable]
    figure = px.line(
        frame,
        x='time',
        y=column,
        title=f'{variable} Over Time ({dataset.capitalize()} Data)',
    )
    figure.show()

    # Any key other than 'altitude' falls back to the pressure-level notes.
    recommendations, alerts = (
        (recommendations_alt, alerts_alt)
        if dataset == 'altitude'
        else (recommendations_press, alerts_press)
    )

    recommendations_html = '<ul>' + ''.join(f'<li>{rec}</li>' for rec in recommendations) + '</ul>'
    alerts_html = '<ul>' + ''.join(f'<li>{alert}</li>' for alert in alerts) + '</ul>'

    html_content = f'''
    <h2>Recommendations</h2>
    {recommendations_html}
    <h2>Alerts</h2>
    {alerts_html}
    '''
    display(HTML(html_content))

# Dropdown choosing which table to plot
dataset_choices = ['altitude', 'pressure']
dataset_widget = widgets.Dropdown(options=dataset_choices, value='altitude', description='Dataset:')

# Dropdown choosing which column to plot
variable_choices = ['Ozone Levels', 'Latitude', 'Altitude/Pressure']
variable_widget = widgets.Dropdown(options=variable_choices, value='Ozone Levels', description='Variable:')

# Hook both dropdowns up to update_plot and show them
interact(update_plot, dataset=dataset_widget, variable=variable_widget)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Fresh copies of both ozone tables for the analysis steps below
ozone_altitude_data, ozone_pressurelevel_data = map(
    pd.read_csv, ('ozone_altitude_data.csv', 'ozone_pressurelevel_data.csv')
)

# Trend Analysis
def plot_trend(df, title):
    """Plot the Tier1_4 ozone column of *df* over time.

    Note that *df* is changed in place: its 'time' column is parsed as
    seconds since the epoch and then moved into the index, so a second call
    on the same frame no longer finds a 'time' column.

    Args:
        df: Table with a 'time' column and the Tier1_4 ozone column.
        title: Title shown above the plot.
    """
    ozone_column = 'NOAA_Climate_Data_Record_of_Zonal_Mean_Ozone_in_Mixing_Ratio_Tier1_4'

    # Turn the raw timestamps into a datetime index
    df['time'] = pd.to_datetime(df['time'], unit='s')
    df.set_index('time', inplace=True)

    ozone_series = df[ozone_column]
    ozone_series.plot(figsize=(10, 6), title=title)
    plt.xlabel('Time')
    plt.ylabel('Ozone Levels')
    plt.show()

# Trend plot for each table, altitude first
for trend_frame, trend_title in (
    (ozone_altitude_data, 'Trend Analysis: Ozone Levels Over Time (Altitude Data)'),
    (ozone_pressurelevel_data, 'Trend Analysis: Ozone Levels Over Time (Pressure Level Data)'),
):
    plot_trend(trend_frame, trend_title)

# Anomaly Detection
def detect_anomalies(df, variable):
    """Return the rows of *df* whose *variable* lies over 3 std devs from its mean.

    A 'z_score' column (value minus column mean, divided by the column's
    standard deviation) is written into *df* itself as a side effect, and it
    is also present in the returned rows.

    Args:
        df: Table to scan; gains a 'z_score' column.
        variable: Name of the column to score.

    Returns:
        The subset of *df* where the absolute z-score exceeds 3.
    """
    values = df[variable]
    df['z_score'] = (values - values.mean()) / values.std()
    is_anomalous = df['z_score'].abs() > 3
    return df[is_anomalous]

# Score the same ozone column in both tables
ozone_column = 'NOAA_Climate_Data_Record_of_Zonal_Mean_Ozone_in_Mixing_Ratio_Tier1_4'
anomalies_alt = detect_anomalies(ozone_altitude_data, ozone_column)
anomalies_press = detect_anomalies(ozone_pressurelevel_data, ozone_column)

# Report what was flagged, altitude table first
for banner, flagged_rows in (
    ("Anomalies in Altitude Data:", anomalies_alt),
    ("Anomalies in Pressure Level Data:", anomalies_press),
):
    print(banner)
    print(flagged_rows)

# Correlation Analysis
def plot_correlation_matrix(df, title):
    """Draw an annotated heatmap of the pairwise column correlations of *df*.

    Only numeric and boolean columns are correlated. Text or other
    non-numeric columns are left out, because recent pandas versions raise
    when ``corr()`` meets them instead of skipping them silently.

    Args:
        df: Table whose columns are correlated.
        title: Title shown above the heatmap.
    """
    corr = df.select_dtypes(include=['number', 'bool']).corr()
    plt.figure(figsize=(10, 8))
    # Fix the colour scale to the full correlation range so that separate
    # heatmaps are comparable.
    sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
    plt.title(title)
    plt.show()

# Correlation heatmap for each table, altitude first
for corr_frame, corr_title in (
    (ozone_altitude_data, 'Correlation Matrix: Altitude Data'),
    (ozone_pressurelevel_data, 'Correlation Matrix: Pressure Level Data'),
):
    plot_correlation_matrix(corr_frame, corr_title)
import requests

# URLs of the NetCDF files
# NOTE(review): this step repeats the download at the top of the file.
urls = [
    "https://www.ncei.noaa.gov/thredds/fileServer/cdr/ozone-zonal-mean-esrl/Ozone-ESRL-CDR_v01r01_altitude_s19790707_e20140707_c20140808.nc",
    "https://www.ncei.noaa.gov/thredds/fileServer/cdr/ozone-zonal-mean-esrl/Ozone-ESRL-CDR_v01r01_pressurelevel_s19790707_e20140707_c20140808.nc",
]

# Filenames for the downloaded files (paired with `urls` by position)
file_names = ["ozone_altitude_data.nc", "ozone_pressurelevel_data.nc"]

# Download the files.
# stream=True writes the payload to disk chunk by chunk instead of holding the
# whole file in memory, and the timeout keeps a stalled connection from
# hanging the script forever. A non-200 answer is reported and skipped.
for url, file_name in zip(urls, file_names):
    with requests.get(url, stream=True, timeout=60) as response:
        if response.status_code == 200:
            with open(file_name, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
            print(f"File downloaded successfully: {file_name}")
        else:
            print(f"Failed to download the file from {url}. HTTP status code: {response.status_code}")
import os

# Create the project workspace (if needed) and make it the working directory
os.makedirs('project', exist_ok=True)
os.chdir('project')

# This used to be the shell command `mkdir code docs app`, which is not valid
# Python and stopped the file from parsing. Create the same three
# sub-directories from Python instead; exist_ok makes a re-run harmless.
for subdir in ('code', 'docs', 'app'):
    os.makedirs(subdir, exist_ok=True)
import os
import shutil  # Import shutil module

source_dir = 'code'  # Replace with actual path
destination_dir = 'app'  # Replace with actual path

# Fail early with a clear message when there is nothing to copy from
if not os.path.exists(source_dir):
    raise FileNotFoundError(f"The source directory '{source_dir}' does not exist.")

# Create destination directory if it does not exist
os.makedirs(destination_dir, exist_ok=True)

# Copy every regular file from source to destination
for filename in os.listdir(source_dir):
    source_file = os.path.join(source_dir, filename)
    # os.listdir also returns sub-directories, which shutil.copy cannot
    # handle; skip anything that is not a regular file.
    if not os.path.isfile(source_file):
        continue
    destination_file = os.path.join(destination_dir, filename)
    shutil.copy(source_file, destination_file)
# Ozone Analysis Toolkit

## Overview
# This toolkit provides a comprehensive solution for analyzing and visualizing ozone data. 
# It includes data ingestion, analysis, and visualization components.

## Directory Structure
# - `code/`: Contains all the code files, including Python scripts and Jupyter notebooks.
# - `docs/`: Contains documentation files, such as user guides and installation guides.
# - `app/`: Contains files necessary to package the application, including `setup.py` and `requirements.txt`.

## Installation
# To install the toolkit, navigate to the `app` directory and run:
# ```bash
# pip install .
# ```
### Example `LICENSE`
# Example LICENSE text, assembled one line per entry. The empty first and
# last entries give the text a leading and a trailing newline.
license_text = "\n".join([
    "",
    "MIT License",
    "",
    "Copyright (c) 2023 [Naila Rais]",
    "",
    "Permission is hereby granted, free of charge, to any person obtaining a copy",
    "of this software and associated documentation files (the \"Software\"), to deal",
    "in the Software without restriction, including without limitation the rights",
    "to use, copy, modify, merge, publish, distribute, sublicense, and/or sell",
    "copies of the Software, and to permit persons to whom the Software is",
    "furnished to do so, subject to the following conditions:",
    "",
    "[...]",
    "",
])
print(license_text)
# 'ozone_analysis' is the entry point for the analysis. It is defined here as
# a placeholder so that the call below has something to run:

def ozone_analysis():
    """Stand-in for the real analysis; for now it only prints a status line."""
    status_message = "Performing ozone analysis..."
    print(status_message)


# Run the placeholder once
ozone_analysis()
import os

# Create the main project directory
# NOTE(review): an earlier step in this file calls os.chdir('project'), so
# when the file runs top to bottom this relative path resolves inside that
# directory - confirm that nesting is intended.
project_dir = 'project'
os.makedirs(project_dir, exist_ok=True)

# Create subdirectories
subdirs = ['code', 'docs', 'app']
for subdir in subdirs:
    os.makedirs(os.path.join(project_dir, subdir), exist_ok=True)

# Placeholder files per directory; the '' key means the project root
files = {
    'code': ['data_ingestion.py', 'data_analysis.ipynb', 'visualization.ipynb'],
    'docs': ['user_guide.pdf', 'installation_guide.pdf'],
    'app': ['setup.py', 'requirements.txt', 'main.py'],
    '': ['README.md', 'LICENSE']
}

for subdir, filenames in files.items():
    for filename in filenames:
        filepath = os.path.join(project_dir, subdir, filename)
        # Mode 'x' only creates new files. A file that already exists (for
        # example real code copied in earlier) is left untouched rather than
        # being truncated to a placeholder on every run.
        try:
            with open(filepath, 'x', encoding='utf-8') as f:
                f.write(f'# Placeholder for {filename}')
        except FileExistsError:
            continue

print(f"Directory structure created under '{project_dir}'")