# Dummy Agro Project

import pandas as pd
import numpy as np

# Dimensions of the synthetic data set: 4 regions x 5 crops x 5 weather
# conditions, i.e. one record for every possible combination.
regions = ['Region A', 'Region B', 'Region C', 'Region D']
crops = ['Maize', 'Tomatoes', 'Rice', 'Potatoes', 'Cassava']
weather_conditions = ['High Rain', 'Low Rain', 'Normal', 'High Temp', 'Low Temp']

# Build the records in region -> crop -> weather order. Each record carries a
# post-harvest loss drawn uniformly at random between 5% and 40%.
data = [
    [region, crop, weather, np.random.uniform(5, 40)]
    for region in regions
    for crop in crops
    for weather in weather_conditions
]

# Assemble the records into a table with named columns.
df = pd.DataFrame(
    data,
    columns=['Region', 'Crop', 'Weather', 'Loss Percentage'],
)

# Preview the first rows (only rendered in a notebook / interactive session).
df.head()

# Simulate a missing value in the data: blank out 'Loss Percentage' in the
# sixth row. The column is looked up by name rather than by the magic position
# 3, so this keeps working if columns are added or reordered.
df.iloc[5, df.columns.get_loc('Loss Percentage')] = np.nan

# Fill missing data with the column mean (for simplicity).
# The result is assigned back instead of calling fillna(..., inplace=True) on
# df['Loss Percentage']: that chained in-place form acts on an intermediate
# object, raises a FutureWarning in pandas 2.x, and leaves `df` unchanged once
# Copy-on-Write is enabled (the default from pandas 3.0).
df['Loss Percentage'] = df['Loss Percentage'].fillna(df['Loss Percentage'].mean())

# Clean: ensure the column has the expected dtype ('Loss Percentage' must be float).
df['Loss Percentage'] = df['Loss Percentage'].astype(float)

# Preview the cleaned data (only rendered in a notebook / interactive session).
df.head()

import plotly.express as px

# Bar chart (not a heatmap) of the mean post-harvest loss in each region.
region_loss_map = df.groupby('Region', as_index=False)['Loss Percentage'].mean()
fig = px.bar(
    region_loss_map,
    x='Region',
    y='Loss Percentage',
    title='Average Post-Harvest Loss by Region',
)
fig.show()

# Same aggregation and chart type, this time grouped by crop.
crop_loss_map = df.groupby('Crop', as_index=False)['Loss Percentage'].mean()
fig2 = px.bar(
    crop_loss_map,
    x='Crop',
    y='Loss Percentage',
    title='Average Post-Harvest Loss by Crop',
)
fig2.show()

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import LabelEncoder

# Encode categorical data, giving every column its OWN LabelEncoder.
# Re-fitting one shared encoder for each column overwrites the previous
# mapping, so the integer codes for 'Region' and 'Crop' could no longer be
# translated back to their labels (e.g. with inverse_transform).
encoders = {}
for column in ['Region', 'Crop', 'Weather']:
    encoders[column] = LabelEncoder()
    df[column + '_encoded'] = encoders[column].fit_transform(df[column])

# Backward-compatible alias: as before, `label_encoder` ends up fitted on 'Weather'.
label_encoder = encoders['Weather']

# Features (encoded Region, Crop, Weather) and target (Loss Percentage)
X = df[['Region_encoded', 'Crop_encoded', 'Weather_encoded']]
y = df['Loss Percentage']

# Hold out 20% of the rows for testing; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Decision Tree Regressor.
# random_state is fixed to match the seeded split: scikit-learn permutes the
# candidate features at every split, so an unseeded tree can differ between
# runs on identical data.
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)

# Predict loss percentage for the test set
y_pred = model.predict(X_test)

# Evaluate the model with R^2 on the held-out rows. The value can be negative
# when the tree predicts worse than always guessing the mean.
score = model.score(X_test, y_test)
# Bare expression: only rendered in a notebook / interactive session.
score