# Dummy Agro Project
import pandas as pd
import numpy as np
# Simulated data set: 4 regions x 5 crops x 5 weather conditions = 100 rows.
regions = ['Region A', 'Region B', 'Region C', 'Region D']
crops = ['Maize', 'Tomatoes', 'Rice', 'Potatoes', 'Cassava']
weather_conditions = ['High Rain', 'Low Rain', 'Normal', 'High Temp', 'Low Temp']

# One row per (region, crop, weather) combination, iterated region-major so
# the row order matches the nested region -> crop -> weather loops.
# The loss is drawn independently of the three labels, uniformly from
# [5, 40) percent, so it carries no real signal about them.
data = [
    [region, crop, weather, np.random.uniform(5, 40)]
    for region in regions
    for crop in crops
    for weather in weather_conditions
]

# Create DataFrame
df = pd.DataFrame(data, columns=['Region', 'Crop', 'Weather', 'Loss Percentage'])

# Display first few rows (only rendered when run as a notebook cell)
df.head()
# Simulate a missing value so the cleaning step has something to repair:
# row 5, column 3 is a 'Loss Percentage' cell.
df.iloc[5, 3] = np.nan

# Fill missing data with the column mean (for simplicity).
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the selected Series: the in-place form operates on
# an intermediate object, triggers a FutureWarning on pandas 2.x, and leaves
# `df` unchanged when Copy-on-Write is enabled.
df['Loss Percentage'] = df['Loss Percentage'].fillna(df['Loss Percentage'].mean())

# Clean: ensure 'Loss Percentage' is stored as a float column
df['Loss Percentage'] = df['Loss Percentage'].astype(float)

# Display the cleaned data (only rendered when run as a notebook cell)
df.head()
import plotly.express as px
# Bar chart of the mean loss percentage per region
region_loss_map = (
    df.groupby(['Region'])['Loss Percentage']
    .mean()
    .reset_index()
)
fig = px.bar(
    region_loss_map,
    x='Region',
    y='Loss Percentage',
    title='Average Post-Harvest Loss by Region',
)
fig.show()

# Bar chart of the mean loss percentage per crop
crop_loss_map = (
    df.groupby(['Crop'])['Loss Percentage']
    .mean()
    .reset_index()
)
fig2 = px.bar(
    crop_loss_map,
    x='Crop',
    y='Loss Percentage',
    title='Average Post-Harvest Loss by Crop',
)
fig2.show()
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import LabelEncoder
# Encode categorical data.
# Each column gets its own LabelEncoder so every fitted mapping stays
# available afterwards (e.g. for inverse_transform). Refitting one shared
# encoder would keep only the mapping of the last column it was fitted on.
encoders = {}
for column in ['Region', 'Crop', 'Weather']:
    encoders[column] = LabelEncoder()
    df[column + '_encoded'] = encoders[column].fit_transform(df[column])
# Backward-compatible name: previously `label_encoder` ended up fitted on
# 'Weather', the last column encoded.
label_encoder = encoders['Weather']

# Features (Region, Crop, Weather) and target (Loss Percentage)
X = df[['Region_encoded', 'Crop_encoded', 'Weather_encoded']]
y = df['Loss Percentage']

# Split the data into training and test sets (80% / 20%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Decision Tree Regressor. The seed is fixed, consistent with the
# split above, so repeated runs on the same data build the same tree.
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)

# Predict loss percentage for the test set
y_pred = model.predict(X_test)

# Evaluate the model (basic R^2 score for now).
# NOTE(review): if the target is random noise unrelated to the features, an
# R^2 near or below zero is the expected outcome, not a modelling failure.
score = model.score(X_test, y_test)
score