Skip to content

Preliminaries

Base Libraries

# Importing base libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd

Final Loads

# Read the level-2 survey export
survey_l2 = pd.read_csv("survey_l2.csv")

# Level orderings for the ordinal factors, in the order handed to pd.Categorical
order_likert = [
    "Strongly Disagree",
    "Disagree",
    "Neutral",
    "Agree",
    "Strongly Agree",
]
order_age = [
    "24 years old and below",
    "25-45 years old",
    "46-60 years old",
    "61 years old and above",
]
order_edu = [
    "Other",
    "Primary/ Secondary School Student",
    "Post-Secondary (JC/ Polytechnic/ Other Diploma)",
    "Degree (Bachelor's/ Post-Graduate Diploma/ Cert, e.g. WSQ)",
    "Master's/ Doctoral",
]

# Convert each factor column to an ordered categorical using its level list
factor_levels = {
    "knowledge": order_likert,
    "action": order_likert,
    "highest_education": order_edu,
    "age_bracket": order_age,
}
for factor, levels in factor_levels.items():
    survey_l2[factor] = pd.Categorical(survey_l2[factor], categories=levels, ordered=True)

# Keep only the columns used downstream, as an independent copy
survey_columns = [
    "age_bracket",
    "gender",
    "residency",
    "region",
    "highest_education",
    "sector",
    "conservation_field",
    "knowledge",
    "action",
]
survey = survey_l2[survey_columns].copy()

# Instagram: hard-coded follow counts plus the level-2 Instagram export
ig_follows_oct = [
    1, 3, 6, 8, 8, 3, 11, 5, 4, 8,
    10, 14, 10, 13, 18, 7, 4, 16, 11, 7,
    11, 20, 11, 9, 25, 18, 13, 18, 20, 11,
    18, 46, 26, 23, 13, 13, 2,
]
ig = pd.read_csv("ig_l2.csv", parse_dates=["datetime"])
# TODO: set the index (no indexing code follows this comment in this cell)

Data Preparation & Cleaning

Level 1 Report

# Importing datasets
# Each raw CSV is loaded and its structure summarised under a heading.
# DataFrame.info() writes the summary to stdout itself and returns None, so
# it is called directly; wrapping it in print() emitted a stray "None" line
# after every summary.

# Participants survey; pandas is asked to parse the "datetime" column as dates
survey_raw = pd.read_csv("survey.csv", parse_dates=["datetime"])
print("PARTICIPANTS SURVEY")
survey_raw.info()

# Organizations survey; same "datetime" parsing as above
orgs_raw = pd.read_csv("orgs.csv", parse_dates=["datetime"])
print("ORGANIZATIONS SURVEY")
orgs_raw.info()

# Instagram export; a misspelled "hastags" header, if present, is renamed
# to "hashtags"
ig_raw = pd.read_csv("ig.csv", parse_dates=["datetime"])
ig_raw = ig_raw.rename(columns={"hastags": "hashtags"})
print("INSTAGRAM")
ig_raw.info()

# Website (Wix) export; "Date" is parsed with an explicit day/month/year format
wix_raw = pd.read_csv("wix.csv", parse_dates=["Date"], date_format="%d/%m/%Y")
print("WEBSITE")
wix_raw.info()

Dealing with missing values

# Replace missing <address_region> entries with the label 'Others',
# then print the frame summary
region_filled = survey_raw['address_region'].fillna('Others')
survey_raw['address_region'] = region_filled
survey_raw.info()