Skip to content
3 hidden cells
53 hidden cells
11 hidden cells
Apache Spark big data analytics for LSCMO
3 hidden cells
#FURTHER EXPLORATORY DATA ANALYSIS
53 hidden cells
11 hidden cells
Run cancelled
1.1 EXPLORATORY DATA ANALYSIS OF THE ODS FILE IN PYTHON
import pandas as pd
!pip install ezodf
import ezodf
def read_ods_file(path='final-greenhouse-gas-emissions-tables-2020.ods', sheet_index=0):
    """Read one worksheet of an ODS workbook into a list of rows.

    Args:
        path: Path of the .ods file to open. Defaults to the workbook used
            in this notebook.
        sheet_index: Zero-based position of the worksheet to read. Defaults
            to 0, i.e. the first sheet.

    Returns:
        A list with one entry per sheet row; each entry is the list of that
        row's cell values in column order.
    """
    doc = ezodf.opendoc(path)
    sheet = doc.sheets[sheet_index]
    # One inner list per row keeps the sheet's rectangular layout intact.
    return [[cell.value for cell in row] for row in sheet.rows()]
# Load the first worksheet as a plain list of rows (not yet a DataFrame)
ods_data = read_ods_file()
# Dump each raw row on its own line to inspect the sheet layout
for row in ods_data:
    print(row)
1.2 EXPLORING FURTHER THE WORKSHEET CONTAINING THE RESEARCH DATA IN VIEW (METHANE EMISSIONS)
def read_ods_file(path='final-greenhouse-gas-emissions-tables-2020.ods', sheet_index=5):
    """Read one worksheet of an ODS workbook into a list of rows.

    Args:
        path: Path of the .ods file to open. Defaults to the workbook used
            in this notebook.
        sheet_index: Zero-based position of the worksheet to read. Defaults
            to 5, i.e. the sixth sheet.

    Returns:
        A list with one entry per sheet row; each entry is the list of that
        row's cell values in column order.
    """
    doc = ezodf.opendoc(path)
    sheet = doc.sheets[sheet_index]
    # One inner list per row keeps the sheet's rectangular layout intact.
    return [[cell.value for cell in row] for row in sheet.rows()]
# Load every row of the worksheet returned by read_ods_file() as a list of lists
ods_data = read_ods_file()
# Print the raw rows to locate the header row within the sheet
for row in ods_data:
    print(row)
# The row at index 6 supplies the column names, so the records begin on the
# row after it. Slicing from index 1 would load the pre-header rows and the
# header row itself as data records.
HEADER_ROW = 6
df = pd.DataFrame(ods_data[HEADER_ROW + 1:], columns=ods_data[HEADER_ROW])
df.info()
1.30 CONVERTING THE ODS FILE TO A CSV FILE
import ezodf
import csv
def convert_ods_to_csv(ods_file, csv_file, sheet_index=5):
    """Write one worksheet of an ODS workbook to a UTF-8 CSV file.

    Args:
        ods_file: Path of the .ods workbook to read.
        csv_file: Path of the CSV file to create; an existing file is
            overwritten.
        sheet_index: Zero-based position of the worksheet to export.
            Defaults to 5, the sixth sheet (methane emissions).
    """
    doc = ezodf.opendoc(ods_file)
    sheet = doc.sheets[sheet_index]
    # newline='' lets the csv module control line endings itself.
    with open(csv_file, 'w', newline='', encoding='utf-8') as f:
        csv.writer(f).writerows(
            [cell.value for cell in row] for row in sheet.rows()
        )
# Export the workbook's sheet to 'output_file.csv' in the working directory
convert_ods_to_csv(
    'final-greenhouse-gas-emissions-tables-2020.ods',
    'output_file.csv',
)
1.40 IGNORING UNNECESSARY ROWS BY SKIPPING TO THE MAIN COLUMN HEADERS
import pandas as pd
# Skip the first five lines of the CSV so the next line is used as the header
visuals1 = pd.read_csv("output_file.csv", skiprows=5)
# Preview the first ten records, then print the frame's column summary
display(visuals1.head(10))
visuals1.info()