Skip to content
Neenopal
Task 1
1. How to identify the Records (Order ID + Product ID combination) present in data1 but missing in data2 (Specify the number of records missing in your answer)
DataFrameas
df
variable
select count(t1.order_id) as total_missing_records_in_t2
from neenopal_data1 as t1
left join neenopal_data2 as t2
on t1.order_id = t2.order_id and t1.product_id=t2.product_id
where t2.order_id is null2. How to identify the Records (Order ID + Product ID combination) missing in data1 but present in data2 (Specify the number of records missing in your answer)
DataFrameas
df1
variable
select count(t2.order_id) as total_missing_records_in_t1
from neenopal_data1 as t1
right join neenopal_data2 as t2
on t1.order_id = t2.order_id and t1.product_id=t2.product_id
where t1.order_id is null3. Find the Sum of the total Qty of Records missing in data1 but present in data2
DataFrameas
df2
variable
select sum(t2.qty) as sum_missing_records_in_t1
from neenopal_data1 as t1
right join neenopal_data2 as t2
on t1.order_id = t2.order_id and t1.product_id=t2.product_id
where t1.order_id is null4. Find the total number of unique records (Order ID + Product ID combination) present in the combined dataset of data1 and data2
DataFrameas
df3
variable
-- UNION clause returns distinct rows.
select count(order_id) as total_unique_records_t1_t2_combined
from
(select *
from neenopal_data1 as t1
union
select *
from neenopal_data2 as t2
) as tTask 2
1. Read the csv or excel file
import pandas as pd
df_used_bikes = pd.read_csv('used_bikes.csv')
df_used_bikes.head()2. Change the datatypes of the column and check the memory usage before and after the change in the data types.
# to import uint16 and uint32 datatypes.
import numpy as np# existing data types of columns.
df_used_bikes.info()