Skip to content
My workspace
import pandas as pd
food_claims_2212_1 = pd.read_csv('food_claims_2212 (1).csv')
food_claims_2212_1DataFrameas
df
variable
-- find the number of missing values of each column
SELECT
SUM(CASE WHEN claim_id IS NULL THEN 1 ELSE 0 END) AS missing_count_column1,
SUM(CASE WHEN time_to_close IS NULL THEN 1 ELSE 0 END) AS missing_count_column2,
SUM(CASE WHEN claim_amount IS NULL THEN 1 ELSE 0 END) AS missing_count_column3,
SUM(CASE WHEN amount_paid IS NULL THEN 1 ELSE 0 END) AS missing_count_column4,
SUM(CASE WHEN location IS NULL THEN 1 ELSE 0 END) AS missing_count_column5,
SUM(CASE WHEN individuals_on_claim IS NULL THEN 1 ELSE 0 END) AS missing_count_column6,
SUM(CASE WHEN linked_cases IS NULL THEN 1 ELSE 0 END) AS missing_count_column7,
SUM(CASE WHEN cause IS NULL THEN 1 ELSE 0 END) AS missing_count_column8
FROM food_claims_2212_1;
DataFrameas
df3
variable
SELECT location, COUNT(DISTINCT location) AS unique_values_count
FROM food_claims_2212_1
GROUP BY location;DataFrameas
df4
variable
SELECT linked_cases, COUNT(DISTINCT linked_cases) AS unique_values_count
FROM food_claims_2212_1
GROUP BY linked_cases;DataFrameas
df5
variable
SELECT cause, COUNT(DISTINCT cause) AS unique_values_count
FROM food_claims_2212_1
GROUP BY cause;
UPDATE food_claims_2212_1
SET cause = 'vegetable'
WHERE cause LIKE 'VEGETABLE';
UPDATE food_claims_2212_1
SET cause = 'meat'
WHERE cause IS 'Meat';
DataFrameas
df2
variable
SELECT MIN(claim_amount) AS min_value, MAX(claim_amount) AS max_value
FROM food_claims_2212_1;
DataFrameas
df6
variable
SELECT MIN(individuals_on_claim) AS min_value, MAX(individuals_on_claim) AS max_value
FROM food_claims_2212_1;DataFrameas
df1
variable
CREATE TABLE description_match AS
SELECT
CAST(claim_id AS desired_data_type1) AS new_column1,
CAST(time_to_close AS desired_data_type2) AS new_column2,
CAST(column1 AS desired_data_type1) AS new_column3,
CAST(column2 AS desired_data_type2) AS new_column4,
CAST(column1 AS desired_data_type1) AS new_column5,
CAST(column2 AS desired_data_type2) AS new_column6,
CAST(column1 AS desired_data_type1) AS new_column7,
CAST(column2 AS desired_data_type2) AS new_column8
FROM food_claims_2212_1;