Skip to content
import pandas as pd
food_claims_2212_1 = pd.read_csv('food_claims_2212 (1).csv')
food_claims_2212_1
Spinner
DataFrameas
df
variable
-- find the number of missing values of each column
SELECT
    SUM(CASE WHEN claim_id IS NULL THEN 1 ELSE 0 END) AS missing_count_column1,
    SUM(CASE WHEN time_to_close IS NULL THEN 1 ELSE 0 END) AS missing_count_column2,
	SUM(CASE WHEN claim_amount IS NULL THEN 1 ELSE 0 END) AS missing_count_column3,
    SUM(CASE WHEN amount_paid IS NULL THEN 1 ELSE 0 END) AS missing_count_column4,
	SUM(CASE WHEN location IS NULL THEN 1 ELSE 0 END) AS missing_count_column5,
    SUM(CASE WHEN individuals_on_claim IS NULL THEN 1 ELSE 0 END) AS missing_count_column6,
	SUM(CASE WHEN linked_cases IS NULL THEN 1 ELSE 0 END) AS missing_count_column7,
    SUM(CASE WHEN cause IS NULL THEN 1 ELSE 0 END) AS missing_count_column8
FROM food_claims_2212_1;
Spinner
DataFrameas
df3
variable
SELECT location, COUNT(DISTINCT location) AS unique_values_count
FROM food_claims_2212_1
GROUP BY location;
Spinner
DataFrameas
df4
variable
SELECT linked_cases, COUNT(DISTINCT linked_cases) AS unique_values_count
FROM food_claims_2212_1
GROUP BY linked_cases;
Spinner
DataFrameas
df5
variable
SELECT cause, COUNT(DISTINCT cause) AS unique_values_count
FROM food_claims_2212_1
GROUP BY cause;

UPDATE food_claims_2212_1
SET cause = 'vegetable'
WHERE cause LIKE 'VEGETABLE';

UPDATE food_claims_2212_1
SET cause = 'meat'
WHERE cause IS 'Meat';
Spinner
DataFrameas
df2
variable
SELECT MIN(claim_amount) AS min_value, MAX(claim_amount) AS max_value
FROM food_claims_2212_1;


Spinner
DataFrameas
df6
variable
SELECT MIN(individuals_on_claim) AS min_value, MAX(individuals_on_claim) AS max_value
FROM food_claims_2212_1;
Spinner
DataFrameas
df1
variable
CREATE TABLE description_match AS
SELECT
    CAST(claim_id AS desired_data_type1) AS new_column1,
    CAST(time_to_close AS desired_data_type2) AS new_column2,
    CAST(column1 AS desired_data_type1) AS new_column3,
    CAST(column2 AS desired_data_type2) AS new_column4,
	CAST(column1 AS desired_data_type1) AS new_column5,
    CAST(column2 AS desired_data_type2) AS new_column6,
	CAST(column1 AS desired_data_type1) AS new_column7,
    CAST(column2 AS desired_data_type2) AS new_column8
FROM food_claims_2212_1;