Skip to content
Great Expectations Tutorial
Install Great Expectations
- Check the version to see if it's correctly installed.
!pip install great_expectations
(Hidden output)
import great_expectations as gx
print(gx.__version__)

Generate sample DataFrames
- Use the Lego Postgres database sample data and create two DataFrames:
inventory_parts_df and colors_df.
DataFrame as inventory_parts_df variable
-- Explore the data in the table
SELECT *
FROM inventory_parts

Connect GX to the DataFrame data
- Create a Data Source, a Data Asset and a Batch that will include the whole source DataFrame
# Names under which the Data Source, Data Asset and Batch Definition
# are looked up later in the tutorial.
data_source_name = "inventory_parts"
data_asset_name = "inventory_parts_asset"
batch_definition_name = "inventory_parts_batch"

# Get a Data Context, then register a pandas Data Source and a
# DataFrame Data Asset on it.
context = gx.get_context()
data_source = context.data_sources.add_pandas(name="inventory_parts")
data_asset = data_source.add_dataframe_asset(name="inventory_parts_asset")

# Fetch the Data Asset again by name through the context.
registered_source = context.data_sources.get(data_source_name)
data_asset = registered_source.get_asset(data_asset_name)

# Attach a whole-DataFrame Batch Definition to the Data Asset.
batch_definition = data_asset.add_batch_definition_whole_dataframe(
    batch_definition_name
)
assert batch_definition.name == batch_definition_name

Create Expectations
- Retrieve the previously created Batch, using the
inventory_parts_df as parameter to run the Expectations against it
- Create a Suite and add two simple Expectations to it
# Look up the Batch Definition by walking Data Source -> Data Asset
# -> Batch Definition, each by name.
inventory_source = context.data_sources.get(data_source_name)
inventory_asset = inventory_source.get_asset(data_asset_name)
batch_definition = inventory_asset.get_batch_definition(batch_definition_name)

# Supply the DataFrame as the batch parameter and retrieve the Batch.
batch_parameters = {"dataframe": inventory_parts_df}
batch = batch_definition.get_batch(batch_parameters=batch_parameters)

# Create the Expectation Suite.
expectation_suite_name = "inventory_parts_suite"
suite = gx.ExpectationSuite(name=expectation_suite_name)

# Add two Expectations to the Suite, in this order:
# no nulls in "inventory_id", then unique values in "part_num".
not_null_inventory_id = gx.expectations.ExpectColumnValuesToNotBeNull(
    column="inventory_id"
)
unique_part_num = gx.expectations.ExpectColumnValuesToBeUnique(
    column="part_num"
)
for expectation in (not_null_inventory_id, unique_part_num):
    suite.add_expectation(expectation)
context.suites.add(suite)

Validate the Expectations
- Run the Suite of Expectations against the source data in the
inventory_parts_df DataFrame
# Validate the Batch (built from inventory_parts_df) against the Suite
validation_results = batch.validate(suite)
# Print the validation results for inspection
print(validation_results)