Introduction to SQL

Here you can access the books table used in the course.

Note: When using sample databases such as those that contain course data, you have read-only access. You can run queries, but cannot make any changes such as adding, deleting, or modifying the data (e.g., creating tables, views, etc.).

Take Notes

Add notes about the concepts you've learned and SQL cells with queries you want to keep.

Add your notes here

DataFrameas

books

variable

-- Add your own queries here
SELECT * 
FROM books
;

Hidden output

Explore Datasets

Use the books table to explore the data and practice your skills!

Select only the title column.
Alias the title column as book_title.
Select the distinct author names from the author column.
Select all records from the table and limit your results to 10.

DataFrameas

books

variable

SELECT DISTINCT author, title AS book_title 
FROM public.books
LIMIT 10
;

Summary statistics

this query specifically ran on the database fortune500 --What groups are you computing statistics by? SELECT sector, --Select the mean of assets with the avg function avg(assets) AS mean, --Select the median percentile_disc(.5) WITHIN GROUP (ORDER BY assets) AS median FROM fortune500 GROUP BY sector ORDER BY mean;

Creating Temp Table

Need to convert this into a query on a data set in notes after learning

-- To clear table if it already exists; fill in name of temp table
DROP TABLE IF EXISTS profit80;

-- Create the temporary table
CREATE TEMP TABLE profit80 AS
  -- Select the two columns you need; alias as needed
  SELECT sector, 
         percentile_disc(0.8) WITHIN GROUP (ORDER BY profits) AS pct80
    -- What table are you getting the data from?
    FROM fortune500 
   -- What do you need to group by?
   GROUP BY sector;

-- See what you created: select all columns and rows from the table you created
SELECT * 
  FROM profit80;
  
  -- To clear table if it already exists; fill in name of temp table
DROP TABLE IF EXISTS profit80;

-- Create the temporary table
CREATE TEMP TABLE profit80 AS
  -- Select the two columns you need; alias as needed
  SELECT sector, 
         percentile_disc(0.8) WITHIN GROUP (ORDER BY profits) AS pct80
    -- What table are you getting the data from?
    FROM fortune500 
   -- What do you need to group by?
   GROUP BY sector;

-- See what you created: select all columns and rows from the table you created
SELECT * 
  FROM profit80;

Another example of temporary tables

-- To clear table if it already exists
DROP TABLE IF EXISTS startdates;

-- Create temp table syntax
CREATE TEMP TABLE startdates AS
-- Compute the minimum date for each what?
SELECT tag,
       min(date) AS mindate
  FROM stackoverflow
 -- What do you need to compute the min date for each tag?
 GROUP BY tag;

 -- Look at the table you created
 SELECT * 
   FROM startdates;

Then to add onto the query

-- To clear table if it already exists
DROP TABLE IF EXISTS startdates;

CREATE TEMP TABLE startdates AS
SELECT tag, min(date) AS mindate
  FROM stackoverflow
 GROUP BY tag;

-- Select tag (Remember the table name!) and mindate
SELECT startdates.tag, 
       mindate, 
       -- Select question count on the min and max days
       so_min.question_count AS min_date_question_count,
       so_max.question_count AS max_date_question_count,
       -- Compute the change in question_count (max- min)
       so_max.question_count - so_min.question_count AS change
  FROM startdates
       -- Join startdates to stackoverflow with alias so_min
       INNER JOIN stackoverflow AS so_min
          -- What needs to match between tables?
          ON startdates.tag = so_min.tag
         AND startdates.mindate = so_min.date
       -- Join to stackoverflow again with alias so_max
       INNER JOIN stackoverflow AS so_max
          -- Again, what needs to match between tables?
          ON startdates.tag = so_max.tag
         AND so_max.date = '2018-09-25';

Temp Table example 3

DROP TABLE IF EXISTS correlations;

CREATE TEMP TABLE correlations AS
SELECT 'profits'::varchar AS measure,
       corr(profits, profits) AS profits,
       corr(profits, profits_change) AS profits_change,
       corr(profits, revenues_change) AS revenues_change
  FROM fortune500;

-- Add a row for profits_change
-- Insert into what table?
INSERT INTO correlations
-- Follow the pattern of the select statement above using profits_change instead of profits
SELECT 'profits_change'::varchar AS measure,
       corr(profits_change, profits) AS profits,
       corr(profits_change, profits_change) AS profits_change,
       corr(profits_change, revenues_change) AS revenues_change
  FROM fortune500;

-- Repeat the above, but for revenues_change
INSERT INTO correlations
SELECT 'revenues_change'::varchar AS measure,
       corr(revenues_change, profits) AS profits,
       corr(revenues_change, profits_change) AS profits_change,
       corr(revenues_change, revenues_change) AS revenues_change
  FROM fortune500;

Above temp table exercise continued

DROP TABLE IF EXISTS correlations;

CREATE TEMP TABLE correlations AS
SELECT 'profits'::varchar AS measure,
       corr(profits, profits) AS profits,
       corr(profits, profits_change) AS profits_change,
       corr(profits, revenues_change) AS revenues_change
  FROM fortune500;

INSERT INTO correlations
SELECT 'profits_change'::varchar AS measure,
       corr(profits_change, profits) AS profits,
       corr(profits_change, profits_change) AS profits_change,
       corr(profits_change, revenues_change) AS revenues_change
  FROM fortune500;

INSERT INTO correlations
SELECT 'revenues_change'::varchar AS measure,
       corr(revenues_change, profits) AS profits,
       corr(revenues_change, profits_change) AS profits_change,
       corr(revenues_change, revenues_change) AS revenues_change
  FROM fortune500;

-- Select each column, rounding the correlations
SELECT measure, 
       round(profits::numeric, 2) AS profits,
       round(profits_change::numeric, 2) AS profits_change,
       round(revenues_change::numeric, 2) AS revenues_change
  FROM correlations;

PostgreSQL character Types

character (n) or char(n)

-fixed length n -trailing

Types of test data Categorical or Unstructured text

Introduction to SQL

.mfe-app-workspace-kj242g{position:absolute;top:-8px;}.mfe-app-workspace-11ezf91{display:inline-block;}.mfe-app-workspace-11ezf91:hover .Anchor__copyLink{visibility:visible;}Introduction to SQL

Take Notes

Explore Datasets

Summary statistics

Creating Temp Table

Another example of temporary tables

Temp Table example 3

Above temp table exercise continued

PostgreSQL character Types

Introduction to SQL