Skip to content
Employee records in Helzenburg Car Manufacturing Company
🚘 Halzenbyrg Car Manufacturing Co. — Employee Records SQL Project
This SQL project explores employee data from Halzenbyrg Car Manufacturing Co. to uncover meaningful insights such as hiring trends, gender distribution, age demographics, and employee tenure. By leveraging SQL techniques like aggregation, date functions, CTEs, and window functions, the project provides a clear view of workforce dynamics over time.
| Column | Description |
|---|---|
| emp_no | Unique employee number |
| birth_date | Date of birth |
| first_name | Employee's first name |
| last_name | Employee's last name |
| gender | Gender of employee |
| hire_date | Date employee was hired |
DataFrameas
df
variable
-- Explore the data in the table
SELECT *
FROM employees
DataFrameas
df1
variable
-- How many employees are there in total?
SELECT COUNT(*) AS total_employees
FROM employees;
DataFrameas
df2
variable
-- What is the gender distribution of the workforce?
SELECT gender, COUNT(*) AS total
FROM employees
GROUP BY gender;
DataFrameas
df3
variable
-- What are the top 5 most common first names?
SELECT first_name, COUNT(*) AS count
FROM employees
GROUP BY first_name
ORDER BY count DESC
LIMIT 5;
DataFrameas
df4
variable
-- What is the age distribution of employees?
SELECT
FLOOR((YEAR(CURRENT_DATE) - YEAR(birth_date)) / 10) * 10 AS age_group,
COUNT(*) AS total
FROM employees.employees
GROUP BY age_group
ORDER BY age_group;DataFrameas
df5
variable
-- What is the average age of employees by gender?
SELECT gender, ROUND(AVG(TIMESTAMPDIFF(YEAR, birth_date, CURDATE())), 1) AS avg_age
FROM employees.employees
GROUP BY gender;DataFrameas
df6
variable
-- Which year had the highest number of hires?
SELECT EXTRACT(YEAR FROM hire_date) AS year, COUNT(*) AS hires
FROM employees
GROUP BY year
ORDER BY hires DESC
LIMIT 1;
DataFrameas
df7
variable
-- CTE: Find the longest-serving employees
WITH service_years AS (
SELECT emp_no, first_name, last_name,
hire_date,
CURRENT_DATE - hire_date AS days_served
FROM employees
)
SELECT emp_no, first_name, last_name, hire_date, days_served
FROM service_years
ORDER BY days_served DESC
LIMIT 10;
DataFrameas
df8
variable
-- Window Function: Rank employees by seniority (no skipped ranks)
SELECT emp_no, first_name, last_name, hire_date,
RANK() OVER (ORDER BY hire_date ASC) AS seniority_rank
FROM employees
LIMIT 30;
DataFrameas
df9
variable
-- Monthly hiring trend view
SELECT
DATE_FORMAT(hire_date, '%Y-%m-01') AS hire_month,
COUNT(*) AS hires
FROM employees.employees
GROUP BY hire_month
ORDER BY hire_month;DataFrameas
df10
variable
-- Gender hiring trend by year
SELECT
EXTRACT(YEAR FROM hire_date) AS year,
gender,
COUNT(*) AS hires
FROM employees
GROUP BY year, gender
ORDER BY year, gender;
DataFrameas
df11
variable
-- Detect hiring bursts
WITH yearly_hires AS (
SELECT EXTRACT(YEAR FROM hire_date) AS year,
COUNT(*) AS hires
FROM employees.employees
GROUP BY year
),
hire_growth AS (
SELECT year,
hires,
LAG(hires) OVER (ORDER BY year) AS prev_year_hires,
ROUND(((hires - LAG(hires) OVER (ORDER BY year)) / LAG(hires) OVER (ORDER BY year)) * 100, 2) AS percent_growth
FROM yearly_hires
)
SELECT *
FROM hire_growth
WHERE percent_growth > 30;DataFrameas
df12
variable
-- Bonus Challenge: 📌 Scenario 6: Detecting Recruitment Surges
-- Business Problem:
-- During certain years, there were company expansions that may have led to aggressive hiring. Executives want to know which years had a ≥30% increase in hiring compared to the previous year.Your Task is to create a query using window functions (LAG or LEAD) to calculate year-over-year growth in hires and filter years with ≥30% increase.
WITH yearly_hires AS (
SELECT
EXTRACT(YEAR FROM hire_date) AS hire_year,
COUNT(*) AS hires
FROM employees
GROUP BY hire_year
),
hire_growth AS (
SELECT
hire_year,
hires,
LAG(hires) OVER (ORDER BY hire_year) AS previous_year_hires,
ROUND(
100.0 * (hires - LAG(hires) OVER (ORDER BY hire_year)) /
NULLIF(LAG(hires) OVER (ORDER BY hire_year), 0), 2
) AS hire_growth_pct
FROM yearly_hires
)
SELECT
hire_year,
hires,
previous_year_hires,
hire_growth_pct
FROM hire_growth
WHERE hire_growth_pct >= 30;