기록

노마드코더 SQL 마스터클래스 강의 기록 03

als982001 2025. 3. 15. 18:06

 

1. 작성한 코드

-- 1. Find the number of movies released each year.
SELECT release_date, COUNT(*) AS total_movies FROM movies WHERE release_date IS NOT NULL GROUP BY release_date ORDER BY total_movies DESC;

-- 2. List the top 10 years with the highest average movie runtime
SELECT release_date, AVG(runtime) AS total_runtime FROM movies GROUP BY release_date ORDER BY total_runtime DESC LIMIT 10;

-- 3. Calculate the average rating for movies released in the 21st century
SELECT AVG(rating) AS average_rating FROM movies WHERE release_date >= 2000;

-- 4. Find the director with the hightest average movie runtime.
SELECT director, AVG(runtime) AS average_runtime, COUNT(*) as total_movies FROM movies WHERE director IS NOT NULL AND runtime IS NOT NULL GROUP BY director ORDER BY average_runtime DESC LIMIT 1;

-- 5. List the top 5 most prolific directors (thoes who have directed the most movies).
SELECT director, COUNT(*) AS the_number_of_movies FROM movies WHERE director IS NOT NULL GROUP BY director ORDER BY the_number_of_movies DESC LIMIT 5;

-- 6. Find the highest and lowest rating of each director.
SELECT director, MIN(rating) AS lowest_rating, MAX(rating) AS highest_rating FROM movies WHERE director is NOT NULL AND rating IS NOT NULL GROUP BY director HAVING COUNT(*) > 5;

-- 7. Find the director that has made the most money (revenue - budget)
SELECT director, SUM(revenue) - SUM(budget) AS money FROM movies WHERE director IS NOT NULL AND revenue IS NOT NULL AND budget IS NOT NULL GROUP BY director ORDER BY money DESC LIMIT 5;

-- 8. Calculate the average rating for movies longer than 2 hours.
SELECT AVG(rating) AS average_rating FROM movies WHERE rating IS NOT NULL AND runtime > 120 ORDER BY average_rating DESC;

-- 9. Find the year with the most movies released.
SELECT release_date, COUNT(*) AS total_movies FROM movies WHERE release_date IS NOT NULL GROUP BY release_date ORDER BY total_movies DESC;

-- 10. Find the average runtime of movies for each decade.
SELECT (release_date / 10) * 10 AS decade, count(*) AS total_movies FROM movies WHERE release_date IS NOT NULL GROUP BY decade ORDER BY total_movies;

-- 11. List the top 5 years where the difference between the highest and lowest rated movie was the greatest.
SELECT release_date, MAX(rating) - MIN(rating) AS difference FROM movies WHERE rating IS NOT NULL AND release_date IS NOT NULL GROUP BY release_date ORDER BY difference DESC LIMIT 5;

-- 12. List directors who have never made a movie shorter than 2 hours.
SELECT director, MIN(runtime) AS min_runtime FROM movies WHERE director IS NOT NULL GROUP BY director HAVING MIN(runtime) >= 120;

-- 13. Calculate the percentage of movies with a rating above 8.0
SELECT COUNT(CASE WHEN rating > 8 THEN 1 END) * 100.0 / COUNT(*) AS percentage FROM movies;

-- 14. Find the director with the highest ratio of movies rated above 7.0
SELECT director, COUNT(CASE WHEN rating > 7.0 THEN 1.0 END) * 100.0 / COUNT(*) AS '%' FROM movies WHERE director IS NOT NULL GROUP BY director HAVING COUNT(*) >= 2;

-- 15. Categorize and group movies by length.
SELECT CASE WHEN runtime < 90 THEN 'Short' WHEN runtime BETWEEN 90 AND 120 THEN 'Normal' WHEN runtime > 120 THEN 'Long' END AS runtime_category, COUNT(*) AS total_movies FROM movies GROUP BY runtime_category;

-- 16. Categorize and group movies by flop or not.
SELECT CASE WHEN revenue < budget THEN 'Flop' ELSE 'Success' END AS flop_or_not, COUNT(*) AS total_movies FROM movies WHERE budget IS NOT NULL AND revenue IS NOT NULL GROUP BY flop_or_not;

CREATE VIEW v_flop_or_not AS SELECT CASE WHEN revenue < budget THEN 'Flop' ELSE 'Success' END AS flop_or_not, COUNT(*) AS total_movies FROM movies WHERE budget IS NOT NULL AND revenue IS NOT NULL GROUP BY flop_or_not;SELECT CASE WHEN revenue < budget THEN 'Flop' ELSE 'Success' END AS flop_or_not, COUNT(*) AS total_movies FROM movies WHERE budget IS NOT NULL AND revenue IS NOT NULL GROUP BY flop_or_not;

SELECT * FROM v_flop_or_not;

DROP VIEW v_flop_or_not;

-- List movies with a (rating | revenue) higher than the average (rating | revenue) of all movies
SELECT AVG(rating) FROM movies; -- 5.73346691~
SELECT COUNT(*) FROM movies where rating > 5.73346691;

-- Independent Subqueries
SELECT COUNT(*) FROM movies WHERE rating > (SELECT AVG(rating) FROM movies);

-- CTE
WITH avg_revenue_cte AS (
  SELECT 
  	AVG(revenue)
  FROM
  	movies
)
SELECT
	title,
  director, 
  revenue,
  round((SELECT * FROM avg_revenue_cte), 0) AS avg_revenue
FROM
	movies
WHERE
	revenue > (SELECT * from avg_revenue_cte);
  
WITH avg_revenue_cte AS (
  SELECT 
  	AVG(revenue)
  FROM
  	movies
),
avg_rating_cte AS (
  SELECT
  	AVG(rating)
  FROM
  	movies
)
SELECT
	title,
  director, 
  revenue,
  rating,
  round((SELECT * FROM avg_revenue_cte), 0) AS avg_revenue,
  round((SELECT * FROM avg_rating_cte), 0) AS avg_rating
FROM
	movies
WHERE
	revenue > (SELECT * from avg_revenue_cte)
  AND rating > (SELECT * from avg_rating_cte);

-- Find the movies with a rating higher than the average rating of movies released in the same year.

-- 최적화 안 된 코드
SELECT 
	main_movies.title, 
  main_movies.director, 
  main_movies.rating
FROM movies AS main_movies -- AS 안 적어도 됨
WHERE 
	main_movies.rating > (
  	SELECT 
    	AVG(inner_movies.rating) 
    FROM movies AS inner_movies
    WHERE inner_movies.release_date = main_movies.release_date
  );

 

2. 배운 개념

-- Views 예시
CREATE VIEW v_flop_or_not AS SELECT CASE WHEN revenue < budget THEN 'Flop' ELSE 'Success' END AS flop_or_not, COUNT(*) AS total_movies FROM movies WHERE budget IS NOT NULL AND revenue IS NOT NULL GROUP BY flop_or_not;SELECT CASE WHEN revenue < budget THEN 'Flop' ELSE 'Success' END AS flop_or_not, COUNT(*) AS total_movies FROM movies WHERE budget IS NOT NULL AND revenue IS NOT NULL GROUP BY flop_or_not;
SELECT * FROM v_flop_or_not;
DROP VIEW v_flop_or_not;
  • Views
    • 가상의 테이블로, 미리 정의된 SQL 쿼리 결과를 테이블처럼 사용할 수 있는 기능
    • 복잡한 쿼리를 단순하게 만들 수 있다.
    • CREATE VIEW 뷰  이름 AS 쿼리문 으로 VIEW 생성
    • DROP VIEW 뷰 이름 으로 VIEW 삭제

 

-- Subquery 예시 (Independent Subqueries)
SELECT COUNT(*) FROM movies WHERE rating > (SELECT AVG(rating) FROM movies);
  • Subquery 
    • 다른 SQL 쿼리 안에 포함된 쿼리
    • 즉, 하나의 SQL 문 안에서 또 다른 SQL 쿼리를 실행하는 것을 의미한다.
    • 주 쿼리(Main query)가 실행되기 전에 서브 쿼리가 먼저 실행된다.
    • 주로 데이터 필터링, 집계, 조건 설정 등에 사용된다.
    • Independent Subqueries (독립 서브 쿼리)
      • 메인 쿼리와는 별도로 실행되는 서브 쿼리
      • 메인 쿼리의 각 행과는 관계 없이 한 번만 실행된다.

 

-- CTE
WITH avg_revenue_cte AS (
  SELECT 
  	AVG(revenue)
  FROM
  	movies
),
avg_rating_cte AS (
  SELECT
  	AVG(rating)
  FROM
  	movies
)
SELECT
	title,
  director, 
  revenue,
  rating,
  round((SELECT * FROM avg_revenue_cte), 0) AS avg_revenue,
  round((SELECT * FROM avg_rating_cte), 0) AS avg_rating
FROM
	movies
WHERE
	revenue > (SELECT * from avg_revenue_cte)
  AND rating > (SELECT * from avg_rating_cte);
  • CTE (Common Table Expressions, 공용 테이블 표현식)
    • 일시적으로 사용할 수 있는 가상의 테이블(임시 결과 집합)을 정의하는 기능으로, 테이블을 생성하지 않으면서 데이터를 가공할 수 있다.
    • 테이블을 리턴한다.
    • WITH 키워드를 사용하여 재사용 가능한 서브쿼리를 정의할 수 있다.
    • 쿼리의 가독성을 높이고, 중복된 서브 쿼리를 줄이는 데 도움을 준다.

 

-- Correlated Subquery 예시 코드 01

SELECT 
	main_movies.title, 
  main_movies.director, 
  main_movies.rating
FROM movies AS main_movies -- AS 안 적어도 됨
WHERE 
	main_movies.rating > (
  	SELECT 
    	AVG(inner_movies.rating) 
    FROM movies AS inner_movies
    WHERE inner_movies.release_date = main_movies.release_date
  );

-- Correlated Subquery 예시 코드 02

SELECT column1, column2
FROM table1 AS t1
WHERE column1 > (
    SELECT AVG(column1) 
    FROM table2 AS t2 
    WHERE t2.related_column = t1.related_column
);
  • Correlated Subquery (상관 서브쿼리)
    • 메인 쿼리의 각 행마다 실행되는 서브 쿼리
    • 브쿼리의 실행이 외부 쿼리의 행에 영향을 받기 때문에 "상관됨"이라고 한다.
    • 독립적으로 실행되지 않고, 메인 쿼리의 데이터를 참조하면서 반복 실행
    • 각 행마다 서브 쿼리를 실행해야 하므로 성능이 느릴 수 있음
    • 예시 코드 02에서...
      • t1.related_column을 t2.related_column과 비교하면서 각 행마다 서브 쿼리를 실행
      • Independent Subquery와는 다르게 한 번만 실행되지 않고 반복 실행된다.

 

  Correlated Subquery Independent Subquery
실행 방식 메인 쿼리의 각 행마다 실행 독립적으로 한 번만 실행
성능 비교적 느림 (반복 실행) 더 빠름 (한 번만 실행)
간단 예제 WHERE salary > (
    SELECT AVG(salary)
    FROM employees
    WHERE department = e.department
);
WHERE salary > (
    SELECT AVG(salary)
    FROM employees
);