SELECT premiered, primary_title || ' (' || original_title || ')' FROM titles WHERE primary_title != original_title AND type = 'movie' AND genres LIKE '%Action%' ORDER BY premiered DESC, primary_title LIMIT 10;
Q3 [5 points] (q3_longest_running_tv):
题目描述很不清晰啊,类型都不知道具体是什么。
1 2 3 4 5 6 7 8 9 10 11 12 13
SELECT primary_title, IIF(ended IS NULL, 2023, ended) - premiered AS runtime FROM titles WHERE primary_title IS NOT NULL AND type = 'tvSeries' ORDER BY runtime DESC, primary_title LIMIT 20;
Q4 [10 points] (q4_directors_in_each_decade):
唯一要注意的就是使用 DISTINCT。
1 2 3 4 5 6 7 8 9 10 11 12 13
SELECT CAST(born / 10 * 10 AS TEXT) || 's' AS decade, COUNT(DISTINCT(people.person_id)) AS num_directors FROM people INNER JOIN crew USING(person_id) WHERE category = 'director' AND born >= 1900 GROUP BY decade ORDER BY decade;
Q5 [10 points] (q5_german_type_ratings):
德语的缩写是 de。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
SELECT t.type, ROUND(AVG(r.rating), 2) AS avg_rating, MIN(r.rating), MAX(r.rating) FROM akas as a INNER JOIN ratings as r USING(title_id) INNER JOIN titles as t USING(title_id) WHERE a.language = 'de' AND a.types IN ('imdbDisplay', 'original') GROUP BY t.type ORDER BY avg_rating;
Q6 [10 points] (q6_who_played_a_batman):
坑点就是模糊查询时 Batman 两边要加上双引号,即 "Batman"。以及在连接 people 和 crew 表时,顺序很重要,如果使用 crew INNRE JOIN people USING(person_id) 会很慢(查询大概有 5 秒),具体不知道为什么,以下是它们的执行计划。
1 2 3 4 5 6 7 8 9 10 11 12 13
crew INNER JOIN people USING(person_id)
QUERY PLAN |--SCAN crew USING INDEX ix_crew_person_id |--SEARCH people USING INDEX sqlite_autoindex_people_1 (person_id=?) `--USE TEMP B-TREE FOR DISTINCT
people INNER JOIN crew USING(person_id)
QUERY PLAN |--SCAN crew |--SEARCH people USING INDEX sqlite_autoindex_people_1 (person_id=?) `--USE TEMP B-TREE FOR DISTINCT
WITH t AS ( SELECT DISTINCT(person_id), name FROM people INNER JOIN crew USING(person_id) WHERE category = 'actor' AND characters LIKE '%"Batman"%' )
SELECT name, ROUND(AVG(rating), 2) AS avg_rating FROM t INNER JOIN crew USING(person_id) INNER JOIN ratings USING(title_id) GROUP BY person_id ORDER BY avg_rating DESC LIMIT 10;
Q7 [15 points] (q7_born_with_prestige):
SQL 很容易写,但是性能和官解差两秒,等以后学习怎么优化再来看吧。
1 2 3 4 5 6 7 8 9
SELECT COUNT(DISTINCT(person_id)) FROM titles INNER JOIN people ON titles.premiered = people.born INNER JOIN crew USING(person_id) WHERE primary_title = 'The Prestige' AND category IN ('actor', 'actress');
1 2 3 4 5
QUERY PLAN |--USE TEMP B-TREE FOR count(DISTINCT) |--SCAN crew |--SEARCH people USING INDEX sqlite_autoindex_people_1 (person_id=?) `--SEARCH titles USING INDEX ix_titles_primary_title (primary_title=?)
Q8 [15 points] (q8_directing_rose.sql):
比官解快一秒。注意使用 Rose% 而不是 Rose %。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
SELECT DISTINCT(name) FROM crew INNER JOIN people USING(person_id) WHERE category = 'director' AND title_id IN ( SELECT title_id FROM crew INNER JOIN people USING(person_id) WHERE category = 'actress' AND name LIKE 'Rose%' ) ORDER BY name;
WITH t AS ( SELECT category, name, died, primary_title, runtime_minutes, DENSE_RANK() OVER( PARTITION BY category ORDER BY died, name ) AS rank_died_name, DENSE_RANK() OVER( PARTITION BY category, person_id ORDER BY runtime_minutes DESC, title_id ) AS rank_runtime_title FROM crew INNER JOIN people USING(person_id) INNER JOIN titles USING(title_id) WHERE died IS NOT NULL AND runtime_minutes IS NOT NULL )
SELECT category, name, died, primary_title, runtime_minutes, rank_died_name FROM t WHERE rank_died_name <= 5 AND rank_runtime_title = 1 ORDER BY category, rank_died_name;
WITH t1(characters) AS ( SELECT characters FROM people INNER JOIN crew USING(person_id) WHERE name = 'Leonardo DiCaprio' AND born = 1974 ), t2(value) AS ( SELECT DISTINCT(value) FROM t1, json_each(t1.characters) WHERE value != '' AND value NOT LIKE '%SELF%' ORDER BY value )