极客 - 大数据 - 作业 4 Hive
发布于: 1 小时前
代码仓库
https://github.com/sodawy/learndatadev/tree/master/src/main/java/soda/homework4
SQL 代码连接及截图
简单
queryId=hive_20210804055957_308abdad-47db-471b-b7ce-bceb2cd1a612
sql 代码
-- hive作业1: 简单:展示电影ID为2116这部电影各年龄段的平均影评分
SELECT t_user.age, avg(t_rating.rate)
FROM t_rating
LEFT JOIN t_user ON t_user.userid = t_rating.userid
WHERE t_rating.movieid = 2116
GROUP BY t_user.age
ORDER BY t_user.age
复制代码
截图
中等
uery ID = hive_20210804055424_6563cdc9-cd95-47bd-a264-8796f4fcabdf
sql 代码连接
-- 中等:找出男性评分最高且评分次数超过50次的10部电影,展示电影名,平均影评分和评分次数
SELECT t_user.sex AS sex,
t_movie.moviename AS name,
AVG(t_rating.rate) AS avgrate,
COUNT(t_rating.rate) AS total
FROM (
SELECT t_rating.movieid AS top_movieid,
avg(t_rating.rate) AS avg_score
FROM t_rating
LEFT JOIN t_user
ON t_user.userid = t_rating.userid
WHERE t_user.sex = 'M'
GROUP BY t_rating.movieid
HAVING count(t_rating.rate) > 50
ORDER BY avg_score desc
LIMIT 10) top_movie
LEFT JOIN t_rating ON t_rating.movieid = top_movie.top_movieid
LEFT JOIN t_user ON t_user.userid = t_rating.userid
LEFT JOIN t_movie ON t_movie.movieid = top_movie.top_movieid
WHERE t_user.sex = 'M'
GROUP BY t_user.sex, t_movie.moviename
ORDER BY avgrate DESC
复制代码
截图
困难
queryId=hive_20210804054232_45eaca4b-2019-44bb-8d74-7f210db768db
sql 代码连接
-- 困难:找出影评次数最多的女士所给出最高分的10部电影的平均影评分,展示电影名和平均影评分(可使用多行SQL)
WITH top10_movie_from_frist_lady AS (select RR.movieid as mid, RR.rate as rt
from t_rating RR
inner join
(select t_user.userid as uid, sum(1) as cnt
from t_rating
INNER JOIN t_user ON t_user.userid = t_rating.userid
where t_user.sex = 'F'
group by t_user.userid
order by cnt desc
LIMIT 1) R ON R.uid = RR.userid
ORDER BY rt desc
LIMIT 10)
SELECT M.moviename as `t.moviename`, avg(R.rate) as `t.avgrate`
FROM top10_movie_from_frist_lady T
LEFT JOIN t_movie M ON T.mid = M.movieid
LEFT JOIN t_rating R ON T.mid = R.movieid
GROUP BY M.moviename
ORDER BY `t.avgrate` desc
复制代码
截图
划线
评论
复制
发布于: 1 小时前阅读数: 2
西伯利亚鼯鼠
关注
还未添加个人签名 2017.10.18 加入
还未添加个人简介
评论