week4 作业
建表
create EXTERNAL table t_user_chloe (userid int , sex string, age int , occupation int, zipcode int)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.MultiDelimitSerDe'
WITH SERDEPROPERTIES ("field.delim"="::")
STORED AS TEXTFILE
LOCATION '/data/hive/users/';
create EXTERNAL table t_movie_chloe (movieid int , moviename string, movietype string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.MultiDelimitSerDe'
WITH SERDEPROPERTIES ("field.delim"="::")
STORED AS TEXTFILE
LOCATION '/data/hive/movies/';
create EXTERNAL table t_ratting_chloe (userid int , movieid int, rate int , times int)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.MultiDelimitSerDe'
WITH SERDEPROPERTIES ("field.delim"="::")
STORED AS TEXTFILE
LOCATION '/data/hive/ratings/';
作业 1:
select u.age, avg(r.rate) from t_movie_chloe m, t_user_chloe u, t_ratting_chloe r where m.movieid = 2116 and m.movieid = r.movieid and r.userid = u.userid group by u.age order by u.age;
结果:
作业 2:
select m.moviename, avg(rate), count(r.userid) from t_movie_chloe m, t_user_chloe u, t_ratting_chloe r where r.userid = u.userid and u.sex = 'M' and m.movieid = r.movieid group by r.movieid, m.moviename having count(*)> 50 order by avg(rate) desc limit 10;
结果:
作业 3:
select count(), u.userid from t_user_chloe u, t_ratting_chloe r where r.userid = u.userid and u.sex = 'F' group by u.userid order by count() desc limit 1;
select r.movieid,m.moviename, r.rate from t_user_chloe u, t_ratting_chloe r, t_movie_chloe m where u.userid = 1150 and r.movieid = m.movieid and u.userid = r.userid order by r.rate desc limit 10;
select m.moviename, avg(r.rate) from t_ratting_chloe r, t_movie_chloe m where m.movieid in (1256, 1094,905,2064,2997, 750, 904,1236,1279, 745) and m.movieid = r.movieid group by m.movieid, m.moviename;
结果:
评论