/*course*/ CREATE EXTERNAL TABLE IF NOT EXISTS course(CODE string,NAME string,PERIOD int,CREDIT int) row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde' stored as textfile
load data inpath '/hadoop_class/student_data/tb_course.csv' overwrite into table course;
1 2 3 4 5 6
/*course_class*/ CREATE EXTERNAL TABLE IF NOT EXISTS course_class(CODE string,SEMESTER string,TEACHER_ID int,COURSE_CODE string) row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde' stored as textfile
load data inpath '/hadoop_class/student_data/tb_course_class.csv' overwrite into table course_class;
1 2 3 4 5 6
/*major_class*/ CREATE EXTERNAL TABLE IF NOT EXISTS major_class(ID int,COLLEGE string,MAJOR string,GRADE int,CLASS int,TYPE string) row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde' stored as textfile;
load data inpath '/hadoop_class/student_data/tb_major_class.csv' overwrite into table major_class;
1 2 3 4 5 6
/*students*/ CREATE EXTERNAL TABLE IF NOT EXISTS students(ID int,NAME string,GENDER string,BIRTHDATE string,PHONENUMBER string,MAJOR_CLASS int) row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde' stored as textfile;
load data inpath '/hadoop_class/student_data/tb_student.csv' overwrite into table students;
1 2 3 4 5 6
/*electives*/ CREATE EXTERNAL TABLE IF NOT EXISTS electives(COURSE_CLASS_CODE string,STUDENT_ID int,SCORE int) row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde' stored as textfile;
load data inpath '/hadoop_class/student_data/tb_electives.csv' overwrite into table electives;
1 2 3 4 5 6
/*teacher*/ CREATE EXTERNAL TABLE IF NOT EXISTS teacher(ID int,NAME string,GENDER string,TITLE string) row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde' stored as textfile;
load data inpath '/hadoop_class/student_data/tb_teacher.csv' overwrite into table teacher;
altertable course set TBLPROPERTIES("skip.header.line.count"="1"); altertable course_class set TBLPROPERTIES("skip.header.line.count"="1"); altertable major_class set TBLPROPERTIES("skip.header.line.count"="1"); altertable students set TBLPROPERTIES("skip.header.line.count"="1"); altertable electives set TBLPROPERTIES("skip.header.line.count"="1"); altertable teacher set TBLPROPERTIES("skip.header.line.count"="1"); /*因为第一行有表头 会影响数据*/
3.解题
a、男女生比例
方法一:
1 2 3
SELECT GENDER,count(*),100*count(*)/SUM(count(*)) over() FROM students GROUP BY GENDER;
方法二:
1 2 3
select t1.male/t2.female from (select count(1) as male from students where GENDER='男') t1, (select count(1) as female from students where GENDER='女') t2 where 1=1;
b、学生整体及格率
方法一:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
CREATE TEMPORARY TABLE temp1 AS SELECT COURSE_CLASS_CODE, COUNT(*) AS totlsum FROM electives WHERE SCORE>=60 GROUP BY COURSE_CLASS_CODE;
CREATE TEMPORARY TABLE temp2 AS SELECT COURSE_CLASS_CODE, COUNT(*) AS totlsum2 FROM electives GROUP BY COURSE_CLASS_CODE;
SELECT temp1.COURSE_CLASS_CODE,CONCAT(CAST(temp1.totlsum*100/temp2.totlsum2 AS DECIMAL(10,2)),'%') FROM temp1,temp2 WHERE temp1.COURSE_CLASS_CODE=temp2.COURSE_CLASS_CODE; /*SELECT CONCAT(CAST(value AS DECIMAL(10,2)), '%') FROM table_name; 执行以上 HiveQL 查询后,返回的结果会将 value 列中的所有数值格式化为保留两位小数并添加百分号的形式*/
方法二:
1 2 3 4 5 6 7 8 9
SELECT t1.pass/t2.total FROM (SELECT COUNT(1) as pass FROM electives WHERE SCORE <60)t1, (SELECT COUNT(1) as total FROM electives)t2; SELECT ROUND(SUM(IF(SCORE>=60,1,0))/COUNT(1),2) FROM electives; /*ROUND函数 将数字四舍五入到指定的位数。*/
c、每门课程的平均分,要求显示出课程的中文名字
方法一:
1 2 3 4 5 6 7 8 9
CREATE TEMPORARY TABLE temp3 AS SELECT COURSE_CLASS_CODE,SPLIT(COURSE_CLASS_CODE, '\\.')[0] AS code,avg(score) AS avg1 from electives group by COURSE_CLASS_CODE;
SELECT t1.code,avg(avg1),t2.NAME FROM temp3 t1,course t2 WHERE t1.code=t2.CODE GROUP BY t1.code,t2.NAME;
方法二:
1 2 3 4 5 6 7
/*非严格模式*/ SELECT t2.NAME,ROUND(AVG(SCORE),2) AS avg_score FROM electives t1,course t2,course_class t3 WHERE t1.COURSE_CLASS_CODE = t3.CODE AND t2.CODE = t3.COURSE_CODE GROUP BY t2.NAME;
方法三:
1 2 3 4
SELECT t2.NAME,ROUND(AVG(SCORE),2)AS avg_score FROM electives t1 LEFT JOIN course_class t3 ON t1.COURSE_CLASS_CODE=t3.CODE LEFT JOIN course t2 ON t2.CODE=t3.COURSE_CODE GROUP BY t2.NAME;
方法四:
1 2 3 4 5 6 7
SELECT t1.COURSE_CLASS_CODE,t2.NAME,t1.avg_score FROM (SELECT COURSE_CLASS_CODE,ROUND(AVG(SCORE),2)AS avg_score FROM electives GROUP BY COURSE_CLASS_CODE)t1 LEFT JOIN course_class t3 ON t1.COURSE_CLASS_CODE=t3.CODE LEFT JOIN course t2 ON t2.CODE=t3.COURSE_CODE;
/*FULL OUTER,LEFT ,RIGHT,CROSS JOIN 都可以*/
d、有2门课不及格的学生(重修的学生)
方法一:
1 2 3 4 5 6 7 8 9
CREATE TEMPORARY TABLE temp4 AS SELECT STUDENT_ID,count(*) AS count1 FROM electives WHERE SCORE<60 GROUP BY STUDENT_ID;
SELECT t1.NAME,t2.count1 FROM students t1,temp4 t2 WHERE t1.ID=t2.STUDENT_ID AND count1>=2;/*有2门课不及格的学生(重修的学生) 所以是>=2*/
方法二:
1 2 3 4 5
SELECT t1.STUDENT_ID,t2.NAME,t1.score FROM (SELECT STUDENT_ID,SUM(IF(SCORE<60,1,0))AS score FROM electives GROUP BY STUDENT_ID HAVING score>=2)t1 LEFT JOIN student t2 ON t2.ID=t1.STUDENT_ID; /*姓名会显示失败,暂时没有解决*/
e、课程的及格率,同时要显示老师的名字(绩效考核)
方法一:
1 2 3 4 5 6 7 8 9 10 11
/*课程平均分绩效*/ CREATE TEMPORARY TABLE temp5 AS SELECT t1.COURSE_CLASS_CODE,t2.TEACHER_ID AS ID1,avg(t1.score) AS avg1 FROM electives t1,course_class t2 WHERE t1.COURSE_CLASS_CODE=t2.CODE GROUP BY t1.COURSE_CLASS_CODE,t2.TEACHER_ID ORDER BY t2.TEACHER_ID;
SELECT t1.COURSE_CLASS_CODE,t1.ID1,t2.NAME,t1.avg1 FROM temp5 t1,teacher t2 WHERE t1.ID1=t2.ID;
方法二:
1 2 3 4 5 6 7 8
SELECT t1.COURSE_CLASS_CODE,t1.per,t3.NAME FROM (SELECT COURSE_CLASS_CODE,ROUND(SUM(IF(SCORE>=60,1,0))/COUNT(*),4) AS per FROM electives GROUP BY COURSE_CLASS_CODE)t1 LEFT JOIN course_class t2 ON t1.COURSE_CLASS_CODE=t2.CODE LEFT JOIN teacher t3 on t2.teacher_id = ID ORDER BY t3.NAME;
f、老师教了几门课
1 2 3 4
SELECT t1.NAME AS TEACHER_NAME, COUNT(t2.COURSE_CODE) FROM teacher t1 JOIN course_class t2 ON t1.ID = t2.TEACHER_ID GROUP BY t1.NAME;