HIVE数据分析实验报告
信2105-3 20213728杨申龙
- 数据导入
将csv文件导入到虚拟机的hive数据库中,并通过datagrip软件实现对数据库的可视化操作
- 数据清洗
在datagrip中进行如下操作,将数值清洗为真实日期
insert
overwrite
table
test2
select
date_add(
'2021-09-00'
,
cast
(day_id
as
int
))
as
day_id,sale_nbr
as
sale_nbr,buy_nbr
as
buy_nbr,cnt
as
cnt,round
as
round
from
test;
3.
(1)统计每天各个机场的销售数量和销售金额
create table jieguo1(day_id string,sale_nbr string,
cnt string,round string)
ROW format delimited fields terminated by ',' STORED AS TEXTFILE;
create table jieguo01(day_id string,sale_nbr string,
cnt string,round string)
ROW format delimited fields terminated by ',' STORED AS TEXTFILE;
insert overwrite table jieguo1 select day_id as day_id ,sale_nbr as sale_nbr,sum(cnt) as cnt ,sum(round) as round from test2 where sale_nbr like 'C%' group by day_id,sale_nbr;
(2)统计每天各个代理商的销售数量和销售金额
create table jieguo02(day_id string,sale_nbr string,cnt string,round string)
ROW format delimited fields terminated by ',' STORED AS TEXTFILE;
create table jieguo2(day_id string,sale_nbr string,
cnt string,round string)
ROW format delimited fields terminated by ',' STORED AS TEXTFILE;
insert overwrite table jieguo2 select day_id as day_id ,sale_nbr as sale_nbr,sum(cnt) as cnt ,sum(round) as round from test2 where sale_nbr like 'O%' group by day_id,sale_nbr;
(3)统计每天各个代理商的销售活跃度
create table jieguo03(day_id String,sale_nbr String,sale_number String) ROW format delimited fields terminated by ',';
insert overwrite table jieguo03 select day_id,sale_nbr,COUNT(*) as sale_number from sales3 group by day_id,sale_nbr having sale_nbr like 'O%';
(4)汇总统计9月1日到9月15日之间各个代理商的销售利润
create table daiout as select day_id as day_id,sale_nbr as sale_nbr,sum(cnt)as cnt,sum(round)as round from sales3 where sale_nbr like 'O%' group by day_id,sale_nbr;
create table daiin as select day_id as day_id,buy_nbr as buy_nbr,sum(cnt)as cnt,sum(round)as round from sales3 where buy_nbr like 'O%' group by day_id,buy_nbr;
create table daili as select daiin.day_id as day_id,daiout.sale_nbr as sale_nbr,daiin.cnt as incnt,daiin.round as inround,daiout.cnt as outcnt,daiout.round as outround,daiout.round-daiin.round as li from daiin join daiout on (daiin.buy_nbr=daiout.sale_nbr);
以上代码均在datagrip软件中实现
4、处理结果入库
用datagrip分别连接hive数据库和mysql数据库,将hive数据库中的表复制到mysql数据库中
5、数据可视化展示
利用echarts工具进行数据的图形化显示,java部分代码如下
DataDAO.java
package Database;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.List;
public class DataDAO {
private static final String JDBC_URL = "jdbc:mysql://localhost:3306/t";
private static final String JDBC_USER = "root";
private static final String JDBC_PASSWORD = "123456";
public List<DataPoint> getData() {
List<DataPoint> dataPoints = new ArrayList<>();
try {
Class.forName("com.mysql.cj.jdbc.Driver");
Connection connection = DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASSWORD);
String query = "SELECT date, value FROM jieguo1";
PreparedStatement preparedStatement = connection.prepareStatement(query);
ResultSet resultSet = preparedStatement.executeQuery();
while (resultSet.next()) {
String date = resultSet.getString("date");
String value = resultSet.getString("value");
DataPoint dataPoint = new DataPoint(date, value);
dataPoints.add(dataPoint);
}
resultSet.close();
preparedStatement.close();
connection.close();
} catch (Exception e) {
e.printStackTrace();
}
return dataPoints;
}
}
DataPoint.java
package Database;
public class DataPoint {
private String date;
private String value;
public DataPoint(String date, String value) {
this.date = date;
this.value = value;
}
public String getDate() {
return date;
}
public String getValue() {
return value;
}
}
Chart.jsp
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@ page import="Database.DataPoint" %>
<%@ page import="Database.DataDAO" %>
<%@ page import="java.util.List" %>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>柱状图</title>
<!-- 引入ECharts库 -->
<script src="js/echarts.min.js"></script>
</head>
<body>
<!-- Create a container for the bar chart -->
<div id="barChart" style="width: 1000px; height: 600px;"></div>
<%
// 获取数据并转换为JSON格式
DataDAO dataDAO = new DataDAO();
List<DataPoint> dataPoints = dataDAO.getData();
String dataJSON = "[";
for (DataPoint dataPoint : dataPoints) {
// 在这里,将日期作为柱状图的横坐标,值作为柱状图的数据
dataJSON += "{'name': '" + dataPoint.getDate() + "', 'value': " + dataPoint.getValue() + "},";
}
dataJSON = dataJSON.substring(0, dataJSON.length() - 1) + "]";
%>
<script>
var data = <%= dataJSON %>;
// 使用ECharts绘制柱状图
var myChart = echarts.init(document.getElementById('barChart'));
var option = {
title: {
text: '柱状图'
},
xAxis: {
type: 'category',
data: data.map(item => item.name) // Use dates as category labels
},
yAxis: {
type: 'value',
name: 'Value'
},
tooltip: {
trigger: 'axis',
axisPointer: {
type: 'shadow'
},
formatter: '{b}: {c}'
},
series: [
{
name: '数据',
type: 'bar',
data: data.map(item => item.value) // Use values for the bar heights
}
]
};
myChart.setOption(option);
</script>
</body>
</html>
标签:数据分析,string,nbr,sale,hive,day,id,round From: https://www.cnblogs.com/ysl666/p/17728282.html