小时级实时数据仓库构建方法_A项目
项目描述
天亮舆情是一个简单易用的互联网舆论分析平台。通过对互联网常见的舆论信息传播媒介进行采集分析,提供了实时舆情、情感分析、地域统计及事件脉络等数据与分析能力,助力客户把握时事脉搏。用户只需设置关键词即可实现对全互联网的信息进行监测分析。通过该产品可以满足用户对网络舆情监测和热点事件专题追踪等需求!
项目背景
为了帮助客户全面掌握舆情动态,正确进行舆论引导,为确保我国互联网络大众媒体的舆论导向的正确性起到了一定的辅助作用,实现为政府分忧,对网络舆情进行监控和管理。用舆情系统,宣传部门可以有效的规范互联网信息,引导健康有益的舆论导向。系统对于促进加强互联网信息监管,组织力量展开信息整理和深入分析,应对网络突发的公共事件,全面掌握社情民意起决定性作用。
项目功能
- 将热点词汇以词云图的形式显示
功能亮点
- 通过NLP(natural language processing,自然语言处理)分词处理,将一条一条的博文拆解成一个一个的词序列(Term Sequence)。对词序列中的全部元素做词频统计,对词频统计结果做倒排输出,得到舆情热词。
- 使用echarts,对echarts进行二次开发,以此来展示出词云图
社会价值-企业价值
- 迅速捕捉社交网络内热词,可以作为新闻素材、视频推荐、商品推荐等依据。
梳理数据流
zip文件 (原始数据)
-> csv文件(原始数据解压后)(local本地)
-> weibo_origin
-> weibo_product
-> weibo_seg_result
-> weibo_seg_wc -> download_weibo_hot_words(hdfs文件)
-> download_weibo_hot_words(本地文件)
-> upload_to_mysql_table -> web可视化
停用词表提前建立和初始化 weibo_stopwords表
代码实现
zip原始数据批量解压
# Extract every archive under weibo/ into weibo_text/.
# A glob loop replaces `ls | xargs` so that archive names containing
# whitespace reach unzip as a single argument.
for archive in weibo/*.zip; do
  # When nothing matches, the pattern stays literal; skip that case.
  [ -e "$archive" ] || continue
  unzip "$archive" -d weibo_text/
done
在hive中创建weibo_origin和weibo_product两张同构表
-- Raw landing table for the extracted CSV files: one partition per day,
-- every column kept as a string, rows split on plain commas.
CREATE EXTERNAL TABLE weibo_origin (
    mid                  STRING,
    retweeted_status_mid STRING,
    uid                  STRING,
    retweeted_uid        STRING,
    source               STRING,
    image                STRING,
    text                 STRING,
    geo                  STRING,
    created_at           STRING,
    deleted_last_seen    STRING,
    permission_denied    STRING
)
COMMENT 'weibo content table'
PARTITIONED BY (day_seq STRING COMMENT 'the day sequence')
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
-- Cleaned copy of weibo_origin with the same columns, stored as ORC.
-- No ROW FORMAT DELIMITED clause: ORC is a binary columnar format, so
-- text field/line delimiters do not apply and only mislead the reader.
CREATE TABLE weibo_product (
    mid                  STRING,
    retweeted_status_mid STRING,
    uid                  STRING,
    retweeted_uid        STRING,
    source               STRING,
    image                STRING,
    text                 STRING,
    geo                  STRING,
    created_at           STRING,
    deleted_last_seen    STRING,
    permission_denied    STRING
)
COMMENT 'weibo content table'
PARTITIONED BY (day_seq STRING COMMENT 'the day sequence')
STORED AS ORC;
数据按天分区载入weibo_origin表,创建脚本load_to_weibo_origin.sh
#! /bin/bash
# Upload every daily CSV file to HDFS and load it into the matching
# day_seq partition of weibo_origin. The file name without ".csv" is
# used as the partition value.

# 1. Local directory that holds the extracted CSV files.
csv_root_dir_local=/home/zel/corpus_dw_hive/day_csv_data/
# 2. HDFS directory the CSV files are staged in before the Hive load.
csv_root_dir_hdfs=/user/zel/csv_root_dir/

# 3. Walk the CSV files with a glob instead of parsing `ls | cut`, so that
#    non-CSV files are ignored and names with extra dots or spaces survive.
for csv_path in "$csv_root_dir_local"*.csv
do
    # An unmatched glob stays literal; nothing to do in that case.
    [ -e "$csv_path" ] || continue
    filename=$(basename "$csv_path" .csv)
    echo "$filename"

    # 3.1 Copy the local CSV to the HDFS staging directory (-f overwrites).
    if ! hdfs dfs -copyFromLocal -f "$csv_path" "$csv_root_dir_hdfs"; then
        # Skip the Hive load when the upload failed.
        echo "upload failed for $filename, skipping hive load" >&2
        continue
    fi

    # 3.2 Load the staged file into the day_seq partition named after it.
    hive -e "
use zel;
load data inpath '${csv_root_dir_hdfs}${filename}.csv' overwrite into table weibo_origin partition(day_seq='${filename}');
" || echo "hive load failed for $filename" >&2
    #break
done
# end of script
# Keep the loader running after the client session disconnects.
# ("//" is not a shell comment marker, so the note uses "#".)
nohup ./load_to_weibo_origin.sh &
数据检验与校正
- 校验导入数据的正确与否
- 样例数据查是否正常(limit m,查看数据样例数据有无明显异常)
- 数据量级是否相同(主要是指数据当量,十万级、百万级)
- 数据完整性是否一致(主要是指别丢数据)
- 数据格式与字段是否对齐
- 校正方法
- 将数据的导入方式与数据表的解析方式保持完全一致
- 修改weibo_origin表创建脚本
-- Corrected definition of the raw landing table: rows are parsed with
-- OpenCSVSerde (a CSV-aware SerDe) instead of plain comma splitting,
-- so the table reads the files the same way they were written.
CREATE EXTERNAL TABLE weibo_origin (
    mid                  STRING,
    retweeted_status_mid STRING,
    uid                  STRING,
    retweeted_uid        STRING,
    source               STRING,
    image                STRING,
    text                 STRING,
    geo                  STRING,
    created_at           STRING,
    deleted_last_seen    STRING,
    permission_denied    STRING
)
COMMENT 'weibo content table'
PARTITIONED BY (day_seq STRING COMMENT 'the day sequence')
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
STORED AS TEXTFILE;
将数据表weibo_origin经过清洗规则处理,按天分区载入weibo_product表
- 清洗逻辑
- 将每个表中的第1行字段名称行去掉,属于多余的
-- Multi-insert form (one scan of the source can feed several inserts).
-- Copies every day of weibo_origin into weibo_product, dropping the CSV
-- header row that each file carries (its mid column holds the text 'mid').
-- All day_seq values are resolved at run time, which Hive rejects in the
-- default strict mode, so dynamic partitioning is switched to nonstrict.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
from weibo_origin
insert overwrite table weibo_product partition(day_seq)
select
    mid,
    retweeted_status_mid,
    uid,
    retweeted_uid,
    source,
    image,
    text,
    geo,
    created_at,
    deleted_last_seen,
    permission_denied,
    day_seq          -- the dynamic partition column must come last
where mid != 'mid';
分词UDF编写
- ansj分词器
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <!-- Project coordinates -->
    <groupId>com.tianliangedu.course</groupId>
    <artifactId>TlHadoopCore</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <!-- Repository location: the Aliyun mirror is preferred; a mirror entry in settings.xml works as well -->
    <repositories>
        <repository>
            <id>nexus-aliyun</id>
            <name>Nexus aliyun</name>
            <url>http://maven.aliyun.com/nexus/content/groups/public</url>
        </repository>
    </repositories>
    <dependencies>
        <!-- hadoop-client 2.7.4; "provided" because the cluster supplies it at run time -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.4</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-cli</artifactId>
            <version>1.2.1</version>
            <scope>provided</scope>
        </dependency>
        <!-- ansj segmenter dependency == start -->
        <dependency>
            <groupId>org.ansj</groupId>
            <artifactId>ansj_seg</artifactId>
            <version>5.1.1</version>
        </dependency>
        <!-- ansj segmenter dependency == end -->
    </dependencies>
    <!-- Build configuration -->
    <build>
        <finalName>TlHadoopCore</finalName>
        <plugins>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <!-- "single" is the goal meant for lifecycle binding; the old "assembly" goal is deprecated/removed -->
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>2.3.2</version>
                <configuration>
                    <source>1.7</source>
                    <target>1.7</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
- 分词测试用例
import java.util.List;
import org.ansj.domain.Result;
import org.ansj.domain.Term;
import org.ansj.splitWord.analysis.NlpAnalysis;
/**
 * Smoke test for the ansj segmenter: segments one fixed sentence and
 * prints the resulting words joined by the '\001' character.
 */
public class TianLiangAnsjTest {
    public static void main(String[] args) {
        // Sentence to segment.
        final String sentence = "马蓉王宝强事件";
        // NLP analysis mode (per the original note: user dictionary, number,
        // person-name and organisation-name recognition, new-word discovery).
        final List<Term> terms = NlpAnalysis.parse(sentence).getTerms();
        // Word sequence for this sentence; words are separated by '\001'.
        final StringBuilder joined = new StringBuilder();
        for (int i = 0; i < terms.size(); i++) {
            // Every word except the first is preceded by the separator.
            if (i != 0) {
                joined.append('\001');
            }
            // Keep only the word text, not its part-of-speech tag.
            joined.append(terms.get(i).getName());
        }
        // Print the joined result.
        System.out.println(joined.toString());
    }
}
ansj分词udf编码与测试
package com.tianliangedu.hive.udf;
import java.util.List;
import org.ansj.domain.Result;
import org.ansj.domain.Term;
import org.ansj.splitWord.analysis.NlpAnalysis;
import org.apache.hadoop.hive.ql.exec.UDF;
/*
 * Purpose: segment an arbitrary string and return its words as one string.
 * Input : "马蓉王宝强事件"
 * Output: "马蓉\001王宝强\001事件"
 */
public class CwsUDF extends UDF {

    /**
     * Segments {@code input} with the ansj NLP analyser.
     *
     * @param input text to segment
     * @return the words joined by '\001'; null when the input is null or
     *         blank, or when the analyser yields no result or term list
     */
    public String evaluate(String input) {
        // Null or blank input produces no segmentation.
        if (input == null || input.trim().isEmpty()) {
            return null;
        }
        // NLP analysis mode (per the original note: user dictionary, number,
        // person-name and organisation-name recognition, new-word discovery).
        final Result parsed = NlpAnalysis.parse(input);
        final List<Term> terms = (parsed == null) ? null : parsed.getTerms();
        if (terms == null) {
            return null;
        }
        // Join the word texts with '\001'; part-of-speech tags are dropped.
        final StringBuilder joined = new StringBuilder();
        boolean first = true;
        for (Term term : terms) {
            if (!first) {
                joined.append('\001');
            }
            joined.append(term.getName());
            first = false;
        }
        return joined.toString();
    }

    public static void main(String[] args) {
        System.out.println(new CwsUDF().evaluate("河北省石家庄市高新区万达广场"));
    }
}
生成分词结果表
-- Segmentation output: selected weibo columns plus text_seg, which holds
-- the words of `text` joined by the '\001' character.
-- Stored as ORC, so no text delimiters are declared (they do not apply to
-- a binary columnar format and would only mislead the reader).
CREATE TABLE weibo_seg_result (
    mid                  STRING,
    retweeted_status_mid STRING,
    uid                  STRING,
    retweeted_uid        STRING,
    source               STRING,
    text                 STRING,
    text_seg             STRING,
    geo                  STRING,
    created_at           STRING
)
COMMENT 'weibo seg result table'
PARTITIONED BY (day_seq STRING COMMENT 'the day sequence')
STORED AS ORC;
调用分词udf生成分词结果表
#! /bin/sh
# Run the segmentation UDF over every partition of the source table and
# write the result into the segmentation table, partition by partition.

# Database to use.
db_name=tianliangedu
# Location of the UDF jar.
jar_path="hdfs:///user/zel/jars/TlHadoopCore-jar-with-dependencies.jar"
# Fully qualified class name of the UDF.
class_path="com.tianliangedu.hive.udf.CwsUDF"
# Source table.
from_table=weibo_product
# Target table.
to_table=weibo_seg_result
# Run the HQL. All day_seq partitions are written dynamically, which needs
# nonstrict dynamic-partition mode. The closing quote after the statement
# was missing in the original and is required for the script to parse.
hive -e "
use $db_name;
add jar $jar_path;
create temporary function seg as '$class_path';
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
from $from_table
insert overwrite table $to_table partition(day_seq)
select mid,retweeted_status_mid,uid,retweeted_uid,source,text,seg(text) as text_seg,geo,created_at,day_seq;
"
- 后台执行脚本
# Start the segmentation job in the background; nohup lets it keep running
# after the client session is closed.
nohup ./produce_weibo_seg_result.sh &
生成wordcount倒排表-按词频降序排列
- 无任何过滤直接生成倒排表-V0
-- Word-frequency table: one row per word and day.
-- Stored as ORC, so no text delimiters are declared (they do not apply to
-- a binary columnar format).
CREATE TABLE weibo_seg_wc (
    word STRING,
    freq INT
)
COMMENT 'weibo seg wc'
PARTITIONED BY (day_seq STRING COMMENT 'the day sequence')
STORED AS ORC;
生成wordcount倒排表-先生成一个分区做测试
-- V0: word count for one test day, no filtering.
-- The day is known up front, so a static partition is used; this avoids
-- the strict-mode restriction on fully dynamic partition inserts.
insert overwrite table weibo_seg_wc partition(day_seq='20120102')
select
    word,
    count(1) as freq
from weibo_seg_result
lateral view explode(split(text_seg, '\001')) word_table as word
where day_seq = '20120102'
  and text_seg is not null
group by word
order by freq desc;
优化生成倒排表-加入按word长度做过滤-V1
-- V1: word count for one test day, keeping only words longer than one
-- character. The day is known up front, so a static partition is used;
-- this avoids the strict-mode restriction on dynamic partition inserts.
insert overwrite table weibo_seg_wc partition(day_seq='20120102')
select
    word,
    count(1) as freq
from weibo_seg_result
lateral view explode(split(text_seg, '\001')) word_table as word
where day_seq = '20120102'
  and text_seg is not null
  and length(word) > 1
group by word
order by freq desc;
优化生成倒排表-加入按词性过滤-V2
package com.tianliangedu.hive.udf;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.ansj.domain.Result;
import org.ansj.domain.Term;
import org.ansj.splitWord.analysis.NlpAnalysis;
import org.apache.hadoop.hive.ql.exec.UDF;
/*
 * Purpose: segment an arbitrary string and keep only the words whose
 * part-of-speech tag is in a caller-supplied whitelist.
 * Input : the text plus a comma-separated tag list, e.g. "n,nr,ns"
 * Output: the kept words joined by '\001'
 */
public class CwsV2UDF extends UDF {
    // True once natureSet has been filled from the first non-null whitelist.
    public boolean natureSetHaveInitFlag = false;
    // Part-of-speech tags that a word must carry to be kept.
    public Set<String> natureSet = new HashSet<String>();

    /**
     * Segments {@code input} and filters the words by part-of-speech tag.
     *
     * @param input     text to segment
     * @param natureStr comma-separated whitelist of tags; only the first
     *                  non-null value seen by this instance is used
     * @return the kept words joined by '\001' (an empty string when no word
     *         matches); null when the input is null or blank, or when the
     *         analyser yields no result or term list
     */
    public String evaluate(String input, String natureStr) {
        // Null or blank input produces no segmentation.
        if (input == null || input.trim().length() == 0) {
            return null;
        }
        // Build the whitelist once and reuse it for every later row.
        if (!natureSetHaveInitFlag && natureStr != null) {
            for (String nature : natureStr.split(",")) {
                // Trim so that "n, nr" works like "n,nr"; drop blank entries.
                String trimmed = nature.trim();
                if (trimmed.length() > 0) {
                    natureSet.add(trimmed);
                }
            }
            natureSetHaveInitFlag = true;
        }
        // NLP analysis mode (per the original note: user dictionary, number,
        // person-name and organisation-name recognition, new-word discovery).
        Result result = NlpAnalysis.parse(input);
        if (result == null || result.getTerms() == null) {
            return null;
        }
        List<Term> termList = result.getTerms();
        // Kept words, separated by '\001'.
        StringBuilder stringBuilder = new StringBuilder();
        // Number of words appended so far; a separator goes before all but the first.
        int counter = 0;
        for (Term term : termList) {
            // Keep the word only when its tag is whitelisted.
            if (natureSet.contains(term.getNatureStr())) {
                if (counter > 0) {
                    stringBuilder.append('\001');
                }
                stringBuilder.append(term.getName());
                counter++;
            }
        }
        return stringBuilder.toString();
    }

    public static void main(String[] args) {
        String natureList = "n,nr,nr1,nr2,nrj,nrf,ns,nsf,nt,nz,nl,ng,nw,v,vd,vn,vf,vx,vi,vl,vg,a,ad,an,ag,al";
        System.out.println(new CwsV2UDF()
                .evaluate("河北省石家庄市高新区万达广场,好美啊", natureList));
    }
}
更新脚本
#! /bin/sh
# V2: re-segment one day with the part-of-speech whitelist UDF, then
# rebuild that day's word-frequency partition.

# Database to use.
db_name=tianliangedu
# Location of the UDF jar.
jar_path=hdfs:///user/zel/jars/TlHadoopCore-jar-with-dependencies.jar
# Fully qualified class name of the UDF.
class_path="com.tianliangedu.hive.udf.CwsV2UDF"
# Source table.
from_table=weibo_product
# Target table.
to_table=weibo_seg_result
# Day to process; defined once instead of being repeated in every statement.
day_seq=20120102
# Part-of-speech whitelist (nouns, verbs and adjectives).
nature_list='n,nr,nr1,nr2,nrj,nrf,ns,nsf,nt,nz,nl,ng,nw,v,vd,vn,vf,vx,vi,vl,vg,a,ad,an,ag,al'
# Run the HQL. Static partitions are used because the day is known, which
# avoids the strict-mode restriction on dynamic partition inserts. The
# closing quote after the last statement was missing in the original.
hive -e "
use $db_name;
add jar $jar_path;
create temporary function seg as '$class_path';
from (select * from $from_table where day_seq='$day_seq') temp
insert overwrite table $to_table partition(day_seq='$day_seq')
select mid,retweeted_status_mid,uid,retweeted_uid,source,text,seg(text,'$nature_list') as text_seg,geo,created_at;
insert overwrite table weibo_seg_wc partition(day_seq='$day_seq')
select word,count(1) as freq from $to_table lateral view explode(split(text_seg,'\001')) word_table as word where day_seq='$day_seq' and text_seg is not null and length(word)>1 group by word order by freq desc;
"
优化生成倒排表-优化加入按词性过滤-V3
- 更新词性白名单列表
#! /bin/sh
# V3: same pipeline as V2 with the whitelist narrowed to noun tags only.

# Database to use.
db_name=tianliangedu
# Location of the UDF jar.
jar_path=hdfs:///user/zel/jars/TlHadoopCore-jar-with-dependencies.jar
# Fully qualified class name of the UDF.
class_path="com.tianliangedu.hive.udf.CwsV2UDF"
# Source table.
from_table=weibo_product
# Target table.
to_table=weibo_seg_result
# Day to process; defined once instead of being repeated in every statement.
day_seq=20120102
# Part-of-speech whitelist (noun tags only).
nature_list='n,nr,nr1,nr2,nrj,nrf,ns,nsf,nt,nz,nl,ng,nw'
# Run the HQL. Static partitions are used because the day is known, which
# avoids the strict-mode restriction on dynamic partition inserts. The
# closing quote after the last statement was missing in the original.
hive -e "
use $db_name;
add jar $jar_path;
create temporary function seg as '$class_path';
from (select * from $from_table where day_seq='$day_seq') temp
insert overwrite table $to_table partition(day_seq='$day_seq')
select mid,retweeted_status_mid,uid,retweeted_uid,source,text,seg(text,'$nature_list') as text_seg,geo,created_at;
insert overwrite table weibo_seg_wc partition(day_seq='$day_seq')
select word,count(1) as freq from $to_table lateral view explode(split(text_seg,'\001')) word_table as word where day_seq='$day_seq' and text_seg is not null and length(word)>1 group by word order by freq desc;
"
优化生成倒排表-加入热词黑名单-V4
- 创建停用词表
-- Stop-word list: one word per line.
-- No field delimiter is declared. With FIELDS TERMINATED BY ',' a stop
-- word that contains a comma (including the "," entry itself) would be
-- cut at the comma; with the default delimiter the whole line is the word.
CREATE EXTERNAL TABLE weibo_stopwords (
    word STRING
)
COMMENT 'weibo stopwords'
ROW FORMAT DELIMITED
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
- 将停用词集合加入到停用词表
-- Replace the contents of weibo_stopwords with the local stop-word files.
LOAD DATA LOCAL INPATH '/home/hive/corpus/stopwords/*'
OVERWRITE INTO TABLE weibo_stopwords;
- 在生成词频倒排表时,过滤停用词表
-- V4: word count for one day with stop words removed.
-- * Static partition: the day is known, so no dynamic partitioning is needed.
-- * Anti-join: a word is kept only when it has no match in weibo_stopwords.
-- * ORDER BY sits on the outer query; inside the subquery it would be
--   lost again once the join runs.
insert overwrite table weibo_seg_wc partition(day_seq='20120102')
select
    main.word as word,
    main.freq as freq
from (
    select
        word,
        count(1) as freq
    from weibo_seg_result
    lateral view explode(split(text_seg, '\001')) word_table as word
    where day_seq = '20120102'
      and text_seg is not null
      and length(word) > 1
    group by word
) main
left join weibo_stopwords stop
    on main.word = stop.word
where stop.word is null
order by freq desc;
将hive表推送到mysql表中
将hive表数据生成到文件中
-- Export the 100 most frequent words of one day as tab-separated text.
-- Column order (word, freq, day_seq) is the table's own column order.
INSERT OVERWRITE DIRECTORY "/user/zel/corpus/hot_words"
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
SELECT
    word,
    freq,
    day_seq
FROM weibo_seg_wc
WHERE day_seq = '20120102'
ORDER BY freq DESC
LIMIT 100;
拥有一个mysql库,以及相应的读写权限(经典的多用户共享使用关系型数据库操作方法)
通过root帐户创建相应用户,并赋于相应的权限(一般是针对每个用户分配一个数据库,与之相对应,使用户之间互相不影响)
- mysql -uroot -p登陆进去。
- 创建新用户:CREATE USER 'name' IDENTIFIED BY 'password';
- create database test DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
- 为数据库操作赋权-可以从本地访问mysql: GRANT ALL ON database.* TO 'username'@'localhost' IDENTIFIED BY 'password';
- 为数据库操作赋权-可以从任意远程访问mysql: GRANT ALL ON database.* TO 'username'@'%' IDENTIFIED BY 'password';
- flush privileges ( 重启mysqld服务也可以 );
在mysql中创建词频表weibo_hot_words
-- MySQL target table for the exported hot words.
-- IF NOT EXISTS makes the script re-runnable; the trailing semicolon was
-- missing and is needed when further statements follow.
CREATE TABLE IF NOT EXISTS `weibo_hot_words` (
    `id`      int UNSIGNED NOT NULL AUTO_INCREMENT,
    `word`    varchar(255) DEFAULT NULL,
    `freq`    int DEFAULT NULL,
    `day_seq` int DEFAULT NULL,
    PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
执行mysql命令将从hive导出的热词文件(hot_words/000000_0)导入到weibo_hot_words表中(synchronized_data_to_mysql.sh)
#! /bin/sh
# Load the tab-separated hot-word file exported from Hive into the MySQL
# table weibo_hot_words.
# Fixes over the original: "-h" uses a plain hyphen (it was an en dash),
# the -e string is closed, the password placeholder is quoted so the shell
# cannot glob-expand the asterisks, and --local-infile=1 enables
# LOAD DATA LOCAL on the client side.
# Replace "host" and the password placeholder with real values before use.
mysql --local-infile=1 -h host -uroot -p'********' -e "
use yuqing_oncourse;
LOAD DATA LOCAL INFILE './hot_words/000000_0' INTO TABLE weibo_hot_words FIELDS TERMINATED BY '\t' (word,freq,day_seq);
"
web展示
- entity层
package com.tledu.springboot.springboot01.model;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Model of one hot-word row: a word, its frequency and the day it belongs
 * to. Accessors and constructors are generated by Lombok; the field order
 * below is also the parameter order of the all-args constructor.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class WeiboHotWord {
// Row identifier.
private int id;
// The word itself.
private String word;
// How many times the word occurred.
private int freq;
// Day the count belongs to (presumably yyyyMMdd as a number, e.g. 20120102 - confirm against the loaded data).
private int daySeq;
}
- mapper层
package com.tledu.springboot.springboot01.mapper;
import com.tledu.springboot.springboot01.model.WeiboHotWord;
import com.tledu.springboot.springboot01.model.sourceo;
import org.apache.ibatis.annotations.Mapper;
import java.util.List;
/**
 * MyBatis mapper for the hot-word and source tables. The SQL for each
 * method lives in the mapper XML under the statement id of the same name.
 */
@Mapper
public interface WeiboHotWordMapper {
// Top 100 rows of cc_weibo_hot_words ordered by freq descending.
List<WeiboHotWord> sel();
// Rows of cc_weibo_hot_words whose day_seq equals yearNum.
List<WeiboHotWord> sel1(String yearNum);
// Distinct day_seq values of cc_weibo_hot_words (one row per day).
List<WeiboHotWord> sele();
// Result of sum(sum) over cc_weibo_source, returned as a string.
List<String> selo();
// First 5 rows of cc_weibo_source (the query has no ORDER BY).
List<sourceo> sel5();
}
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper
        PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
        "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<!-- The namespace binds these statements to the mapper interface of the same name. -->
<mapper namespace="com.tledu.springboot.springboot01.mapper.WeiboHotWordMapper">

    <!--
        id            : statement name; matches the mapper method name
        parameterType : type of the argument passed in
        resultType    : type each result row is mapped to
    -->

    <!-- Top 100 hot words by frequency. -->
    <select id="sel" resultType="com.tledu.springboot.springboot01.model.WeiboHotWord">
        select * from cc_weibo_hot_words order by freq desc limit 100;
    </select>

    <!-- Hot words of one day. -->
    <select id="sel1" parameterType="string" resultType="com.tledu.springboot.springboot01.model.WeiboHotWord">
        select * from cc_weibo_hot_words where day_seq=#{yearNum};
    </select>

    <!-- One row per available day. -->
    <select id="sele" resultType="com.tledu.springboot.springboot01.model.WeiboHotWord">
        select day_seq from cc_weibo_hot_words group by day_seq;
    </select>

    <!-- Grand total over all sources. -->
    <select id="selo" resultType="String">
        select sum(sum) from cc_weibo_source;
    </select>

    <!-- First five source rows. -->
    <select id="sel5" resultType="com.tledu.springboot.springboot01.model.sourceo">
        select * from cc_weibo_source limit 5;
    </select>
</mapper>
- dao层
package com.tledu.springboot.springboot01.dao;
import com.tledu.springboot.springboot01.model.WeiboHotWord;
import com.tledu.springboot.springboot01.model.sourceo;
import org.springframework.stereotype.Repository;
import javax.xml.transform.Source;
import java.util.List;
/**
 * Data-access contract for hot words and source statistics.
 */
@Repository
public interface WeiboHotWordDao {

    /** @return the default hot-word list */
    List<WeiboHotWord> sel();

    /**
     * @param yearNum day sequence to filter by
     * @return the hot words of that day
     */
    List<WeiboHotWord> sel1(String yearNum);

    /** @return the days for which hot words are available */
    List<WeiboHotWord> sele();

    /** @return source statistics for the pie chart */
    List<sourceo> sel5();
}
package com.tledu.springboot.springboot01.dao.impl;
import com.tledu.springboot.springboot01.dao.WeiboHotWordDao;
import com.tledu.springboot.springboot01.mapper.WeiboHotWordMapper;
import com.tledu.springboot.springboot01.model.*;
import lombok.AllArgsConstructor;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Repository;
import java.util.List;
/**
 * DAO implementation that delegates to the MyBatis mapper and, for the
 * pie chart, adds a synthetic "其他" (other) entry for the remainder.
 */
@Repository
@AllArgsConstructor
public class WeiboHotWordDaoImpl implements WeiboHotWordDao {
    private WeiboHotWordMapper weiboHotWordMapper;

    /** @return the mapper's default hot-word list */
    @Override
    public List<WeiboHotWord> sel() {
        return weiboHotWordMapper.sel();
    }

    /**
     * @param yearNum day sequence to filter by
     * @return the hot words of that day
     */
    @Override
    public List<WeiboHotWord> sel1(String yearNum) {
        return weiboHotWordMapper.sel1(yearNum);
    }

    /** @return the days for which hot words are available */
    @Override
    public List<WeiboHotWord> sele() {
        return weiboHotWordMapper.sele();
    }

    /**
     * Returns the source rows from the mapper followed by one extra entry
     * named "其他" whose sum is the grand total minus the listed sums.
     * A missing grand total (SUM over an empty table is NULL) or a missing
     * per-source sum counts as 0 instead of throwing.
     *
     * @return the source rows plus the "其他" remainder entry
     */
    @Override
    public List<sourceo> sel5() {
        List<String> totals = weiboHotWordMapper.selo();
        List<sourceo> sources = weiboHotWordMapper.sel5();
        if (sources == null) {
            sources = new java.util.ArrayList<sourceo>();
        }
        // long avoids int overflow on large totals.
        long grandTotal = (totals == null || totals.isEmpty()) ? 0L : parseCount(totals.get(0));
        long listedTotal = 0L;
        for (sourceo so : sources) {
            listedTotal += parseCount(so.getSum());
        }
        sourceo other = new sourceo();
        other.setSource("其他");
        other.setSum(String.valueOf(grandTotal - listedTotal));
        sources.add(other);
        return sources;
    }

    /** Parses a count taken from the database; null or blank counts as 0. */
    private static long parseCount(Object raw) {
        if (raw == null) {
            return 0L;
        }
        String text = String.valueOf(raw).trim();
        return text.isEmpty() ? 0L : Long.parseLong(text);
    }
}
- controller层
package com.tledu.springboot.springboot01.controller;
import com.tledu.springboot.springboot01.dao.WeiboHotWordDao;
import lombok.AllArgsConstructor;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.ui.ModelMap;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
/**
 * MVC controller that serves the word-cloud page and the source pie page.
 */
@Controller
@AllArgsConstructor
@RequestMapping("/words")
public class WeiboHotWordController {
    @Autowired
    private WeiboHotWordDao weiboHotWordDao;

    /**
     * Renders the word-cloud view.
     *
     * @param modelMap model handed to the view; receives "yearlist" (the
     *                 selectable days) and "list" (the words to draw)
     * @param daySeq   optional day filter; when null or blank the default
     *                 hot-word list is shown
     * @return the view name "words"
     */
    @GetMapping("/hot")
    public String index(ModelMap modelMap, String daySeq) {
        modelMap.put("yearlist", weiboHotWordDao.sele());
        // Compare by content: `daySeq == ""` tests reference identity and is
        // not true for an empty request parameter.
        if (daySeq == null || daySeq.trim().isEmpty()) {
            modelMap.put("list", weiboHotWordDao.sel());
        } else {
            modelMap.put("list", weiboHotWordDao.sel1(daySeq));
        }
        return "words";
    }

    /**
     * Renders the source pie-chart view.
     *
     * @param modelMap model handed to the view; receives "list"
     * @return the view name "pie"
     */
    @GetMapping("/pie")
    public String index1(ModelMap modelMap) {
        modelMap.put("list", weiboHotWordDao.sel5());
        return "pie";
    }
}
- web前端页面
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>微博2012热词云图</title>
    <script src="js/echarts.min.js"></script>
    <script src="js/jquery-3.5.1.js"></script>
    <script src="js/echarts-wordcloud.js"></script>
</head>
<body>
<input type="hidden" id="yearid" name="yearid" value="">
<select id="yearnum" name="yearnum">
    <option value="20120102">2012-01-02</option>
    <option value="20120103">2012-01-03</option>
    <!-- This value was a duplicate of the previous day (20120103). -->
    <option value="20120104">2012-01-04</option>
</select>
<div id="main" style="width: 800px;height: 800px; "></div>
<script type="text/javascript">
    var mycharts = echarts.init(document.getElementById("main"));
    // Word-cloud data: [{name, value}, ...]
    var jsonlist = [];

    $(function () {
        // Fill the drop-down with extra options returned by the backend.
        $.ajax({
            type: "GET",
            url: "/sysUser/roles",
            data: "",
            dataType: "json",
            success: function (vo) {
                var list = vo.list;
                for (var i = 0; i < list.length; i++) {
                    // Build the element through jQuery so the text is escaped;
                    // the original string also lacked the opening "<".
                    $("<option>").val(list[i].id).text(list[i].roleName).appendTo("#yearnum");
                }
            }
        });
        // Mirror the selected value into the hidden field. The original
        // passed an undefined variable (`yearid`) here.
        $("#yearnum").change(function () {
            $("#yearid").val($("#yearnum option:selected").val());
        });
    });

    // Load the words synchronously so the data is present before the
    // chart option below is built.
    $.ajax({
        type: "post",
        url: "/word",
        dataType: "json",
        async: false,
        success: function (result) {
            for (var i = 0; i < result.length; i++) {
                jsonlist.push({
                    "name": result[i].word,
                    "value": result[i].freq
                });
            }
        },
        error: function (errorMsg) {
            alert("获取后台数据失败!");
        }
    });

    // Base64 data URI of the mask picture (e.g. a portrait). Leave empty
    // to draw the cloud without a mask.
    var image1 = "";
    var maskResource = new Image();

    var option = {
        // Title, centred.
        title: {
            text: '微博2012热词云图',
            left: 'center'
        },
        // Enable tooltips on the words.
        tooltip: {},
        series: [
            {
                maskImage: maskResource,
                // Series type provided by echarts-wordcloud.
                type: 'wordCloud',
                // Font size range.
                sizeRange: [6, 78],
                rotationRange: [-45, 90],
                textStyle: {
                    normal: {
                        // Random dark colour per word.
                        color: function () {
                            return 'rgb(' + [
                                Math.round(Math.random() * 160),
                                Math.round(Math.random() * 160),
                                Math.round(Math.random() * 160)
                            ].join(',') + ')';
                        }
                    }
                },
                // The words loaded above.
                data: jsonlist
            }
        ]
    };

    if (image1) {
        // Draw once the mask picture has loaded; the handler is attached
        // before src is set so the load event cannot be missed.
        maskResource.onload = function () {
            mycharts.setOption(option);
        };
        maskResource.src = image1;
    } else {
        // With an empty source the load event never fires, so the chart
        // would never be drawn. Render immediately without a mask instead.
        delete option.series[0].maskImage;
        mycharts.setOption(option);
    }
</script>
</body>
</html>
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>词云图</title>
    <script th:src="@{/js/echarts.min.js}"></script>
    <script th:src="@{/js/echarts-wordcloud.min.js}"></script>
    <style>
        #main {
            height: 500px;
        }
    </style>
</head>
<body>
<h1>热点词云图</h1>
<!-- Plain GET form: submitting reloads this page with ?daySeq=<selected day>. -->
<form method="get">
    <select name="daySeq">
        <option th:each="item:${yearlist}" th:value="${item.daySeq}" th:text="${item.daySeq}"></option>
    </select>
    <!-- The original onclick called sou(), which is not defined anywhere on
         the page; the form's own submit already performs the search. -->
    <button type="submit">搜索</button>
</form>
<div id="main"></div>
<script th:inline="javascript">
    // Words supplied by the controller; falls back to an empty list when
    // the model has none (or when the file is opened without Thymeleaf).
    var list = /*[[${list}]]*/ [];
    var words = (list || []).map(item => ({name: item.word, value: item.freq}));

    var chartDom = document.getElementById('main');
    var myChart = echarts.init(chartDom);
    var option = {
        tooltip: {},
        series: [{
            type: 'wordCloud',
            gridSize: 2,
            sizeRange: [12, 50],
            rotationRange: [-90, 90],
            shape: 'pentagon',
            width: 600,
            height: 400,
            drawOutOfBound: true,
            textStyle: {
                // Random dark colour per word.
                color: function () {
                    return 'rgb(' + [
                        Math.round(Math.random() * 160),
                        Math.round(Math.random() * 160),
                        Math.round(Math.random() * 160)
                    ].join(',') + ')';
                }
            },
            emphasis: {
                textStyle: {
                    shadowBlur: 10,
                    shadowColor: '#333'
                }
            },
            data: words
        }]
    };
    myChart.setOption(option);
</script>
</body>
</html>
标签:weibo,string,seq,离线,云图,seg,及词,import,day
From: https://blog.51cto.com/u_15806490/7132985