Apache Griffin 编译安装
1. 环境准备
- Maven(Apache Maven 3.6.3)
- MySQL 数据库(版本 5.7;也可以使用 PostgreSQL)
- npm(版本 6.14.6,推荐此版本;要求 6.0.0+,用于编译 ui 模块)
- Scala(版本 2.11.8)
- Hadoop(版本 3.0.0 或更高版本)(本地:2.6.0)
- Hive(版本 2.1.1)(本地:1.1.0)
- Spark(版本 2.4.0)(本地:2.4.0)
- Livy(版本 0.5.0,推荐此版本)
- ElasticSearch(版本 5.0 或更高版本)
- Zookeeper(版本 3.4.5)
2. 下载源码包
下载: wget https://github.com/apache/griffin/archive/griffin-0.5.0.tar.gz
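解压: tar -zxf griffin-0.5.0.tar.gz -C /opt/software/
(注意:GitHub 源码包解压后的目录名通常为 griffin-griffin-0.5.0;若如此,请重命名以便与后文路径保持一致:mv /opt/software/griffin-griffin-0.5.0 /opt/software/griffin-0.5.0)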
3. 修改配置文件
3.1 配置MySQL
mysql -uroot -e "create database quartz" -p123456 -hhadoop-node3
3.2 配置环境变量
vim /etc/profile
export HADOOP_HOME=/opt/cloudera/parcels/CDH/lib/hadoop
export HADOOP_COMMON_HOME=/opt/cloudera/parcels/CDH/lib/hadoop
export HADOOP_COMMON_LIB_NATIVE_DIR=/opt/cloudera/parcels/CDH/lib/hadoop/lib/native
export HADOOP_HDFS_HOME=/opt/cloudera/parcels/CDH/lib/hadoop-hdfs
export HADOOP_INSTALL=/opt/cloudera/parcels/CDH/lib/hadoop
export HADOOP_MAPRED_HOME=/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce
export HADOOP_USER_CLASSPATH_FIRST=true
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_HOME=/opt/cloudera/parcels/SPARK2/lib/spark2
export LIVY_HOME=/opt/software/livy-0.5.0
export HIVE_HOME=/opt/cloudera/parcels/CDH/lib/hive
export YARN_HOME=/opt/cloudera/parcels/CDH/lib/hadoop-yarn
export SCALA_HOME=/opt/software/scala-2.11.12
export PATH=$PATH:$HIVE_HOME/bin:$HADOOP_HOME/bin:$SPARK_HOME/bin:$LIVY_HOME/bin:$SCALA_HOME/bin
export GRIFFIN_HOME=/opt/software/griffin-0.5.0/
export PATH=$PATH:$GRIFFIN_HOME/bin
source /etc/profile
3.3 Hive 配置
# 将 hive-site.xml 上传至 hdfs
sudo -u hdfs hadoop fs -mkdir -p /home/spark_conf
sudo -u hdfs hadoop fs -put /opt/cloudera/parcels/CDH/lib/hive/conf/hive-site.xml /home/spark_conf
useradd griffin
sudo -u hdfs hadoop fs -chown -R griffin /home/spark_conf
3.4 配置 Griffin 的 配置文件 application.properties
vim /opt/software/griffin-0.5.0/service/src/main/resources/application.properties
spring.application.name=griffin_service
server.port=8081
spring.datasource.url=jdbc:mysql://hadoop-node3:3306/quartz?autoReconnect=true&useSSL=false
spring.datasource.username=root
spring.datasource.password=123456
spring.jpa.generate-ddl=true
spring.datasource.driver-class-name=com.mysql.jdbc.Driver
spring.jpa.show-sql=true
# Hive metastore
hive.metastore.uris=thrift://node01:9083
hive.metastore.dbname=hive
hive.hmshandler.retry.attempts=15
hive.hmshandler.retry.interval=2000ms
# Hive cache time
cache.evict.hive.fixedRate.in.milliseconds=900000
# Kafka schema registry
kafka.schema.registry.url=http://localhost:8081
# Update job instance state at regular intervals
jobInstance.fixedDelay.in.milliseconds=60000
# Expired time of job instance which is 7 days that is 604800000 milliseconds.Time unit only supports milliseconds
jobInstance.expired.milliseconds=604800000
# schedule predicate job every 5 minutes and repeat 12 times at most
#interval time unit s:second m:minute h:hour d:day,only support these four units
predicate.job.interval=5m
predicate.job.repeat.count=12
# external properties directory location
external.config.location=
# external BATCH or STREAMING env
external.env.location=
# login strategy ("default" or "ldap")
login.strategy=default
# ldap
ldap.url=ldap://hostname:port
ldap.email=@example.com
ldap.searchBase=DC=org,DC=example
ldap.searchPattern=(sAMAccountName={0})
# hdfs default name
fs.defaultFS=hdfs://node01:8020
# elasticsearch
elasticsearch.host=node01
elasticsearch.port=9200
elasticsearch.scheme=http
# elasticsearch.user = user
# elasticsearch.password = password
# livy
livy.uri=http://node01:8998/batches
livy.need.queue=false
livy.task.max.concurrent.count=20
livy.task.submit.interval.second=3
livy.task.appId.retry.count=3
# yarn url
yarn.uri=http://node01:8088
# griffin event listener
internal.event.listeners=GriffinJobEventHook
3.5 配置 Griffin 的 quartz.properties
vim /opt/software/griffin-0.5.0/service/src/main/resources/quartz.properties
org.quartz.scheduler.instanceName=spring-boot-quartz
org.quartz.scheduler.instanceId=AUTO
org.quartz.threadPool.threadCount=5
org.quartz.jobStore.class=org.quartz.impl.jdbcjobstore.JobStoreTX
# If you use postgresql as your database,set this property value to org.quartz.impl.jdbcjobstore.PostgreSQLDelegate
# If you use mysql as your database,set this property value to org.quartz.impl.jdbcjobstore.StdJDBCDelegate
# If you use h2 as your database, it's ok to set this property value to StdJDBCDelegate, PostgreSQLDelegate or others
#org.quartz.jobStore.driverDelegateClass=org.quartz.impl.jdbcjobstore.PostgreSQLDelegate
org.quartz.jobStore.driverDelegateClass=org.quartz.impl.jdbcjobstore.StdJDBCDelegate
org.quartz.jobStore.useProperties=true
org.quartz.jobStore.misfireThreshold=60000
org.quartz.jobStore.tablePrefix=QRTZ_
org.quartz.jobStore.isClustered=true
org.quartz.jobStore.clusterCheckinInterval=20000
3.6 配置 Griffin 的 sparkProperties.json
vim /opt/software/griffin-0.5.0/service/src/main/resources/sparkProperties.json
{
"file": "hdfs:///griffin/griffin-measure.jar",
"className": "org.apache.griffin.measure.Application",
"name": "griffin",
"queue": "default",
"numExecutors": 2,
"executorCores": 1,
"driverMemory": "1g",
"executorMemory": "1g",
"conf": {
"spark.yarn.dist.files": "hdfs:///home/spark_conf/hive-site.xml"
},
"files": [
]
}
3.7 配置 Griffin 的 env_batch.json
vim /opt/software/griffin-0.5.0/service/src/main/resources/env/env_batch.json
{
"spark": {
"log.level": "WARN"
},
"sinks": [
{
"type": "CONSOLE",
"config": {
"max.log.lines": 10
}
},
{
"type": "HDFS",
"config": {
"path": "hdfs:///griffin/persist",
"max.persist.lines": 10000,
"max.lines.per.file": 10000
}
},
{
"type": "ELASTICSEARCH",
"config": {
"method": "post",
"api": "http://node01:9200/griffin/accuracy",
"connection.timeout": "1m",
"retry": 10
}
}
],
"griffin.checkpoint": []
}
3.8 配置 Griffin 的 env_streaming.json
vim /opt/software/griffin-0.5.0/service/src/main/resources/env/env_streaming.json
{
"spark": {
"log.level": "WARN",
"checkpoint.dir": "hdfs:///griffin/checkpoint/${JOB_NAME}",
"init.clear": true,
"batch.interval": "1m",
"process.interval": "5m",
"config": {
"spark.default.parallelism": 4,
"spark.task.maxFailures": 5,
"spark.streaming.kafkaMaxRatePerPartition": 1000,
"spark.streaming.concurrentJobs": 4,
"spark.yarn.maxAppAttempts": 5,
"spark.yarn.am.attemptFailuresValidityInterval": "1h",
"spark.yarn.max.executor.failures": 120,
"spark.yarn.executor.failuresValidityInterval": "1h",
"spark.hadoop.fs.hdfs.impl.disable.cache": true
}
},
"sinks": [
{
"type": "CONSOLE",
"config": {
"max.log.lines": 100
}
},
{
"type": "HDFS",
"config": {
"path": "hdfs:///griffin/persist",
"max.persist.lines": 10000,
"max.lines.per.file": 10000
}
},
{
"type": "ELASTICSEARCH",
"config": {
"method": "post",
"api": "http://node01:9200/griffin/accuracy"
}
}
],
"griffin.checkpoint": [
{
"type": "zk",
"config": {
"hosts": "node01:2181,node02:2181,node03:2181,node04:2181,node05:2181",
"namespace": "griffin/infocache",
"lock.path": "lock",
"mode": "persist",
"init.clear": true,
"close.clear": false
}
}
]
}
3.9 Elasticsearch设置
这里提前在Elasticsearch设置索引,以便将分片数,副本数和其他设置配置为所需的值:
curl -k -H "Content-Type: application/json" -X PUT http://localhost:9200/griffin?pretty \
-d '{
"aliases": {},
"mappings": {
"accuracy": {
"properties": {
"name": {
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
},
"type": "text"
},
"tmst": {
"type": "date"
}
}
}
},
"settings": {
"index": {
"number_of_replicas": "0",
"number_of_shards": "1"
}
}
}'
# 看见下面结果即为创建成功
{
"acknowledged":true,
"shards_acknowledged":true,
"index":"griffin"
}
3.10 修改驱动包依赖:编辑 service/pom.xml,取消 MySQL 驱动依赖的注释
vim /opt/software/griffin-0.5.0/service/pom.xml
<!-- 114-118行-->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>${mysql.java.version}</version>
</dependency>
3.11 配置 Griffin 的measure的env-batch.json
vim /opt/software/griffin-0.5.0/measure/src/main/resources/env-batch.json
{
"spark": {
"log.level": "WARN",
"config": {
"spark.master": "local[*]"
}
},
"sinks": [
{
"type": "CONSOLE",
"config": {
"max.log.lines": 10
}
},
{
"type": "HDFS",
"config": {
"path": "hdfs:///griffin/batch/persist",
"max.persist.lines": 10000,
"max.lines.per.file": 10000
}
},
{
"type": "ELASTICSEARCH",
"config": {
"method": "post",
"api": "http://node01:9200/griffin/accuracy",
"connection.timeout": "1m",
"retry": 10
}
}
],
"griffin.checkpoint": []
}
3.12 配置 Griffin 的measure的env-streaming.json
vim /opt/software/griffin-0.5.0/measure/src/main/resources/env-streaming.json
{
"spark": {
"log.level": "WARN",
"checkpoint.dir": "hdfs:///griffin/cp",
"batch.interval": "2s",
"process.interval": "10s",
"init.clear": true,
"config": {
"spark.master": "local[*]",
"spark.task.maxFailures": 5,
"spark.streaming.kafkaMaxRatePerPartition": 1000,
"spark.streaming.concurrentJobs": 4,
"spark.yarn.maxAppAttempts": 5,
"spark.yarn.am.attemptFailuresValidityInterval": "1h",
"spark.yarn.max.executor.failures": 120,
"spark.yarn.executor.failuresValidityInterval": "1h",
"spark.hadoop.fs.hdfs.impl.disable.cache": true
}
},
"sinks": [
{
"type": "CONSOLE",
"config": {
"max.log.lines": 100
}
},
{
"type": "HDFS",
"config": {
"path": "hdfs:///griffin/streaming/persist",
"max.persist.lines": 10000,
"max.lines.per.file": 10000
}
},
{
"type": "ELASTICSEARCH",
"config": {
"method": "post",
"api": "http://node01:9200/griffin/accuracy"
}
}
],
"griffin.checkpoint": [
{
"type": "zk",
"config": {
"hosts": "node01:2181,node02:2181,node03:2181,node04:2181,node05:2181",
"namespace": "griffin/infocache",
"lock.path": "lock",
"mode": "persist",
"init.clear": true,
"close.clear": false
}
}
]
}
~
3.13 新建 hdfs 目录
sudo -u hdfs hadoop fs -mkdir -p /griffin
sudo -u hdfs hdfs dfs -chown -R griffin:griffin /griffin
sudo -u griffin hadoop fs -mkdir -p /griffin/persist
sudo -u griffin hadoop fs -mkdir -p /griffin/checkpoint
sudo -u griffin hadoop fs -mkdir -p /griffin/cp
sudo -u griffin hadoop fs -mkdir -p /griffin/streaming/dump/source
sudo -u griffin hadoop fs -mkdir -p /griffin/streaming/persist
4. 编译
4.1 第一步:编译 measure
cd /opt/software/griffin-0.5.0/measure/ && mvn -Dmaven.test.skip=true clean install
4.1.1 修改部分 Spark 读取 Hive 的源码(按需修改;修改后需重新执行上面的 measure 编译命令才能生效)
# 解决无法读取分区值形如 “YYYY-MM-dd” 的 Hive 数据表的问题(默认仅支持 YYYYMMdd 等不带特殊字符的分区值)
vim /opt/software/griffin-0.5.0/measure/src/main/scala/org/apache/griffin/measure/datasource/connector/batch/HiveBatchDataConnector.scala
private def dataSql(): String = {
val tableClause = s"SELECT * FROM ${concreteTableName}"
if (wheres.length > 0) {
val clauses = wheres.map { w =>
s"${tableClause} WHERE ${w}"
}
clauses.mkString(" UNION ALL ")
} else tableClause
}
// 替换为下面内容
private def dataSql(): String = {
val tableClause = s"SELECT * FROM ${concreteTableName}"
if (wheres.length > 0) {
val clauses = wheres.map { item => {
val sw: Array[String] = item.split("and|AND").map(_.trim).filter(_.nonEmpty)
val whereList = sw.map{
w => {
val nw = w.split("=").map(_.trim)
val new_w1 = s"${nw(0)} = '${nw(1)}'"
new_w1
}
}
val new_w = whereList.mkString(" AND ")
s"${tableClause} WHERE ${new_w}"
}
}
clauses.mkString(" UNION ALL ")
} else tableClause
}
4.2 第二步:编译service
cd /opt/software/griffin-0.5.0/service/ && mvn -Dmaven.test.skip=true clean install
4.3 第三步:编译ui
# 修改 /opt/software/griffin-0.5.0/ui/angular/src/environments/environment.ts
export const environment = {
production: false,
BACKEND_SERVER: 'http://node01:8081',
};
cd /opt/software/griffin-0.5.0/ui/ && mvn -Dmaven.test.skip=true clean install
# 常见问题:
### 编译 ui 模块时的报错及解决方案
(注:下文报错中涉及的 JQuery.d.ts 文件位于 node_modules 目录下,在首次编译之前并不存在)
[root@hadoop02 griffin-0.5.0]# mvn -Dmaven.test.skip=true clean install
# 编译报错信息如下:
[ERROR] Failed to execute goal com.github.eirslett:frontend-maven-plugin:1.6:npm (npm build) on project ui: Failed to run task: 'npm run build' failed. org.apache.commons.exec.ExecuteException: Process exited with an error: 1 (Exit value: 1) -> [Help 1]
[ERROR] ERROR in /opt/yanqi/servers/griffin-0.5.0/ui/angular/node_modules/@types/jquery/JQuery.d.ts (4137,26): Cannot find name 'SVGElementTagNameMap'. [ERROR] ERROR in /opt/yanqi/servers/griffin-0.5.0/ui/angular/node_modules/@types/jquery/JQuery.d.ts (4137,89): Cannot find name 'SVGElementTagNameMap'.
### 解决方案
(示例中的路径前缀 /opt/yanqi/servers/griffin-0.5.0 对应本文的 /opt/software/griffin-0.5.0)
[root@hadoop02 ~]# vim /opt/yanqi/servers/griffin-0.5.0/ui/angular/node_modules/@types/jquery/JQuery.d.ts
# 删除第 4137 行和第 8705 行的内容(下面用 <!-- --> 标出的即为需要删除的两行)
4137 <!-- find<K extends keyof SVGElementTagNameMap>(selector_element: K | JQuery<K>): JQuery<SVGElementTagNameMap[K]>; -->
8705 <!-- parents<K extends keyof SVGElementTagNameMap>(selector: K | JQuery<K>): JQuery<SVGElementTagNameMap[K]>; -->
5. 安装
5.1 将 measure-0.5.0.jar 重命名为 griffin-measure.jar,并上传到 HDFS 的 /griffin 目录
mv /opt/software/griffin-0.5.0/measure/target/measure-0.5.0.jar /opt/software/griffin-0.5.0/griffin-measure.jar
# 因为spark在yarn集群上执行任务时,需要到HDFS的/griffin目录下加载griffin-measure.jar
sudo -u hdfs hadoop fs -put /opt/software/griffin-0.5.0/griffin-measure.jar /griffin/
sudo -u hdfs hdfs dfs -chown -R griffin:griffin /griffin
5.2 启动service.jar,运行Griffin管理服务
mv /opt/software/griffin-0.5.0/service/target/service-0.5.0.jar /opt/software/griffin-0.5.0/griffin-service.jar
nohup java -jar $GRIFFIN_HOME/griffin-service.jar>$GRIFFIN_HOME/service.out 2>&1 &
# 启动之后可以查看启动日志;如果日志中没有错误,则启动成功。
tail -f $GRIFFIN_HOME/service.out
5.3 启动前端
cd /opt/software/griffin-0.5.0/ui/angular/
node_modules/.bin/ng serve --host cdh04 --port 8081
# 访问地址如下,默认用户:admin,密码为空
# http://localhost:8081
标签:opt,hdfs,Griffin,0.5,griffin,编译,spark,安装,software
From: https://www.cnblogs.com/yzyang/p/18211477