curl http://10.99.67.4:8000/apache-hive-2.3.9-bin.tar.gz -o apache-hive-2.3.9-bin.tar.gz
curl http://10.99.67.4:8000/apache-zookeeper-3.5.7-bin.tar.gz -o apache-zookeeper-3.5.7-bin.tar.gz
curl http://10.99.67.4:8000/flink-1.14.5-bin-scala_2.12.tgz -o flink-1.14.5-bin-scala_2.12.tgz
curl http://10.99.67.4:8000/hadoop-2.9.2.tar.gz -o hadoop-2.9.2.tar.gz
curl http://10.99.67.4:8000/hbase-2.2.3-bin.tar.gz -o hbase-2.2.3-bin.tar.gz
curl http://10.99.67.4:8000/hbase-2.2.3-client-bin.tar.gz -o hbase-2.2.3-client-bin.tar.gz
curl http://10.99.67.4:8000/jdk-8u202-linux-x64.tar.gz -o jdk-8u202-linux-x64.tar.gz
curl http://10.99.67.4:8000/mysql-5.7.20-1.el7.x86_64.rpm-bundle.tar -o mysql-5.7.20-1.el7.x86_64.rpm-bundle.tar
Installing hadoop-2.9.2
0, If installing and managing as a non-root user
useradd hadoop
passwd hadoop
vim /etc/sudoers
hadoop ALL=(ALL) ALL
If a non-root user is used, set up passwordless SSH login (step 2 below) as that user.
0.1, Time synchronization (NTP)
yum -y install ntp ntpdate
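The install line above only pulls in the packages; a minimal sketch of actually enabling time sync on CentOS 7, assuming systemd and a reachable public NTP pool (replace pool.ntp.org with your site's time server if you have one):
systemctl enable ntpd
systemctl start ntpd
# one-off manual sync, useful before the daemon has converged
ntpdate -u pool.ntp.org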
1, Configure /etc/hosts (on all three nodes)
10.99.69.34 hadoop34
10.99.69.35 hadoop35
10.99.69.36 hadoop36
Then set each node's own hostname:
hostname hadoop34
hostname hadoop35
hostname hadoop36
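Note that the hostname command above only takes effect until the next reboot. On CentOS 7 the change can be made persistent with hostnamectl (a sketch; run the matching line on the corresponding node):
hostnamectl set-hostname hadoop34   # on 10.99.69.34
hostnamectl set-hostname hadoop35   # on 10.99.69.35
hostnamectl set-hostname hadoop36   # on 10.99.69.36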
2, Configure passwordless SSH
If ssh is not installed, it can be installed with the following commands:
yum -y install openssh
yum -y install openssh-server
yum -y install openssh-clients
ssh-keygen -t rsa
cat .ssh/id_rsa.pub >> .ssh/authorized_keys
chmod 600 .ssh/authorized_keys
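The two lines above only authorize key-based login to the local node. For start-dfs.sh/start-yarn.sh to reach the workers, the master's key must also be authorized on the other nodes; a sketch using ssh-copy-id, assuming the hostnames from /etc/hosts above:
ssh-copy-id hadoop35
ssh-copy-id hadoop36
# verify: this should not prompt for a password
ssh hadoop35 hostname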
3, Install JDK 8
tar -xvzf jdk-8u202-linux-x64.tar.gz
mv jdk1.8.0_202 /usr/local/
vim /etc/profile
JAVA_HOME=/usr/local/jdk1.8.0_202
PATH=$JAVA_HOME/bin:$PATH
CLASSPATH=.:$JAVA_HOME/lib/tools.jar:$JAVA_HOME/lib/dt.jar
export JAVA_HOME
export PATH
export CLASSPATH
source /etc/profile
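A quick check that the JDK from /etc/profile is picked up:
java -version
# should report java version "1.8.0_202"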
4, Install hadoop
First create the directories and set the appropriate permissions.
I am using the root user here and did not create a separate hadoop user.
The base directory used here is /home/opt.
mkdir /home/opt
Full path                               Purpose
/home/opt/hadoop-2.9.2                  Hadoop installation directory
/home/opt/hadoopdata                    Hadoop data root directory
/home/opt/hadoopdata/temp               Temporary directory
/home/opt/hadoopdata/hdfs/name          HDFS namespace metadata stored on the NameNode
/home/opt/hadoopdata/hdfs/data          Physical storage of HDFS blocks on each DataNode
/home/opt/hadoopdata/mapreduce/local    Local directory used while running MapReduce tasks
/home/opt/hadoopdata/mapreduce/system   MapReduce system directory in HDFS
mkdir -p /home/opt/hadoopdata
mkdir -p /home/opt/hadoopdata/temp
mkdir -p /home/opt/hadoopdata/hdfs/name
mkdir -p /home/opt/hadoopdata/hdfs/data
mkdir -p /home/opt/hadoopdata/mapreduce/local
mkdir -p /home/opt/hadoopdata/mapreduce/system
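If the cluster is run as the hadoop user created in step 0 instead of root, the directories should belong to that user (a sketch, assuming the user and group are both named hadoop):
chown -R hadoop:hadoop /home/opt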
tar -xvzf hadoop-2.9.2.tar.gz
mv hadoop-2.9.2 /home/opt/
vim /etc/profile
# set hadoop environment
export HADOOP_HOME=/home/opt/hadoop-2.9.2
export HADOOP_CONF_DIR=/home/opt/hadoop-2.9.2/etc/hadoop
export YARN_CONF_DIR=/home/opt/hadoop-2.9.2/etc/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
export HADOOP_CLASSPATH=`/home/opt/hadoop-2.9.2/bin/hadoop classpath`
source /etc/profile
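A quick check that the Hadoop binaries are now on the PATH:
hadoop version
# should report Hadoop 2.9.2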
|--------------------+------------------------------+------------------------------+-----------------------------|
|                    | hadoop34                     | hadoop35                     | hadoop36                    |
|--------------------+------------------------------+------------------------------+-----------------------------|
| HDFS               | NameNode, DataNode           | DataNode                     | SecondaryNameNode, DataNode |
|--------------------+------------------------------+------------------------------+-----------------------------|
| YARN               | NodeManager                  | NodeManager, ResourceManager | NodeManager                 |
|--------------------+------------------------------+------------------------------+-----------------------------|
| YARN (alternative) | NodeManager, ResourceManager | NodeManager                  | NodeManager                 |
|--------------------+------------------------------+------------------------------+-----------------------------|
Configure hadoop-env.sh
vim /home/opt/hadoop-2.9.2/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/local/jdk1.8.0_202
#export HDFS_NAMENODE_USER=root             # user that starts the HDFS NameNode
#export HDFS_DATANODE_USER=root             # user that starts the HDFS DataNode
#export HDFS_SECONDARYNAMENODE_USER=root    # user that starts the HDFS SecondaryNameNode
Configure mapred-env.sh
vim /home/opt/hadoop-2.9.2/etc/hadoop/mapred-env.sh
export JAVA_HOME=/usr/local/jdk1.8.0_202
Configure yarn-env.sh
vim /home/opt/hadoop-2.9.2/etc/hadoop/yarn-env.sh
export JAVA_HOME=/usr/local/jdk1.8.0_202
Configure core-site.xml
vim /home/opt/hadoop-2.9.2/etc/hadoop/core-site.xml
<!-- add the following properties -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop34:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/opt/hadoopdata/temp</value>
</property>
Configure hdfs-site.xml
vim /home/opt/hadoop-2.9.2/etc/hadoop/hdfs-site.xml
<!-- add the following properties -->
<property>
<name>dfs.name.dir</name>
<value>/home/opt/hadoopdata/hdfs/name</value>
</property>
<property>
<name>dfs.data.dir</name>
<value>/home/opt/hadoopdata/hdfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
<description>Replication factor; the default is 3 and it should not exceed the number of DataNodes</description>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop36:50090</value>
<description>The secondary namenode http server address and port (the node that runs the SecondaryNameNode).</description>
</property>
Configure mapred-site.xml
# if this file does not exist, copy it from the template
cp mapred-site.xml.template mapred-site.xml
vim /home/opt/hadoop-2.9.2/etc/hadoop/mapred-site.xml
<!-- add the following properties -->
<property>
<name>mapreduce.cluster.local.dir</name>
<value>/home/opt/hadoopdata/mapreduce/local</value>
</property>
<property>
<name>mapreduce.cluster.system.dir</name>
<value>/home/opt/hadoopdata/mapreduce/system</value>
</property>
<!-- tell the MapReduce framework to use YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop34:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop34:19888</value>
</property>
# A few setups put the two jobhistory properties above in yarn-site.xml instead.
Configure masters
# Replace localhost with the NameNode's hostname; create the masters file if it does not exist
vim /home/opt/hadoop-2.9.2/etc/hadoop/masters
hadoop34
Configure slaves
# Remove localhost and list the hostnames of all DataNodes
vim /home/opt/hadoop-2.9.2/etc/hadoop/slaves
hadoop34
hadoop35
hadoop36
Configure yarn-site.xml
vim /home/opt/hadoop-2.9.2/etc/hadoop/yarn-site.xml
<!-- how reducers fetch map output (shuffle service) -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- hostname of the YARN ResourceManager -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop34</value>
</property>
<!-- maximum number of ApplicationMaster restart attempts (relevant later for a Flink JobManager running on YARN) -->
<property>
<name>yarn.resourcemanager.am.max-attempts</name>
<value>4</value>
<description>The maximum number of application master execution attempts.</description>
</property>
<!-- Site specific YARN configuration properties -->
<!-- disable the virtual-memory check; very useful when the nodes are virtual machines -->
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<!-- enable log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- retain aggregated logs for 7 days -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
<!-- additional YARN addresses; optional, not generally recommended to set explicitly -->
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>hadoop34:18030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>hadoop34:18025</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>hadoop34:18141</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>hadoop34:18088</value>
</property>
Sync the configuration to the other nodes
scp /etc/profile root@hadoop35:/etc/profile
scp /etc/profile root@hadoop36:/etc/profile
scp /home/opt/hadoop-2.9.2/etc/hadoop/* root@hadoop35:/home/opt/hadoop-2.9.2/etc/hadoop/
scp /home/opt/hadoop-2.9.2/etc/hadoop/* root@hadoop36:/home/opt/hadoop-2.9.2/etc/hadoop/
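The scp of etc/hadoop/* above assumes hadoop-2.9.2 is already unpacked on hadoop35 and hadoop36. If it is not, the install tree and the data directories can be distributed first (a sketch, assuming root SSH access between the nodes):
scp -r /home/opt/hadoop-2.9.2 root@hadoop35:/home/opt/
scp -r /home/opt/hadoop-2.9.2 root@hadoop36:/home/opt/
ssh hadoop35 "mkdir -p /home/opt/hadoopdata/{temp,hdfs/name,hdfs/data,mapreduce/local,mapreduce/system}"
ssh hadoop36 "mkdir -p /home/opt/hadoopdata/{temp,hdfs/name,hdfs/data,mapreduce/local,mapreduce/system}"
Remember to run source /etc/profile on each node afterwards.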
Start HDFS. On the master, run:
hdfs namenode -format
# Format HDFS. If the cluster is being restarted and the old data is still there, skip the format and run the next command directly.
Start HDFS:  start-dfs.sh
Stop HDFS:   stop-dfs.sh
***** Start YARN. On the master (ResourceManager) node, run:
Start YARN:  start-yarn.sh
Stop YARN:   stop-yarn.sh
start-all.sh and stop-all.sh can replace the two steps above.
(new, Hadoop 3.x)      mapred --daemon start historyserver
(new, Hadoop 3.x)      mapred --daemon stop historyserver
(old, works on 2.9.2)  mr-jobhistory-daemon.sh start historyserver
(old, works on 2.9.2)  mr-jobhistory-daemon.sh stop historyserver
# start the ResourceManager on its own
yarn-daemon.sh start resourcemanager
HDFS (NameNode) web UI: http://10.99.69.34:50070/dfshealth.html#tab-overview
YARN web UI: the default port is 8088, changed to 18088 above:
http://10.99.69.34:18088
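To confirm that the expected daemons are running, jps from the JDK can be run on each node and compared against the role table above; for example on hadoop34 one would expect NameNode, DataNode, NodeManager, ResourceManager and, if started, JobHistoryServer:
jps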
Run one of the bundled examples as a smoke test:
cd /home/opt/hadoop-2.9.2/share/hadoop/mapreduce/
hadoop jar hadoop-mapreduce-examples-2.9.2.jar pi 10 10
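Another quick smoke test that exercises HDFS as well as YARN (a sketch; the input file and the /tmp paths are arbitrary choices):
hdfs dfs -mkdir -p /tmp/wc-in
hdfs dfs -put /etc/hosts /tmp/wc-in/
hadoop jar hadoop-mapreduce-examples-2.9.2.jar wordcount /tmp/wc-in /tmp/wc-out
hdfs dfs -cat /tmp/wc-out/part-r-00000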
99, Install zookeeper-3.5.7
98, Install hive-2.3.9
Hive is installed on the NameNode host, under /home/opt/apache-hive-2.3.9-bin.
In MySQL, create the metastore database and grant the hive user access to it:
create database metastore;
grant all on metastore.* to hive@'%' identified by 'root__123AbCdHjKl';
grant all on metastore.* to hive@'localhost' identified by 'root__123AbCdHjKl';
flush privileges;
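The statements above are run in the mysql client as an administrative user (mysql -uroot -p); in MySQL 5.7 the GRANT ... IDENTIFIED BY form also creates the hive account if it does not yet exist. A quick check that the new account can see the metastore database (a sketch):
mysql -uhive -p'root__123AbCdHjKl' -e 'show databases;'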
Create the following directories in HDFS and open up their permissions:
hdfs dfs -mkdir -p /user/hive/warehouse
hdfs dfs -mkdir -p /user/hive/tmp
hdfs dfs -mkdir -p /user/hive/log
hdfs dfs -chmod -R 777 /user/hive/warehouse
hdfs dfs -chmod -R 777 /user/hive/tmp
hdfs dfs -chmod -R 777 /user/hive/log
Copy the MySQL JDBC driver into Hive's lib directory (the jar inside the tarball is needed, not the tarball itself):
tar -xvzf mysql-connector-java-5.1.43.tar.gz
cp mysql-connector-java-5.1.43/mysql-connector-java-5.1.43-bin.jar /home/opt/apache-hive-2.3.9-bin/lib/
Set the Hive environment variables
vim /etc/profile   # add the following
export HIVE_HOME=/home/opt/apache-hive-2.3.9-bin
export PATH=$PATH:$HIVE_HOME/bin
source /etc/profile
Configure Hive
cd /home/opt/apache-hive-2.3.9-bin/conf
cp hive-env.sh.template hive-env.sh
cp hive-default.xml.template hive-site.xml
cp hive-log4j2.properties.template hive-log4j2.properties
cp hive-exec-log4j2.properties.template hive-exec-log4j2.properties
Edit hive-env.sh
## Java path
## Hadoop installation path
## Hive installation path
## Hive configuration directory
export JAVA_HOME=/usr/local/jdk1.8.0_202
#export JAVA_HOME=/opt/jdk1.8.0_231
export HADOOP_HOME=/home/opt/hadoop-2.9.2
export HIVE_HOME=/home/opt/apache-hive-2.3.9-bin
export HIVE_CONF_DIR=/home/opt/apache-hive-2.3.9-bin/conf
Edit hive-site.xml
<property>
<name>hive.exec.scratchdir</name>
<value>/user/hive/tmp</value>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
</property>
<property>
<!-- note: hive.querylog.location is a local filesystem path, not an HDFS path -->
<name>hive.querylog.location</name>
<value>/user/hive/log</value>
</property>
Configure the MySQL connection:
<property>
<name>javax.jdo.option.ConnectionURL</name>
<!-- in XML the & separators in the URL must be escaped as &amp; -->
<value>jdbc:mysql://localhost:3306/metastore?createDatabaseIfNotExist=true&amp;characterEncoding=UTF-8&amp;useSSL=false</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>root__123AbCdHjKl</value>
</property>
Create a local tmp directory
cd /home/opt/apache-hive-2.3.9-bin
mkdir tmp
Then edit hive-site.xml:
replace every ${system:java.io.tmpdir} with /home/opt/apache-hive-2.3.9-bin/tmp/
replace every ${system:user.name} with ${user.name}
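The two replacements can also be applied with sed instead of editing by hand (a sketch against the hive-site.xml copied from the template above):
cd /home/opt/apache-hive-2.3.9-bin/conf
sed -i 's#${system:java.io.tmpdir}#/home/opt/apache-hive-2.3.9-bin/tmp#g' hive-site.xml
sed -i 's#${system:user.name}#${user.name}#g' hive-site.xml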
Initialize the Hive schema
schematool -dbType mysql -initSchema -userName hive -passWord root__123AbCdHjKl
# -userName and -passWord can be omitted, in which case the credentials are read from hive-site.xml
If there are problems, the JDBC URL can be changed to the parameter-free form below:
jdbc:mysql://localhost:3306/metastore
When setting the JDBC URL in the XML configuration, each & must be escaped as &amp;.
9, Start Hive
Two ways to start it are described below:
9.1 hive CLI
hive
hive> show databases;
hive> exit;
9.2 beeline
9.2.1 Start hiveserver2
nohup hiveserver2 &
9.2.2 Check that hiveserver2 is listening
netstat -nptl | grep 10000
9.2.3 Start beeline
beeline
9.2.4 Connect to Hive
!connect jdbc:hive2://localhost:10000 hive root__123AbCdHjKl
9.2.5 List databases
show databases;
Start the metastore service:
# start the metastore server in the background; it listens on port 9083 by default
nohup hive --service metastore &
# or with an explicit port:
nohup hive --service metastore -p 9083 &
Start the hiveserver2 service:
# start hiveserver2 in the background; it listens on port 10000 by default
nohup hive --service hiveserver2 &
# or with an explicit port:
nohup hive --service hiveserver2 --hiveconf hive.server2.thrift.port=10001 &
97, Install mysql-5.7.20