
Hadoop Cluster Setup


#1. Set the machine hostname
vim /etc/sysconfig/network
NETWORKING=yes  # enable networking
HOSTNAME=bigdata-senior01.yicheng.com  # set the hostname

#2. Configure the hosts file
vim /etc/hosts
192.168.197.100 bigdata-senior01.yicheng.com
192.168.197.101 bigdata-senior02.yicheng.com
192.168.197.102 bigdata-senior03.yicheng.com
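#To confirm the mappings, each name should now resolve on every node:
getent hosts bigdata-senior02.yicheng.com
ping -c 1 bigdata-senior02.yicheng.com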

#3. Disable the firewall
systemctl disable firewalld
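#disable only keeps firewalld from starting at boot; to stop the running service immediately and verify:
systemctl stop firewalld
systemctl status firewalld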

#4. Disable SELinux
vim /etc/sysconfig/selinux

    # This file controls the state of SELinux on the system.
    # SELINUX= can take one of these three values:
    #     enforcing - SELinux security policy is enforced.
    #     permissive - SELinux prints warnings instead of enforcing.
    #     disabled - No SELinux policy is loaded.
    #SELINUX=enforcing
    SELINUX=disabled
    # SELINUXTYPE= can take one of three values:
    #     targeted - Targeted processes are protected,
    #     minimum - Modification of targeted policy. Only selected processes are protected. 
    #     mls - Multi Level Security protection.
    SELINUXTYPE=targeted 
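
#The file change only takes effect after a reboot; to drop SELinux to permissive immediately and verify:
setenforce 0
getenforce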

#5. Check the JDK version; if it is OpenJDK, install the devel package to get jps
java -version
    openjdk version "1.8.0_131"
    OpenJDK Runtime Environment (build 1.8.0_131-b12)
    OpenJDK 64-Bit Server VM (build 25.131-b12, mixed mode)
yum install -y  java-1.8.0-openjdk-devel

[root@bigdata-senior01 ~]# jps
41598 Jps
[root@bigdata-senior01 ~]# 

#6. Create the hadoop user
[root@bigdata-senior01 ~]# groupadd bigdata
[root@bigdata-senior01 ~]# useradd -m -g bigdata hadoop
[root@bigdata-senior01 ~]# ls /home/
alex  hadoop
[root@bigdata-senior01 ~]# ll /home/
total 4
drwx------. 14 alex   alex    4096 Apr 10 08:19 alex
drwx------.  3 hadoop bigdata   78 Apr 10 10:02 hadoop
[root@bigdata-senior01 ~]# 
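#Also give the new user a password; ssh-copy-id in step 9 will need it for the first login:
passwd hadoop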

#7. Clone the virtual machine
#a. VM --> right-click --> clone --> full clone
#b. Regenerate the NIC MAC address
#c. Boot the clone and change its network address
192.168.197.101 bigdata-senior02.yicheng.com
192.168.197.102 bigdata-senior03.yicheng.com

#8. Switch to text-mode (multi-user) boot
[root@bigdata-senior03 ~]# systemctl get-default
graphical.target
[root@bigdata-senior03 ~]# systemctl set-default multi-user.target
Removed symlink /etc/systemd/system/default.target.
Created symlink from /etc/systemd/system/default.target to /usr/lib/systemd/system/multi-user.target.
[root@bigdata-senior03 ~]# 

#9. Configure passwordless SSH between the nodes
#Generate a key pair on bigdata01
ssh-keygen -t rsa
#Distribute the public key
ssh-copy-id bigdata-senior01.yicheng.com
ssh-copy-id bigdata-senior02.yicheng.com
ssh-copy-id bigdata-senior03.yicheng.com
#Repeat the same steps on bigdata02 and bigdata03
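#To verify, each of the following should print the remote hostname without asking for a password:
ssh bigdata-senior02.yicheng.com hostname
ssh bigdata-senior03.yicheng.com hostname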

#Server role plan
bigdata-senior01.yicheng.com    bigdata-senior02.yicheng.com    bigdata-senior03.yicheng.com
NameNode                        ResourceManager
DataNode                        DataNode                        DataNode
NodeManager                     NodeManager                     NodeManager
HistoryServer                                                   SecondaryNameNode

#10. Install Hadoop on the first machine
wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-3.3.0/hadoop-3.3.0.tar.gz
tar -xvf hadoop-3.3.0.tar.gz

#Configure the environment variable: export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.282.b08-1.el7_9.x86_64/
[hadoop@bigdata-senior01 ~]$ which java
/usr/bin/java
[hadoop@bigdata-senior01 ~]$ ls -lr /user/bin/java
ls: cannot access /user/bin/java: No such file or directory
[hadoop@bigdata-senior01 ~]$ ls -lr /usr/bin/java
lrwxrwxrwx. 1 root root 22 Apr 10 09:40 /usr/bin/java -> /etc/alternatives/java
[hadoop@bigdata-senior01 ~]$ ls -lrt /etc/alternatives/java
lrwxrwxrwx. 1 root root 73 Apr 10 09:40 /etc/alternatives/java -> /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.282.b08-1.el7_9.x86_64/jre/bin/java
[hadoop@bigdata-senior01 ~]$
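#To make JAVA_HOME permanent for the hadoop user, one option is appending the export (path taken from the alternatives chain above) to ~/.bash_profile:
echo 'export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.282.b08-1.el7_9.x86_64/' >> ~/.bash_profile
source ~/.bash_profile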

#Configure core-site.xml
<configuration>
 <property>
   <name>fs.defaultFS</name>
   <value>hdfs://bigdata-senior01.yicheng.com:8020</value>
 </property>
 <property>
   <name>hadoop.tmp.dir</name>
   <value>/home/hadoop/hadoop-3.3.0/data/tmp</value>
 </property>
</configuration>
#hadoop.tmp.dir is Hadoop's temporary directory; by default the NameNode and DataNode data files are stored in subdirectories under it. Make sure this directory exists; if it does not, create it first.
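#For example, create it on this node now (and on the other two once Hadoop is distributed below):
mkdir -p /home/hadoop/hadoop-3.3.0/data/tmp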

#Configure hdfs-site.xml
<configuration>
 <property>
   <name>dfs.namenode.secondary.http-address</name>
   <value>bigdata-senior03.yicheng.com:50090</value>
 </property>
</configuration>
#dfs.namenode.secondary.http-address sets the HTTP address and port of the SecondaryNameNode

#Configure the slaves file (renamed to workers in Hadoop 3)
[hadoop@bigdata-senior01 hadoop]$ vim workers
[hadoop@bigdata-senior01 hadoop]$ pwd
/home/hadoop/hadoop-3.3.0/etc/hadoop
[hadoop@bigdata-senior01 hadoop]$ cat workers 
bigdata-senior01.yicheng.com
bigdata-senior02.yicheng.com
bigdata-senior03.yicheng.com
[hadoop@bigdata-senior01 hadoop]$ 
#The slaves file lists the DataNode hosts of HDFS. In Hadoop 3 it must be renamed to workers; otherwise, when start-all.sh is run on the master node,
#the DataNode service on the worker nodes will not start and has to be started by hand.

#Configure yarn-site.xml
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
<property>
    <name>yarn.resourcemanager.hostname</name>
    <value>bigdata-senior02.yicheng.com</value>
</property>
<property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
</property>
<property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>106800</value>
</property>
#Per the plan, yarn.resourcemanager.hostname points the ResourceManager at bigdata-senior02.yicheng.com.
#yarn.log-aggregation-enable turns log aggregation on or off.
#yarn.log-aggregation.retain-seconds sets how long aggregated logs are kept on HDFS at most (106800 seconds is roughly 30 hours).
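#With aggregation on, the container logs of a finished application can be fetched from HDFS via the yarn CLI (the ID below is a placeholder):
yarn logs -applicationId <applicationId>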

#Configure mapred-site.xml
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>bigdata-senior01.yicheng.com:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>bigdata-senior01.yicheng.com:19888</value>
    </property>
#mapreduce.framework.name makes MapReduce jobs run on YARN.
#mapreduce.jobhistory.address sets the MapReduce history server address; per the plan it runs on BigData01.
#mapreduce.jobhistory.webapp.address sets the history server's web UI address and port.

#Distribute the configured Hadoop with scp
scp -r /home/hadoop/hadoop-3.3.0/ bigdata-senior02.yicheng.com:/home/hadoop
scp -r /home/hadoop/hadoop-3.3.0/ bigdata-senior03.yicheng.com:/home/hadoop

#11. Format the NameNode
#Specify the dfs.namenode.name.dir and dfs.datanode.data.dir directories in core-site.xml (these conventionally belong in hdfs-site.xml, but Hadoop merges both files into one configuration, so the values are picked up either way)
<property>
     <name>dfs.namenode.name.dir</name>
     <value>file://${hadoop.tmp.dir}/dfs/name</value>
  </property>
<property>
     <name>dfs.datanode.data.dir</name>
     <value>file://${hadoop.tmp.dir}/dfs/data</value>
</property>
#After saving, scp it to the other two machines
[hadoop@bigdata-senior01 hadoop]$ scp /home/hadoop/hadoop-3.3.0/etc/hadoop/core-site.xml bigdata-senior02.yicheng.com:/home/hadoop/hadoop-3.3.0/etc/hadoop
core-site.xml                                                                                                                                                                  100% 1232   600.7KB/s   00:00    
[hadoop@bigdata-senior01 hadoop]$ scp /home/hadoop/hadoop-3.3.0/etc/hadoop/core-site.xml bigdata-senior03.yicheng.com:/home/hadoop/hadoop-3.3.0/etc/hadoop
core-site.xml                                                                                                                                                                  100% 1232   291.8KB/s   00:00    
[hadoop@bigdata-senior01 hadoop]$ 
#Each format by default generates a new cluster ID and writes it into the VERSION files of the NameNode and DataNodes (located under dfs/name/current and dfs/data/current).
#When re-formatting, a fresh cluster ID is generated; if the old directories are not removed first, the NameNode's VERSION file holds the new cluster ID while the DataNodes keep the old one, and the mismatch triggers errors.
#An alternative is to pass the old cluster ID as a parameter to the format command.
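#So before a re-format, first wipe the old directories on every node, e.g. (paths follow hadoop.tmp.dir above):
rm -rf /home/hadoop/hadoop-3.3.0/data/tmp/dfs/name /home/hadoop/hadoop-3.3.0/data/tmp/dfs/data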
#Run the format
[hadoop@bigdata-senior01 hadoop]$ /home/hadoop/hadoop-3.3.0/bin/hdfs namenode -format


#12. Start the cluster
#Start HDFS
[hadoop@bigdata-senior01 ~]$ /home/hadoop/hadoop-3.3.0/sbin/start-dfs.sh
Starting namenodes on [bigdata-senior01.yicheng.com]
bigdata-senior01.yicheng.com: ERROR: JAVA_HOME is not set and could not be found.
Starting datanodes
localhost: Warning: Permanently added 'localhost' (ECDSA) to the list of known hosts.
localhost: ERROR: JAVA_HOME is not set and could not be found.
Starting secondary namenodes [bigdata-senior03.yicheng.com]
bigdata-senior03.yicheng.com: ERROR: JAVA_HOME is not set and could not be found.
[hadoop@bigdata-senior01 ~]$ echo $JAVA_HOME
/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.282.b08-1.el7_9.x86_64/
[hadoop@bigdata-senior01 ~]$ 
#Fix: set JAVA_HOME in etc/hadoop/hadoop-env.sh; the export below has to go into that file, because an export at the shell prompt is not inherited by the daemons the sbin scripts start over ssh. Then distribute the file:
[hadoop@bigdata-senior01 hadoop]$ export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.282.b08-1.el7_9.x86_64/
[hadoop@bigdata-senior01 hadoop]$ scp /home/hadoop/hadoop-3.3.0/etc/hadoop/hadoop-env.sh bigdata-senior02.yicheng.com:/home/hadoop/hadoop-3.3.0/etc/hadoop
[hadoop@bigdata-senior01 hadoop]$ scp /home/hadoop/hadoop-3.3.0/etc/hadoop/hadoop-env.sh bigdata-senior03.yicheng.com:/home/hadoop/hadoop-3.3.0/etc/hadoop
[hadoop@bigdata-senior01 hadoop-3.3.0]$ /home/hadoop/hadoop-3.3.0/sbin/start-dfs.sh
Starting namenodes on [bigdata-senior01.yicheng.com]
Starting datanodes
Starting secondary namenodes [bigdata-senior03.yicheng.com]
bigdata-senior03.yicheng.com: WARNING: /home/hadoop/hadoop-3.3.0/logs does not exist. Creating.
[hadoop@bigdata-senior01 hadoop-3.3.0]$
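#At this point a jps check on each node should match the role plan: senior01 with NameNode and DataNode, senior02 with a DataNode, senior03 with DataNode and SecondaryNameNode.
jps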

    #Start YARN
    [hadoop@bigdata-senior01 hadoop-3.3.0]$ /home/hadoop/hadoop-3.3.0/sbin/start-yarn.sh
    Starting resourcemanager
    Starting nodemanagers
    [hadoop@bigdata-senior01 hadoop-3.3.0]$

    #Start the ResourceManager on BigData02:
    [hadoop@bigdata-senior02 hadoop-3.3.0]$ /home/hadoop/hadoop-3.3.0/sbin/yarn-daemon.sh start resourcemanager
    WARNING: Use of this script to start YARN daemons is deprecated.
    WARNING: Attempting to execute replacement "yarn --daemon start" instead.
    WARNING: /home/hadoop/hadoop-3.3.0/logs does not exist. Creating.
    [hadoop@bigdata-senior02 hadoop-3.3.0]$
    #3.3.0 deprecates this startup script; use the yarn command directly
    [hadoop@bigdata-senior02 bin]$ ./yarn --daemon start resourcemanager
    resourcemanager is running as process 5361.  Stop it first.
    [hadoop@bigdata-senior02 bin]$ 
    #It was already started above, so this command only confirms it is running

#Since Hadoop 3.0, simply running start-yarn.sh on BigData02 starts the ResourceManager and the NodeManagers together.
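#To confirm that all three NodeManagers have registered with the ResourceManager:
bin/yarn node -list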


#Start the JobHistory service on bigdata01
[hadoop@bigdata-senior01 hadoop-3.3.0]$ sbin/mr-jobhistory-daemon.sh start historyserver
WARNING: Use of this script to start the MR JobHistory daemon is deprecated.
WARNING: Attempting to execute replacement "mapred --daemon start" instead.
[hadoop@bigdata-senior01 hadoop-3.3.0]$
#In 3.3.0 this startup style is likewise deprecated; use the mapred command directly
[hadoop@bigdata-senior01 hadoop-3.3.0]$ ./bin/mapred --daemon start historyserver


#Check the web UIs
#Once the Hadoop cluster is up and running check the web-ui of the components as described below:
#NameNode
http://192.168.197.100:9870/
#ResourceManager
http://192.168.197.101:8088/
#MapReduce JobHistory Server
http://192.168.197.100:19888/

#Test the MapReduce wordcount job
#Upload the test file
[hadoop@bigdata-senior01 data]$ rz

[hadoop@bigdata-senior01 data]$ ls
SingleCluster.html  tmp
[hadoop@bigdata-senior01 data]$ 
#Create the input directory /input on HDFS
[hadoop@bigdata-senior01 hadoop-3.3.0]$ ./bin/hdfs dfs -mkdir /input
[hadoop@bigdata-senior01 hadoop-3.3.0]$ 
#Put SingleCluster.html into HDFS
[hadoop@bigdata-senior01 hadoop-3.3.0]$ ./bin/hdfs dfs -put /home/hadoop/hadoop-3.3.0/data/SingleCluster.html /input/SingleCluster.html
[hadoop@bigdata-senior01 hadoop-3.3.0]$ ./bin/hdfs dfs -ls /input
Found 1 items
-rw-r--r--   3 hadoop supergroup      36814 2021-04-10 21:13 /input/SingleCluster.html
[hadoop@bigdata-senior01 hadoop-3.3.0]$ 
#Run the MapReduce demo bundled with Hadoop
 bin/yarn jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.0.jar wordcount /input/SingleCluster.html /output
#It fails with this error:
    2_0002_000002 exited with  exitCode: 1
    Failing this attempt.Diagnostics: [2021-04-10 21:18:33.832]Exception from container-launch.
    Container id: container_1618053263682_0002_02_000001
    Exit code: 1

    [2021-04-10 21:18:33.838]Container exited with a non-zero exit code 1. Error file: prelaunch.err.
    Last 4096 bytes of prelaunch.err :
    Last 4096 bytes of stderr :
    Error: Could not find or load main class org.apache.hadoop.mapreduce.v2.app.MRAppMaster


    [2021-04-10 21:18:33.838]Container exited with a non-zero exit code 1. Error file: prelaunch.err.
    Last 4096 bytes of prelaunch.err :
    Last 4096 bytes of stderr :
    Error: Could not find or load main class org.apache.hadoop.mapreduce.v2.app.MRAppMaster


    For more detailed output, check the application tracking page: http://bigdata-senior02.yicheng.com:8088/cluster/app/application_1618053263682_0002 Then click on links to logs of each attempt.
    . Failing the application.
    2021-04-10 21:18:34,892 INFO mapreduce.Job: Counters: 0

#Write the Hadoop classpath into the config file
[hadoop@bigdata-senior01 hadoop-3.3.0]$ bin/hadoop classpath
/home/hadoop/hadoop-3.3.0/etc/hadoop:/home/hadoop/hadoop-3.3.0/share/hadoop/common/lib/*:/home/hadoop/hadoop-3.3.0/share/hadoop/common/*:/home/hadoop/hadoop-3.3.0/share/hadoop/hdfs:/home/hadoop/hadoop-3.3.0/share/hadoop/hdfs/lib/*:/home/hadoop/hadoop-3.3.0/share/hadoop/hdfs/*:/home/hadoop/hadoop-3.3.0/share/hadoop/mapreduce/*:/home/hadoop/hadoop-3.3.0/share/hadoop/yarn:/home/hadoop/hadoop-3.3.0/share/hadoop/yarn/lib/*:/home/hadoop/hadoop-3.3.0/share/hadoop/yarn/*
vim yarn-site.xml
<property>
    <name>yarn.application.classpath</name>
    <value>/home/hadoop/hadoop-3.3.0/etc/hadoop:/home/hadoop/hadoop-3.3.0/share/hadoop/common/lib/*:/home/hadoop/hadoop-3.3.0/share/hadoop/common/*:/home/hadoop/hadoop-3.3.0/share/hadoop/hdfs:/home/hadoop/hadoop-3.3.0/share/hadoop/hdfs/lib/*:/home/hadoop/hadoop-3.3.0/share/hadoop/hdfs/*:/home/hadoop/hadoop-3.3.0/share/hadoop/mapreduce/*:/home/hadoop/hadoop-3.3.0/share/hadoop/yarn:/home/hadoop/hadoop-3.3.0/share/hadoop/yarn/lib/*:/home/hadoop/hadoop-3.3.0/share/hadoop/yarn/*</value>
</property>

scp /home/hadoop/hadoop-3.3.0/etc/hadoop/yarn-site.xml bigdata-senior03.yicheng.com:/home/hadoop/hadoop-3.3.0/etc/hadoop
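#yarn.application.classpath is read by the NodeManagers when they launch containers, so the updated file has to be present on every node (BigData02 as well); restart YARN on BigData02 for it to take effect:
sbin/stop-yarn.sh
sbin/start-yarn.sh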

#Re-run the bundled MapReduce demo
 bin/yarn jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.0.jar wordcount /input/SingleCluster.html /output

 [hadoop@bigdata-senior01 hadoop-3.3.0]$ bin/hdfs dfs -ls /output
Found 2 items
-rw-r--r--   3 hadoop supergroup          0 2021-04-10 22:10 /output/_SUCCESS
-rw-r--r--   3 hadoop supergroup      20914 2021-04-10 22:10 /output/part-r-00000
[hadoop@bigdata-senior01 hadoop-3.3.0]$ 
#View the word counts
[hadoop@bigdata-senior01 hadoop-3.3.0]$ bin/hdfs dfs -cat /output/part-r-00000 | more
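#Note that MapReduce will not overwrite an existing output directory; to re-run the job, remove /output first:
bin/hdfs dfs -rm -r /output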

 
