1. Environment setup:
# Disable the firewall (test environment only)
systemctl stop firewalld
systemctl disable firewalld
# Set up passwordless SSH login
ssh-keygen -t rsa # press Enter through all prompts
# Append the public key to authorized_keys (appending is safer than overwriting with cp)
cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys
# Test the passwordless login
ssh localhost
# Edit /etc/hosts and add the following entries
192.168.40.50 master
192.168.40.51 data01
192.168.40.52 data02
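For a multi-node cluster, the master must also be able to SSH into each data node without a password, since the start scripts log in to the workers over SSH. A minimal sketch, to be run once the data01/data02 machines from the hosts entries above actually exist (see section 3.2):
# Copy the master's public key to each data node (prompts for that node's root password once)
ssh-copy-id root@data01
ssh-copy-id root@data02
# Verify that no password is requested
ssh data01 hostname
ssh data02 hostname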
2. Install Java
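The hadoop-env.sh configuration in section 3.1 expects the JDK at /usr/local/jdk1.8. A minimal sketch, assuming a JDK 8 tarball has already been downloaded (the exact archive and directory names below are assumptions and depend on the release you fetched):
# Extract the JDK and create the path hadoop-env.sh expects (archive name is an assumption)
tar -zxvf jdk-8u202-linux-x64.tar.gz -C /usr/local
mv /usr/local/jdk1.8.0_202 /usr/local/jdk1.8
# Make java available on the PATH
echo 'export JAVA_HOME=/usr/local/jdk1.8' >> /etc/profile
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> /etc/profile
source /etc/profile
java -version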
3. Install Hadoop
# Download from: http://hadoop.apache.org/
# Extract Hadoop to the target directory
tar -zxvf hadoop-3.2.0.tar.gz -C /usr/local
ln -sf /usr/local/hadoop-3.2.0 /usr/local/hadoop
vim /etc/profile
#Hadoop
export HADOOP_HOME=/usr/local/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
source /etc/profile
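A quick check that the environment variables are wired up correctly; this should print the 3.2.0 release information:
hadoop version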
3.1. Modify the Hadoop configuration files
/usr/local/hadoop/etc/hadoop/hadoop-env.sh
vim /usr/local/hadoop/etc/hadoop/hadoop-env.sh
# Change to the following. This is a test setup; in production, create a dedicated user instead of running as root
export JAVA_HOME=/usr/local/jdk1.8
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export HDFS_NAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
/usr/local/hadoop/etc/hadoop/core-site.xml
vim /usr/local/hadoop/etc/hadoop/core-site.xml
# Change to the following
<configuration>
    <!-- RPC address of the NameNode (the HDFS master) -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://master:9000</value>
    </property>
    <!-- Base directory for files Hadoop generates at runtime -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/data/hadoop/tmp</value>
    </property>
    <!-- Read/write buffer size in bytes -->
    <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
    </property>
</configuration>
/usr/local/hadoop/etc/hadoop/hdfs-site.xml
vim /usr/local/hadoop/etc/hadoop/hdfs-site.xml
# Change to the following
<configuration>
    <!-- HDFS replication factor -->
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
    <!-- Disable permission checks (test environment only) -->
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
    <!-- Local storage for NameNode metadata -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/data/hadoop/dfs/name</value>
    </property>
    <!-- Local storage for DataNode blocks -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/data/hadoop/dfs/data</value>
    </property>
    <!-- NameNode HTTP address (Hadoop 3 defaults to port 9870; pinned to 50070 here) -->
    <property>
        <name>dfs.namenode.http-address</name>
        <value>master:50070</value>
    </property>
    <!-- SecondaryNameNode HTTP address -->
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>master:50090</value>
    </property>
</configuration>
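Hadoop normally creates the directories configured above on format and startup, but pre-creating them makes ownership and permission problems easier to spot:
# Create the storage directories referenced in core-site.xml and hdfs-site.xml
mkdir -p /data/hadoop/tmp /data/hadoop/dfs/name /data/hadoop/dfs/data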
/usr/local/hadoop/etc/hadoop/yarn-site.xml
vim /usr/local/hadoop/etc/hadoop/yarn-site.xml
# Change to the following
<configuration>
    <!-- Site specific YARN configuration properties -->
    <!-- Which node runs the ResourceManager -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>master</value>
    </property>
    <property>
        <name>yarn.nodemanager.hostname</name>
        <value>master</value>
    </property>
    <!-- Reducers fetch map output via mapreduce_shuffle -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <!-- Total memory a NodeManager may hand out to containers, in MB -->
    <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>6144</value>
    </property>
    <property>
        <name>yarn.app.mapreduce.am.resource.memory-mb</name>
        <value>512</value>
        <description>Memory requested for the MapReduce ApplicationMaster container, in MB. Default 1536.</description>
    </property>
    <!-- Note: these resource-style keys overlap with the legacy mapreduce.*.memory.mb keys set in mapred-site.xml below -->
    <property>
        <name>mapreduce.map.resource.memory-mb</name>
        <value>256</value>
        <description>Memory requested for each map task container, in MB. Default 1024.</description>
    </property>
    <property>
        <name>mapreduce.reduce.resource.memory-mb</name>
        <value>512</value>
        <description>Memory requested for each reduce task container, in MB. Default 1024.</description>
    </property>
    <!-- Smallest and largest container allocations the scheduler will grant, in MB -->
    <property>
        <name>yarn.scheduler.minimum-allocation-mb</name>
        <value>64</value>
    </property>
    <property>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <value>6144</value>
    </property>
    <!-- Fraction (0-1) of cluster resources that may run ApplicationMasters; normally set in capacity-scheduler.xml -->
    <property>
        <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
        <value>1</value>
    </property>
    <property>
        <name>yarn.application.classpath</name>
        <value>
            /usr/local/hadoop/etc/hadoop,
            /usr/local/hadoop/share/hadoop/common/*,
            /usr/local/hadoop/share/hadoop/common/lib/*,
            /usr/local/hadoop/share/hadoop/hdfs/*,
            /usr/local/hadoop/share/hadoop/hdfs/lib/*,
            /usr/local/hadoop/share/hadoop/mapreduce/*,
            /usr/local/hadoop/share/hadoop/mapreduce/lib/*,
            /usr/local/hadoop/share/hadoop/yarn/*,
            /usr/local/hadoop/share/hadoop/yarn/lib/*
        </value>
    </property>
</configuration>
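The yarn.application.classpath value above does not have to be maintained by hand; the hadoop CLI can print the classpath the local installation actually uses, and that output can be pasted into the property instead:
# Print this installation's classpath; usable as the value of yarn.application.classpath
/usr/local/hadoop/bin/hadoop classpath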
/usr/local/hadoop/etc/hadoop/mapred-site.xml
vim /usr/local/hadoop/etc/hadoop/mapred-site.xml
# Change to the following
<configuration>
    <!-- Tell the MapReduce framework to run on YARN -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- NodeManager property: for the NodeManager to honor it, it must also be set in yarn-site.xml -->
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
        <description>Whether virtual memory limits will be enforced for containers</description>
    </property>
    <!-- Additional settings -->
    <property>
        <name>yarn.app.mapreduce.am.resource.mb</name>
        <value>512</value>
    </property>
    <property>
        <name>yarn.app.mapreduce.am.command-opts</name>
        <value>-Xmx409m</value>
    </property>
    <property>
        <name>mapreduce.map.memory.mb</name>
        <value>512</value>
    </property>
    <property>
        <name>mapreduce.reduce.memory.mb</name>
        <value>512</value>
    </property>
    <!-- JVM heap sizes, kept below the container sizes above -->
    <property>
        <name>mapreduce.map.java.opts</name>
        <value>-Xmx409m</value>
    </property>
    <property>
        <name>mapreduce.reduce.java.opts</name>
        <value>-Xmx409m</value>
    </property>
</configuration>
3.2. For a multi-node cluster, clone the machine, then adjust each clone as follows (see the workers-file sketch after these commands)
# Set the hostname to match the /etc/hosts entries from section 1 (data01, data02, ...)
hostnamectl set-hostname data01
# Assign the clone its own IP address (the interface name varies by machine)
vim /etc/sysconfig/network-scripts/ifcfg-ens192
systemctl restart network
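For start-all.sh on the master to bring up the DataNodes and NodeManagers on the clones, the worker hostnames must also be listed in the workers file (named slaves in Hadoop 2.x). A sketch, assuming data01 and data02 from the hosts entries above run the worker daemons and share the master's configuration:
# On the master: list the worker nodes, one per line
vim /usr/local/hadoop/etc/hadoop/workers
data01
data02
# Ship the identical Hadoop configuration to each worker
scp -r /usr/local/hadoop/etc/hadoop root@data01:/usr/local/hadoop/etc/
scp -r /usr/local/hadoop/etc/hadoop root@data02:/usr/local/hadoop/etc/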
3.3. Format and start Hadoop
# Format the HDFS NameNode first (first run only; reformatting an existing cluster destroys its metadata)
/usr/local/hadoop/bin/hdfs namenode -format
# Start all daemons (equivalent to running start-dfs.sh followed by start-yarn.sh)
/usr/local/hadoop/sbin/start-all.sh
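To confirm the cluster is up, check the daemons and run a sample job; the example jar path below matches the 3.2.0 release extracted in section 3:
# jps on the master should list NameNode, SecondaryNameNode, and ResourceManager
# (plus DataNode and NodeManager on a single-node setup, or on each worker)
jps
# Web UIs, per the configuration above:
#   HDFS: http://master:50070
#   YARN: http://master:8088
# Smoke-test MapReduce with the bundled pi estimator (2 maps, 10 samples each)
/usr/local/hadoop/bin/hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.0.jar pi 2 10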