环境及工具
- Hadoop 2.7.3
- Windows 10
- CentOS 7
- VMware 15.1.0
- OpenJDK 8
说明
- 伪分布式:只有一个节点
- 完全分布式:多个节点
1 创建用户
# Create the 'hadoop' user and prepare /usr/local for it.
# NOTE: `useradd -p` expects an *encrypted* password string, so the original
# `useradd -p "123" hadoop` produced an account whose password was NOT "123".
# Set the password explicitly via chpasswd instead.
useradd hadoop
echo 'hadoop:123' | chpasswd
# Add hadoop to the root group (as in the original setup).
gpasswd -a hadoop root
# Allow group members to traverse /usr and /usr/local.
chmod 771 /usr
chmod 771 /usr/local
# /usr/local/hadoop only exists after the installer below has run; guard the
# chown so this snippet does not fail on a fresh machine.
[ -d /usr/local/hadoop ] && chown -R hadoop:hadoop /usr/local/hadoop
2 配置免密登录
(1) 使用命令 ssh-keygen -t rsa 生成 ssh 密钥对,均使用默认选项即可,在 ~/.ssh 隐藏目录下生成 id_rsa(私钥)和 id_rsa.pub(公钥)
(2) 将公钥追加到授权列表:ssh-copy-id localhost(或执行 cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys),然后用 ssh localhost 验证无需密码即可登录
3 安装 Hadoop伪分布式一键脚本
将脚本上传至/usr/local/
然后给予脚本执行权限(chmod +x 脚本名),运行即可一键安装
#!/bin/bash
# Hadoop 2.7.3 伪分布式一键安装脚本 (pseudo-distributed one-click installer).
# Downloads the Hadoop tarball if it is not already present, then unpacks it
# to /usr/local/hadoop.  All paths are anchored to /usr/local: the original
# `$(ls | grep …)` calls silently depended on whatever directory the script
# happened to be launched from, and `tar` extracted into the CWD.
cd /usr/local || exit 1

# Locate an existing hadoop*.tar.gz via globbing (never parse `ls`).
archive=
for f in hadoop*.tar.gz; do
  [ -e "$f" ] && { archive=$f; break; }
done

if [ -z "$archive" ]; then
  echo '开始下载hadoop安装包...'
  wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz
  archive=hadoop-2.7.3.tar.gz
fi

# Unpack once, idempotently.  The top-level directory name is read from the
# archive listing instead of being guessed with `ls | grep '[^gz]$'`.
if [ ! -d /usr/local/hadoop ]; then
  topdir=$(tar -tzf "$archive" | head -n 1 | cut -d/ -f1)
  tar -zxf "$archive" -C /usr/local
  mv -- "/usr/local/$topdir" /usr/local/hadoop
fi
#hadoop环境变量配置
# Both edits below are idempotent: re-running the installer no longer appends
# duplicate PATH fragments or extra HADOOP_HOME definitions (the original
# unanchored `grep "export PATH="` also matched commented-out lines).

#PATH设置: ensure $HADOOP_HOME/bin and $HADOOP_HOME/sbin are on PATH.
if ! grep -q 'HADOOP_HOME/bin' /etc/profile; then
  if grep -q '^export PATH=' /etc/profile; then
    #行尾添加 (append to the existing export PATH line)
    sed -i '/^export PATH=/s|$|:$HADOOP_HOME/bin:$HADOOP_HOME/sbin|' /etc/profile
  else
    #末行插入 (no export PATH line yet: add one at end of file)
    echo 'export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin'>>/etc/profile
  fi
fi

#HADOOP_HOME设置: create or rewrite the definition.
if grep -q '^export HADOOP_HOME=' /etc/profile; then
  #修改文件内的HADOOP_HOME
  sed -i 's|^export HADOOP_HOME=.*|export HADOOP_HOME=/usr/local/hadoop|' /etc/profile
else
  #在PATH前面一行插入HADOOP_HOME, so the PATH line can reference it.
  sed -i '/^export PATH=/i\export HADOOP_HOME=/usr/local/hadoop' /etc/profile
fi
source /etc/profile
#伪分布式设置
# core-site.xml: Hadoop scratch directory and the NameNode RPC endpoint.
# Written with a quoted heredoc (no expansion) instead of a giant echo string.
cat <<'EOF' >"$HADOOP_HOME/etc/hadoop/core-site.xml"
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/usr/local/hadoop/tmp</value>
<description>指定hadoop运行时产生文件的存储路径</description>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
<description>hdfs namenode的通信地址,通信端口</description>
</property>
</configuration>
EOF
# hdfs-site.xml: single-node HDFS settings (replication=1, data dirs,
# permission checks off).  Quoted heredoc instead of a giant echo string.
cat <<'EOF' >"$HADOOP_HOME/etc/hadoop/hdfs-site.xml"
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<!-- 该文件指定与HDFS相关的配置信息。
需要修改HDFS默认的块的副本属性,因为HDFS默认情况下每个数据块保存3个副本,
而在伪分布式模式下运行时,由于只有一个数据节点,
所以需要将副本个数改为1;否则Hadoop程序会报错。 -->
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
<description>指定HDFS存储数据的副本数目,默认情况下是3份</description>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/local/hadoop/hadoopdata/namenode</value>
<description>namenode存放数据的目录</description>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/local/hadoop/hadoopdata/datanode</value>
<description>datanode存放block块的目录</description>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
<description>关闭权限验证</description>
</property>
<property>
<name>dfs.namenode.datanode.registration.ip-hostname-check</name>
<value>false</value>
</property>
</configuration>
EOF
# mapred-site.xml: run MapReduce jobs on YARN.
# Quoted heredoc instead of a giant echo string.
cat <<'EOF' >"$HADOOP_HOME/etc/hadoop/mapred-site.xml"
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<!-- 在该配置文件中指定与MapReduce作业相关的配置属性,需要指定JobTracker运行的主机地址-->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
<description>指定mapreduce运行在yarn上</description>
</property>
</configuration>
EOF
# yarn-site.xml: enable the mapreduce_shuffle auxiliary service.
# Quoted heredoc instead of a giant echo string.
cat <<'EOF' >"$HADOOP_HOME/etc/hadoop/yarn-site.xml"
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
<description>mapreduce执行shuffle时获取数据的方式</description>
</property>
</configuration>
EOF
# Worker list: single node, so only localhost.
echo 'localhost'>"$HADOOP_HOME/etc/hadoop/slaves"

# Pin JAVA_HOME inside hadoop-env.sh.  The original pair of sed calls
# (comment out, then append after the commented line) duplicated export
# lines on every re-run and wrote an empty value when $JAVA_HOME was not
# set in the caller's environment.  Replace the line in place instead.
if [ -z "$JAVA_HOME" ]; then
  # Derive JAVA_HOME from the java binary on PATH; readlink -f resolves
  # the /etc/alternatives symlink chain used on CentOS.
  JAVA_HOME=$(dirname "$(dirname "$(readlink -f "$(command -v java)")")")
fi
sed -i "s|^#*export JAVA_HOME=.*|export JAVA_HOME=$JAVA_HOME|" \
  "$HADOOP_HOME/etc/hadoop/hadoop-env.sh"

chown -R hadoop:hadoop "$HADOOP_HOME"

#初始化Hadoop
# (an interactive prompt appears if HDFS was already formatted before)
hdfs namenode -format

#启动Hadoop
# Execute the startup script instead of sourcing it (the original
# `. start-all.sh` ran it inside this shell, leaking its environment).
"$HADOOP_HOME/sbin/start-all.sh"
4 可视化界面测试结果
使用 netstat -luntp 查看监听端口