HBase High-Availability Cluster Installation with Docker
Setting up a distributed system based on Hadoop and HBase
To build a distributed system based on Hadoop and HBase, the following steps prepare the Dockerfile and fill in the necessary environment configuration.
Resource layout
- 3 Hadoop master nodes
- 3 Hadoop worker nodes
- 3 ZooKeeper nodes
Configuring ZooKeeper
Pull the official ZooKeeper image (the tag matches the image used in the compose file below)
docker pull zookeeper:3.7.1-temurin
Write docker-compose.yml
version: '3.1'
services:
zoo1:
image: zookeeper:3.7.1-temurin
container_name: zoo1
restart: always
hostname: zoo1
ports:
- 2181:2181
environment:
ZOO_MY_ID: 1
ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181
networks:
zookeeper-cluster:
ipv4_address: 10.10.1.10
zoo2:
image: zookeeper:3.7.1-temurin
container_name: zoo2
restart: always
hostname: zoo2
ports:
- 2182:2181
environment:
ZOO_MY_ID: 2
ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181
networks:
zookeeper-cluster:
ipv4_address: 10.10.1.11
zoo3:
image: zookeeper:3.7.1-temurin
container_name: zoo3
restart: always
hostname: zoo3
ports:
- 2183:2181
environment:
ZOO_MY_ID: 3
ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181
networks:
zookeeper-cluster:
ipv4_address: 10.10.1.12
networks:
zookeeper-cluster:
name: zookeeper-cluster
ipam:
config:
- subnet: "10.10.1.0/24"
Start the ZooKeeper cluster
docker-compose up -d
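After the containers come up, you can optionally check that the three nodes have formed a quorum (one node should report "leader", the other two "follower"); the zkServer.sh path below matches the official image layout used later in this guide:
docker exec zoo1 /apache-zookeeper-3.7.1-bin/bin/zkServer.sh status
docker exec zoo2 /apache-zookeeper-3.7.1-bin/bin/zkServer.sh status
docker exec zoo3 /apache-zookeeper-3.7.1-bin/bin/zkServer.sh status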
Build the image david/hbase:2.5.10
Prepare the Dockerfile
The Dockerfile itself only sets up the base system environment and the JDK.
# Create a file named Dockerfile in a directory of your choice
touch /path/to/Dockerfile
Dockerfile contents
Dockerfile for HBase and Hadoop setup (ARM64 architecture)
FROM ubuntu:22.04
# Environment variables
ENV HADOOP_HOME /opt/hadoop
ENV HBASE_HOME /opt/hbase
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-arm64
# Run as root
USER root
# Update packages, install dependencies, set up passwordless SSH for root, and create the HDFS data directories
RUN apt-get update && \
    apt-get install -y sudo openjdk-8-jdk openssh-server openssh-client && \
    mkdir -p ~/.ssh && \
    ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa && \
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys && \
    chmod 0600 ~/.ssh/authorized_keys && \
    mkdir -p /data/hdfs && \
    mkdir -p /data/hdfs/journal/node/local/data
# Start the SSH service once during the build (also creates sshd's runtime directory)
RUN service ssh start
# Expose ports
EXPOSE 9870 9868 9864 9866 8088 8020 16000 16010 16020 22
# Run sshd in the foreground when the container starts
CMD ["/usr/sbin/sshd", "-D"]
Dockerfile for HBase and Hadoop setup (AMD64 architecture)
FROM ubuntu:22.04
# Environment variables
ENV HADOOP_HOME /opt/hadoop
ENV HBASE_HOME /opt/hbase
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
# Run as root
USER root
# Update packages, install dependencies, set up passwordless SSH for root, and create the HDFS data directories
RUN apt-get update && \
    apt-get install -y sudo openjdk-8-jdk openssh-server openssh-client && \
    mkdir -p ~/.ssh && \
    ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa && \
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys && \
    chmod 0600 ~/.ssh/authorized_keys && \
    mkdir -p /data/hdfs && \
    mkdir -p /data/hdfs/journal/node/local/data
# Start the SSH service once during the build (also creates sshd's runtime directory)
RUN service ssh start
# Expose ports
EXPOSE 9870 9868 9864 9866 8088 8020 16000 16010 16020 22
# Run sshd in the foreground when the container starts
CMD ["/usr/sbin/sshd", "-D"]
Build the image from the Dockerfile
# Run in the directory containing the Dockerfile; note the trailing "." (the build context)
docker build -t david/hbase:2.5.10 .
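If the build succeeds, the image should show up locally:
docker images david/hbase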
Installing Hadoop and HBase
The Dockerfile only sets up the base system environment and the JDK; Hadoop and HBase still need to be downloaded and installed manually.
Download and install Hadoop
- Visit the Hadoop download page and pick a version that suits you.
- Or use the following download, verified working as of September 6, 2024:
wget https://dlcdn.apache.org/hadoop/common/hadoop-3.4.0/hadoop-3.4.0.tar.gz
Download and install HBase
- Visit the HBase download page and pick a suitable version.
- Or use the following download, verified working as of September 6, 2024:
wget https://dlcdn.apache.org/hbase/2.5.10/hbase-2.5.10-hadoop3-bin.tar.gz
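The docker-compose.yml below bind-mounts the unpacked directories from /opt/docker-data on the host. Assuming you keep that layout (adjust the path if you customize it), extract both archives there, for example:
sudo mkdir -p /opt/docker-data
sudo tar -xzf hadoop-3.4.0.tar.gz -C /opt/docker-data            # creates /opt/docker-data/hadoop-3.4.0
sudo tar -xzf hbase-2.5.10-hadoop3-bin.tar.gz -C /opt/docker-data # creates /opt/docker-data/hbase-2.5.10-hadoop3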
Creating the containers with docker-compose.yml
Configuration notes:
- The Hadoop and HBase directories are bind-mounted from /opt/docker-data; if you use a custom path, update every source: value accordingly.
- Ports are mapped according to each node's role.
- The zookeeper-cluster network is used for communication between the nodes.
version: '3'
services:
hadoop-master1:
image: david/hbase:2.5.10
container_name: hadoop-master1
hostname: hadoop-master1
stdin_open: true
tty: true
command:
- sh
- -c
- |
/usr/sbin/sshd -D
volumes:
- type: bind
source: /opt/docker-data/hadoop-3.4.0
target: /opt/hadoop
- type: bind
source: /opt/docker-data/hbase-2.5.10-hadoop3
target: /opt/hbase
ports:
- "8020:8020"
- "8042:8042"
- "9870:9870"
- "8088:8088"
- "8032:8032"
- "10020:10020"
- "16000:16000"
- "16010:16010"
networks:
zookeeper-cluster:
ipv4_address: 10.10.1.20
hadoop-master2:
image: david/hbase:2.5.10
container_name: hadoop-master2
hostname: hadoop-master2
stdin_open: true
tty: true
command:
- sh
- -c
- |
/usr/sbin/sshd -D
volumes:
- type: bind
source: /opt/docker-data/hadoop-3.4.0
target: /opt/hadoop
- type: bind
source: /opt/docker-data/hbase-2.5.10-hadoop3
target: /opt/hbase
ports:
- "28020:8020"
- "18042:8042"
- "29870:9870"
- "28088:8088"
- "28032:8032"
- "20020:10020"
networks:
zookeeper-cluster:
ipv4_address: 10.10.1.21
hadoop-master3:
image: david/hbase:2.5.10
container_name: hadoop-master3
hostname: hadoop-master3
stdin_open: true
tty: true
command:
- sh
- -c
- |
/usr/sbin/sshd -D
volumes:
- type: bind
source: /opt/docker-data/hadoop-3.4.0
target: /opt/hadoop
- type: bind
source: /opt/docker-data/hbase-2.5.10-hadoop3
target: /opt/hbase
ports:
- "38020:8020"
- "28042:8042"
- "39870:9870"
- "38088:8088"
- "38032:8032"
- "30020:10020"
networks:
zookeeper-cluster:
ipv4_address: 10.10.1.22
hadoop-worker1:
image: david/hbase:2.5.10
container_name: hadoop-worker1
hostname: hadoop-worker1
stdin_open: true
tty: true
command:
- sh
- -c
- |
/usr/sbin/sshd -D
volumes:
- type: bind
source: /opt/docker-data/hadoop-3.4.0
target: /opt/hadoop
- type: bind
source: /opt/docker-data/hbase-2.5.10-hadoop3
target: /opt/hbase
ports:
- "9867:9867"
- "38042:8042"
- "9866:9866"
- "9865:9865"
- "9864:9864"
networks:
zookeeper-cluster:
ipv4_address: 10.10.1.23
hadoop-worker2:
image: david/hbase:2.5.10
container_name: hadoop-worker2
hostname: hadoop-worker2
stdin_open: true
tty: true
command:
- sh
- -c
- |
/usr/sbin/sshd -D
volumes:
- type: bind
source: /opt/docker-data/hadoop-3.4.0
target: /opt/hadoop
- type: bind
source: /opt/docker-data/hbase-2.5.10-hadoop3
target: /opt/hbase
ports:
- "29867:9867"
- "48042:8042"
- "29866:9866"
- "29865:9865"
- "29864:9864"
networks:
zookeeper-cluster:
ipv4_address: 10.10.1.24
hadoop-worker3:
image: david/hbase:2.5.10
container_name: hadoop-worker3
hostname: hadoop-worker3
stdin_open: true
tty: true
command:
- sh
- -c
- |
/usr/sbin/sshd -D
volumes:
- type: bind
source: /opt/docker-data/hadoop-3.4.0
target: /opt/hadoop
- type: bind
source: /opt/docker-data/hbase-2.5.10-hadoop3
target: /opt/hbase
ports:
- "39867:9867"
- "58042:8042"
- "39866:9866"
- "39865:9865"
- "39864:9864"
networks:
zookeeper-cluster:
ipv4_address: 10.10.1.25
networks:
zookeeper-cluster:
external: true
Notes
- Storage permissions: make sure the paths given after source: have sufficient read/write permissions; if necessary, run chmod -R 777 on them.
- ZooKeeper: if more ZooKeeper nodes are needed, add further zookeeper services and give each a different ipv4_address.
Start the containers
# Run in the directory containing docker-compose.yml
docker-compose up -d
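All six Hadoop containers should now be running; a quick check:
docker ps --format '{{.Names}}\t{{.Status}}' | grep hadoop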
Editing the configuration files
These files configure the Hadoop and HBase environments. Inside the containers, /opt/hadoop and /opt/hbase are bind-mounted shared paths, so only the copies under the local host paths need to be edited.
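Concretely, with the bind mounts used above, the files edited in this section live on the host at:
/opt/docker-data/hadoop-3.4.0/etc/hadoop/   (hadoop-env.sh, core-site.xml, hdfs-site.xml, mapred-site.xml, yarn-site.xml)
/opt/docker-data/hbase-2.5.10-hadoop3/conf/ (hbase-env.sh, hbase-site.xml, regionservers, backup-masters)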
- hadoop-env.sh
Append the following to the end of hadoop-env.sh:
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 # on ARM64 hosts use /usr/lib/jvm/java-8-openjdk-arm64
export HADOOP_MAPRED_HOME=/opt/hadoop
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export HDFS_RESOURCEMANAGER_USER=root
export HDFS_NODEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
export YARN_RESOURCEMANAGER_USER=root
export HDFS_ZKFC_USER=root
export HDFS_JOURNALNODE_USER=root
- core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Default HDFS file system -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<!-- ZooKeeper quorum -->
<property>
<name>ha.zookeeper.quorum</name>
<value>zoo1:2181,zoo2:2181,zoo3:2181</value>
</property>
<!-- Hadoop proxy user configuration -->
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<!-- Enable WebHDFS -->
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<!-- HTTP/HTTPS policy -->
<property>
<name>dfs.http.policy</name>
<value>HTTP_ONLY</value>
</property>
<!-- CORS support -->
<property>
<name>dfs.webhdfs.rest-cors-enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.webhdfs.rest-cors-allowed-origins</name>
<value>*</value>
</property>
<property>
<name>dfs.webhdfs.rest-cors-allowed-methods</name>
<value>GET, POST, PUT, DELETE, OPTIONS</value>
</property>
<property>
<name>dfs.webhdfs.rest-cors-allowed-headers</name>
<value>Content-Type, Authorization, Access-Control-Allow-Origin</value>
</property>
<property>
<name>dfs.webhdfs.rest-cors-allowed-credentials</name>
<value>true</value>
</property>
</configuration>
- hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- Replication factor -->
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<!-- Disable HDFS permission checks -->
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<!-- NameNode storage directory -->
<property>
<name>dfs.namenode.name.dir</name>
<value>/data/hdfs/namenode</value>
</property>
<!-- DataNode storage directory -->
<property>
<name>dfs.datanode.data.dir</name>
<value>/data/hdfs/datanode</value>
</property>
<!-- Preferred active node for automatic failover -->
<property>
<name>dfs.ha.automatic-failover.preferred-active</name>
<value>nn1</value>
</property>
<!-- HA cluster (nameservice) configuration -->
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2,nn3</value>
</property>
<!-- RPC addresses of the NameNodes -->
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>hadoop-master1:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>hadoop-master2:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn3</name>
<value>hadoop-master3:8020</value>
</property>
<!-- HTTP addresses of the NameNodes -->
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>hadoop-master1:9870</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>hadoop-master2:9870</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn3</name>
<value>hadoop-master3:9870</value>
</property>
<!-- Shared edits (JournalNode) URI -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://hadoop-master1:8485;hadoop-master2:8485;hadoop-master3:8485/mycluster</value>
</property>
<!-- Client failover proxy provider -->
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Do not become active while in safe mode -->
<property>
<name>dfs.ha.nn.not-become-active-in-safemode</name>
<value>true</value>
</property>
<!-- JournalNode edits directory -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data/hdfs/journal/node/local/data</value>
</property>
<!-- Enable automatic failover -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- SSH fencing configuration -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
</configuration>
- mapred-site.xml (this file can be left empty for now)
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
</configuration>
- yarn-site.xml
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>cluster1</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>hadoop-master1</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>hadoop-master2</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>hadoop-master1:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>hadoop-master2:8088</value>
</property>
<property>
<name>hadoop.zk.address</name>
<value>zoo1:2181,zoo2:2181,zoo3:2181</value>
</property>
</configuration>
Modify the /etc/hosts file
# Add the following entries
10.10.1.20 hadoop-master1
10.10.1.21 hadoop-master2
10.10.1.22 hadoop-master3
10.10.1.23 hadoop-worker1
10.10.1.24 hadoop-worker2
10.10.1.25 hadoop-worker3
Initialization and service startup
setup.sh
These are the manual initialization steps; many errors can surface during initialization, so work through them step by step to keep the cluster stable.
# SSH configuration check (accept host keys between the master nodes)
docker exec hadoop-master1 ssh -o StrictHostKeyChecking=no hadoop-master2 exit
docker exec hadoop-master1 ssh -o StrictHostKeyChecking=no hadoop-master3 exit
docker exec hadoop-master2 ssh -o StrictHostKeyChecking=no hadoop-master1 exit
docker exec hadoop-master2 ssh -o StrictHostKeyChecking=no hadoop-master3 exit
docker exec hadoop-master3 ssh -o StrictHostKeyChecking=no hadoop-master1 exit
docker exec hadoop-master3 ssh -o StrictHostKeyChecking=no hadoop-master2 exit
# Start the JournalNodes
docker exec hadoop-master1 /opt/hadoop/bin/hdfs --daemon start journalnode
docker exec hadoop-master2 /opt/hadoop/bin/hdfs --daemon start journalnode
docker exec hadoop-master3 /opt/hadoop/bin/hdfs --daemon start journalnode
# The JournalNodes on the worker nodes are optional
docker exec hadoop-worker1 /opt/hadoop/bin/hdfs --daemon start journalnode
docker exec hadoop-worker2 /opt/hadoop/bin/hdfs --daemon start journalnode
docker exec hadoop-worker3 /opt/hadoop/bin/hdfs --daemon start journalnode
# Initialize (format) the NameNode on hadoop-master1
docker exec -it hadoop-master1 bash
/opt/hadoop/bin/hdfs namenode -format
exit # leave the container
docker exec hadoop-master1 /opt/hadoop/bin/hdfs --daemon start namenode
# Bootstrap Standby
docker exec -it hadoop-master2 bash
/opt/hadoop/bin/hdfs namenode -bootstrapStandby
exit
docker exec hadoop-master2 /opt/hadoop/bin/hdfs --daemon start namenode
docker exec -it hadoop-master3 bash
/opt/hadoop/bin/hdfs namenode -bootstrapStandby
exit
docker exec hadoop-master3 /opt/hadoop/bin/hdfs --daemon start namenode
# Stop DFS
docker exec hadoop-master1 /opt/hadoop/sbin/stop-dfs.sh
# (Re)format the HA state in ZooKeeper (required before the first zkfc start; uncomment and run if needed)
# docker exec -it hadoop-master1 bash
# /opt/hadoop/bin/hdfs zkfc -formatZK
# exit
# Start zkfc and DFS/YARN
docker exec hadoop-master1 /opt/hadoop/bin/hdfs --daemon start zkfc
docker exec hadoop-master1 /opt/hadoop/sbin/start-dfs.sh
docker exec hadoop-master1 /opt/hadoop/sbin/start-yarn.sh
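Once everything is up, you can optionally confirm that exactly one NameNode and one ResourceManager report "active" (both subcommands are part of Hadoop 3):
docker exec hadoop-master1 /opt/hadoop/bin/hdfs haadmin -getAllServiceState
docker exec hadoop-master1 /opt/hadoop/bin/yarn rmadmin -getAllServiceState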
clear_namenode_data.sh
A script for cleaning up NameNode data.
Use it only when (re)initializing Hadoop.
#!/bin/bash
# List of containers
containers=("hadoop-master1" "hadoop-master2" "hadoop-master3" "hadoop-worker1" "hadoop-worker2" "hadoop-worker3")
# Directories and files to remove
dirs=(
"/data/hdfs/journal/node/local/data/mycluster"
"/tmp/hadoop-root/dfs/data"
)
files=(
"/tmp/hadoop-root-journalnode.pid"
)
# For each container, check for and remove the specified directories and files
for container in "${containers[@]}"; do
echo "Checking and removing directories and files in $container..."
# Remove directories
for dir in "${dirs[@]}"; do
docker exec "$container" sh -c "if [ -d '$dir' ]; then rm -r '$dir'; echo 'Removed $dir from $container'; else echo '$dir does not exist in $container'; fi"
done
# Remove files
for file in "${files[@]}"; do
docker exec "$container" sh -c "if [ -f '$file' ]; then rm '$file'; echo 'Removed $file from $container'; else echo '$file does not exist in $container'; fi"
done
done
echo "Cleanup completed."
start.sh
A script that starts the cluster services; stop any services that are already running first (e.g. with stop.sh below), then start again.
#! /bin/bash
echo "starting all journalnode"
docker exec hadoop-master1 /opt/hadoop/bin/hdfs --daemon start journalnode
docker exec hadoop-master2 /opt/hadoop/bin/hdfs --daemon start journalnode
docker exec hadoop-master3 /opt/hadoop/bin/hdfs --daemon start journalnode
docker exec hadoop-worker1 /opt/hadoop/bin/hdfs --daemon start journalnode
docker exec hadoop-worker2 /opt/hadoop/bin/hdfs --daemon start journalnode
docker exec hadoop-worker3 /opt/hadoop/bin/hdfs --daemon start journalnode
echo "starting hadoop-master1..."
docker exec hadoop-master1 /opt/hadoop/bin/hdfs --daemon start namenode
sleep 2
echo "starting hadoop-master2..."
docker exec hadoop-master2 /opt/hadoop/bin/hdfs --daemon start namenode
echo "starting hadoop-master3..."
docker exec hadoop-master3 /opt/hadoop/bin/hdfs --daemon start namenode
sleep 2
echo "starting zkfc..."
docker exec hadoop-master1 /opt/hadoop/bin/hdfs --daemon start zkfc
echo "starting dfs..."
docker exec hadoop-master1 /opt/hadoop/sbin/start-dfs.sh
sleep 3
echo "starting yarn..."
docker exec hadoop-master1 /opt/hadoop/sbin/start-yarn.sh
echo "Done!"
stop.sh
A script that stops all cluster services.
echo "stoping yarn..."
docker exec hadoop-master1 /opt/hadoop/sbin/stop-yarn.sh
sleep 3
echo "stoping dfs..."
docker exec hadoop-master1 /opt/hadoop/sbin/stop-dfs.sh
echo "stoping zkfc..."
docker exec hadoop-master1 /opt/hadoop/bin/hdfs --daemon stop zkfc
sleep 2
echo "stoping hadoop-master3..."
docker exec hadoop-master3 /opt/hadoop/bin/hdfs --daemon stop namenode
echo "stoping hadoop-master2..."
docker exec hadoop-master2 /opt/hadoop/bin/hdfs --daemon stop namenode
sleep 2
echo "stoping hadoop-master1..."
docker exec hadoop-master1 /opt/hadoop/bin/hdfs --daemon stop namenode
echo "stoping all journalnode"
docker exec hadoop-worker3 /opt/hadoop/bin/hdfs --daemon stop journalnode
docker exec hadoop-worker2 /opt/hadoop/bin/hdfs --daemon stop journalnode
docker exec hadoop-worker1 /opt/hadoop/bin/hdfs --daemon stop journalnode
docker exec hadoop-master3 /opt/hadoop/bin/hdfs --daemon stop journalnode
docker exec hadoop-master2 /opt/hadoop/bin/hdfs --daemon stop journalnode
docker exec hadoop-master1 /opt/hadoop/bin/hdfs --daemon stop journalnode
check_services.sh
A script that checks the status of all services.
#! /bin/bash
echo "====================hadoop-master1:status===================="
docker exec hadoop-master1 jps
echo "====================hadoop-master2:status===================="
docker exec hadoop-master2 jps
echo "====================hadoop-master3:status===================="
docker exec hadoop-master3 jps
echo "====================hadoop-worker1:status===================="
docker exec hadoop-worker1 jps
echo "====================hadoop-worker2:status===================="
docker exec hadoop-worker2 jps
echo "====================hadoop-worker3:status===================="
docker exec hadoop-worker3 jps
echo "=========================zoo1:status========================="
docker exec zoo1 /apache-zookeeper-3.7.1-bin/bin/zkServer.sh status
HBase startup guide
Modify the HBase configuration files
- Modify the hbase-env.sh file
Path: /opt/docker-data/hbase-2.5.10-hadoop3/conf/hbase-env.sh
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 # on Apple/ARM64 machines, use the ARM JAVA_HOME (/usr/lib/jvm/java-8-openjdk-arm64)
export HBASE_MANAGES_ZK=false # make sure the leading # is removed and the value is false
export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP="true" # make sure the leading # is removed and the value is true
- Modify the hbase-site.xml file
Path: /opt/docker-data/hbase-2.5.10-hadoop3/conf/hbase-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Run the cluster in distributed mode -->
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<!-- HBase root directory; make sure the path points at the HDFS cluster -->
<property>
<name>hbase.rootdir</name>
<value>hdfs://hadoop-master1:8020/hbase</value>
</property>
<!-- Temporary directory -->
<property>
<name>hbase.tmp.dir</name>
<value>./tmp</value>
</property>
<!-- Allow use of the local file system (do not enforce stream capabilities) -->
<property>
<name>hbase.unsafe.stream.capability.enforce</name>
<value>false</value>
</property>
<!-- ZooKeeper configuration -->
<property>
<name>hbase.zookeeper.quorum</name>
<value>zoo1,zoo2,zoo3</value>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
<!-- Enable ZooKeeper-managed failover of the HBase Master -->
<property>
<name>hbase.master.wait.on.zk</name>
<value>true</value>
</property>
<!-- ZNode path in ZooKeeper used to track the HBase Master state -->
<property>
<name>hbase.master.znode</name>
<value>/hbase/master</value>
</property>
</configuration>
- Modify the regionservers file
Path: /opt/docker-data/hbase-2.5.10-hadoop3/conf/regionservers
hadoop-master1
hadoop-master2
hadoop-master3
- Create the backup-masters file
Path: /opt/docker-data/hbase-2.5.10-hadoop3/conf/backup-masters
hadoop-master1
hadoop-master2
hadoop-master3
- Copy configuration files
Copy Hadoop's hdfs-site.xml and core-site.xml into HBase's conf directory, for example as shown below.
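For example, on the host, assuming the bind-mount paths used above:
cp /opt/docker-data/hadoop-3.4.0/etc/hadoop/core-site.xml /opt/docker-data/hbase-2.5.10-hadoop3/conf/
cp /opt/docker-data/hadoop-3.4.0/etc/hadoop/hdfs-site.xml /opt/docker-data/hbase-2.5.10-hadoop3/conf/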
Starting HBase
- Start HBase
docker exec -it hadoop-master1 bash
/opt/hbase/bin/start-hbase.sh
- Use the jps command to check whether the HMaster process has started.
jps
Example output:
12112 NodeManager
11601 DFSZKFailoverController
12818 Jps
11109 NameNode
11430 JournalNode
11223 DataNode
12623 HMaster
11999 ResourceManager
- Log in to the HBase shell
/opt/hbase/bin/hbase shell
- Use the status command to view the HBase cluster status.
status
Example output:
1 active master, 1 backup masters, 2 servers, 0 dead, 0.5000 average load
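For a further end-to-end check that reads and writes work, a minimal smoke test in the HBase shell (the table name here is just an example):
create 'smoke_test', 'cf'
put 'smoke_test', 'row1', 'cf:a', 'value1'
scan 'smoke_test'
disable 'smoke_test'
drop 'smoke_test'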
Note: the HMaster must run on the node hosting the active NameNode, otherwise it fails with ERROR: KeeperErrorCode = NoNode for /hbase/master. The same error can also appear when ZooKeeper's dataDir has been reformatted and no longer matches the paths HBase expects; in that case the ZooKeeper data needs to be cleared (reformatted), for example as sketched below.
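One way to clear HBase's stale state from ZooKeeper (an illustrative sketch: stop HBase first; the zkCli.sh path matches the official image, and deleteall requires ZooKeeper 3.5 or newer):
docker exec -it zoo1 /apache-zookeeper-3.7.1-bin/bin/zkCli.sh -server zoo1:2181
# then, at the zkCli prompt:
deleteall /hbase
quit
Restart HBase afterwards so it recreates its znodes.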