我使用 Docker Compose 成功搭建了一个 RabbitMQ 集群,以便了解集群的相关概念。它工作正常,配置如下。docker-compose 文件:
# Compose v1-style file: services are declared at the top level (there is no
# `services:` key) and containers find each other through `links`.

# rabbit1: first node. No CLUSTERED / CLUSTER_WITH variables are set, so
# startclusternode.sh starts it as a plain, non-clustered broker.
rabbit1:
  image: ipf-queue-node
  hostname: rabbit1
  # NOTE(review): ALL already grants every capability, so NET_ADMIN and
  # SYS_ADMIN are redundant; kept to preserve the original configuration.
  cap_add:
    - ALL
    - NET_ADMIN
    - SYS_ADMIN
  ports:
    - "5671:5671"
    - "5672:5672"
    - "15672:15672"

# rabbit2: second node. The environment below makes startclusternode.sh join
# rabbit@rabbit1 as a RAM node; the link makes the name `rabbit1` resolvable.
rabbit2:
  image: ipf-queue-node
  hostname: rabbit2
  # NOTE(review): same redundancy as on rabbit1.
  cap_add:
    - ALL
    - NET_ADMIN
    - SYS_ADMIN
  links:
    - rabbit1
  environment:
    - CLUSTERED=true
    - CLUSTER_WITH=rabbit1
    - RAM_NODE=true
  # Host ports are shifted so both nodes can be published on the same host.
  ports:
    - "5675:5671"
    - "5673:5672"
    - "15673:15672"
Dockerfile 内容:
# Image for one RabbitMQ cluster node, built on top of the queue-base image.
FROM queue-base

# Create directories (-p also creates /opt/simulator on the way to .../tools)
RUN mkdir -p /opt/rabbit /opt/simulator/tools

# Copy the files from the local repository into the container.
# COPY is used instead of ADD: these are plain local files/directories, so
# ADD's archive-extraction and URL handling are not wanted here.
COPY rabbitmq.config /etc/rabbitmq/
COPY rabbitmq-env.conf /etc/rabbitmq/
COPY erlang.cookie /var/lib/rabbitmq/.erlang.cookie
COPY startclusternode.sh /opt/rabbit/
COPY debugnodes.sh /opt/rabbit/
COPY tl /bin/tl
COPY rl /bin/rl
COPY rst /bin/rst

# Add the simulator tooling
COPY simulator_tools/ /opt/simulator/tools/

# NOTE(review): both sources are written to the same destination /tmp/ssl;
# confirm that merging testca and server into one directory is intended.
COPY ./testca /tmp/ssl
COPY ./server /tmp/ssl

# Set file permissions and ownership in a single layer.
# The Erlang cookie is made readable only by its owner, the rabbitmq user.
# NOTE(review): 777 makes the scripts and simulator tree world-writable;
# kept as in the original, but 755 would be enough to execute them.
RUN chmod 644 /etc/rabbitmq/rabbitmq.config /etc/rabbitmq/rabbitmq-env.conf \
 && chmod 400 /var/lib/rabbitmq/.erlang.cookie \
 && chmod 777 /opt/rabbit/startclusternode.sh /opt/rabbit/debugnodes.sh \
 && chmod 777 /bin/tl /bin/rl /bin/rst \
 && chmod -R 777 /opt/simulator \
 && chown rabbitmq:rabbitmq /var/lib/rabbitmq/.erlang.cookie

# Document the ports the broker listens on. EXPOSE does not publish anything
# to the host by itself; publishing is done at run time (-p / `ports:`).
EXPOSE 5671 5672 15672 25672

# Run this to debug the cluster nodes by allowing ssh login
#CMD ["/opt/rabbit/debugnodes.sh"]

# Run this to autostart the cluster nodes.
# Exec form runs the script directly instead of wrapping it in `/bin/sh -c`.
CMD ["/opt/rabbit/startclusternode.sh"]
startclusternode.sh是设置集群的脚本:
#!/bin/bash
# Container entrypoint for a RabbitMQ node.
#
# Behaviour is driven by three optional environment variables:
#   CLUSTERED     non-empty -> this node is part of a cluster
#   CLUSTER_WITH  host name of the node to join (rabbit@$CLUSTER_WITH)
#   RAM_NODE      non-empty -> join as a RAM node instead of a DISC node
#
# Progress is appended to $logfile; the script finally tails the RabbitMQ
# logs so the container keeps a foreground process.

logfile="/tmp/rabbitnode.log"
firsttimefile="/tmp/firsttimerunning"
curhostname=$(hostname)
username="<<username>>"
password="<<password>>"

# Start a fresh log for this boot.
echo "" > "$logfile"
echo "New Start Date:" >> "$logfile"
date >> "$logfile"
echo "" >> "$logfile"

# Background provisioning: after a fixed 40 s delay, create the admin user,
# the vhosts, their permissions and the HA policy. It runs in a subshell so
# the rest of the script can start the broker in the meantime.
# NOTE(review): the delay is a fixed guess; nothing checks that the broker is
# actually up when these commands run.
(
  sleep 40
  rabbitmqctl add_user "$username" "$password"
  rabbitmqctl set_user_tags "$username" administrator
  rabbitmqctl add_vhost "$curhostname"
  rabbitmqctl add_vhost localhost
  rabbitmqctl set_permissions -p "$curhostname" "$username" ".*" ".*" ".*"
  rabbitmqctl set_permissions -p localhost "$username" ".*" ".*" ".*"
  rabbitmqctl set_policy ha-all "" '{"ha-mode":"all","ha-sync-mode":"automatic"}'
) &

sleep 5

# For version 3.5.6 the first time running the cluster needs to enable the plugins
if [ -f "$firsttimefile" ]; then
  echo "First Time Running Enabling Plugins" >> "$logfile"
  # NOTE(review): the clustered branch below spells this flag -detached;
  # confirm that -d is what is intended here.
  /usr/sbin/rabbitmq-server -d &
  echo "Waiting for RabbitMQ Server to start" >> "$logfile"
  sleep 3
  echo "Enabling Plugins" >> "$logfile"
  /usr/sbin/rabbitmq-plugins enable rabbitmq_stomp rabbitmq_management rabbitmq_management_agent rabbitmq_management_visualiser rabbitmq_federation rabbitmq_federation_management sockjs >> "$logfile"
  echo "Waiting for Plugins to finish" >> "$logfile"
  sleep 1
  echo "Stopping the RabbitMQ using stop_app" >> "$logfile"
  /usr/sbin/rabbitmqctl stop_app
  echo "Stopping the RabbitMQ using stop" >> "$logfile"
  /usr/sbin/rabbitmqctl stop
  echo "Stopping the RabbitMQ Server" >> "$logfile"
  # The [r] bracket keeps grep from matching its own command line, and the
  # guard avoids calling `kill` with no arguments when nothing is left.
  server_pids=$(ps auwwx | grep '[r]abbitmq-server' | awk '{print $2}')
  if [ -n "$server_pids" ]; then
    # Intentionally unquoted: one argument per PID.
    kill -9 $server_pids
  fi
  sleep 1
  echo "Done First Time Running Enabling Plugins" >> "$logfile"
  rm -f "$firsttimefile" >> "$logfile"
  echo "Done Cleanup First Time File" >> "$logfile"

  # Allow the cluster nodes to wait for the master to start the first time
  if [ -z "$CLUSTERED" ]; then
    echo "Ignoring as this is the server node" >> "$logfile"
  elif [ -z "$CLUSTER_WITH" ]; then
    echo "Ignoring as this is the cluster master node" >> "$logfile"
  else
    echo "Waiting for the master node to start up" >> "$logfile"
    sleep 5
    echo "Done waiting for the master node to start up" >> "$logfile"
  fi
fi

if [ -z "$CLUSTERED" ]; then
  echo "Starting non-Clustered Server Instance" >> "$logfile"
  # if not clustered then start it normally as if it is a single server
  /usr/sbin/rabbitmq-server >> "$logfile"
  echo "Done Starting non-Clustered Server Instance" >> "$logfile"
elif [ -z "$CLUSTER_WITH" ]; then
  # If clustered, but cluster is not specified then start normally as this could be the first server in the cluster
  echo "Starting Single Server Instance" >> "$logfile"
  /usr/sbin/rabbitmq-server >> "$logfile"
  echo "Done Starting Single Server Instance" >> "$logfile"
else
  echo "Starting Clustered Server Instance as a DETACHED single instance" >> "$logfile"
  /usr/sbin/rabbitmq-server -detached >> "$logfile"
  echo "Stopping App with /usr/sbin/rabbitmqctl stop_app" >> "$logfile"
  /usr/sbin/rabbitmqctl stop_app >> "$logfile"
  # This should attempt to join a cluster master node from the yaml file
  if [ -z "$RAM_NODE" ]; then
    echo "Attempting to join as DISC node: /usr/sbin/rabbitmqctl join_cluster rabbit@$CLUSTER_WITH" >> "$logfile"
    /usr/sbin/rabbitmqctl join_cluster "rabbit@$CLUSTER_WITH" >> "$logfile"
  else
    echo "Attempting to join as RAM node: /usr/sbin/rabbitmqctl join_cluster --ram rabbit@$CLUSTER_WITH" >> "$logfile"
    /usr/sbin/rabbitmqctl join_cluster --ram "rabbit@$CLUSTER_WITH" >> "$logfile"
  fi
  echo "Starting App" >> "$logfile"
  /usr/sbin/rabbitmqctl start_app >> "$logfile"
  echo "Done Starting Cluster Node" >> "$logfile"
fi

# Tail to keep the foreground process active.
tail -f /var/log/rabbitmq/*
问题是,当我尝试在 Kubernetes 上做同样的设置时,从节点(slave)无法连接到主节点(master)。我采用的方法是:为主节点创建一个 Pod,为从节点创建另一个 Pod,并通过环境变量传入主节点的主机名(目前是硬编码的)。我还检查了 /tmp/rabbitmq.log 中的日志,它正确地读取到了所有环境变量,但仍然无法向主节点注册。我也尝试过用 rabbitmqctl 命令手动加入集群,但同样没有成功,提示主机不可达(host unreachable)。我还尝试过修改 /etc/hosts 文件。
根据我的理解,Kubernetes 中的 Pod 是通过 Service 相互通信的。我猜正因为如此,直接传递容器主机名行不通,而 RabbitMQ 集群又是基于主机名工作的。
有人尝试过什么解决办法吗?我希望主节点和从节点运行在不同的机器(node)上。以下是主节点和从节点 Pod 的配置内容,首先是主节点:
# Service in front of the master RabbitMQ pod (pods labelled
# app: queue-master).
# NOTE(review): only the AMQP, AMQPS and management ports are listed; the
# ports RabbitMQ nodes use to talk to each other (epmd 4369, inter-node
# 25672) are not reachable through this Service. Confirm whether clustering
# through the Service is intended.
apiVersion: v1
kind: Service
metadata:
  name: rabbitmqsvc
  labels:
    app: queue-master
spec:
  type: NodePort
  # Fixed cluster-internal address; must be inside the cluster's service
  # CIDR and unused.
  clusterIP: 10.16.0.121
  selector:
    app: queue-master
  ports:
    # No nodePort given here, so Kubernetes picks one from its NodePort range.
    - name: queue-rw-port
      port: 5672
    - name: queue-mgt-port
      port: 15672
      nodePort: 31606
    - name: queue-ssl
      port: 5671
      nodePort: 32718
---
# ReplicationController that keeps exactly one master RabbitMQ pod running.
# No CLUSTERED / CLUSTER_WITH variables are passed, so startclusternode.sh
# starts this pod as a plain, non-clustered broker.
apiVersion: v1
kind: ReplicationController
metadata:
  name: rabbitmq-controller
  labels:
    app: queue-master
spec:
  replicas: 1
  selector:
    app: queue-master
  template:
    metadata:
      name: rabbitmq-pod
      # Must match spec.selector above and the rabbitmqsvc Service selector.
      labels:
        app: queue-master
    spec:
      # Schedule the master only on machines labelled nodesize=small1.
      nodeSelector:
        nodesize: small1
      containers:
        - name: rabbitmq-master
          image: 172.17.0.1:5000/queue-node
          ports:
            - name: queue-rw-port
              containerPort: 5672
            - name: queue-mgt-port
              containerPort: 15672
            - name: queue-ssl
              containerPort: 5671
从节点:
# Service in front of the slave RabbitMQ pod (pods labelled app: queue-slave).
apiVersion: v1
kind: Service
metadata:
  name: rabbitmqsvc-slave
  labels:
    app: queue-slave
spec:
  type: NodePort
  # Fixed cluster-internal address; must be inside the cluster's service
  # CIDR and unused.
  clusterIP: 10.16.0.122
  selector:
    app: queue-slave
  ports:
    # No nodePort given here, so Kubernetes picks one from its NodePort range.
    - name: queue-rw-port
      port: 5672
    - name: queue-mgt-port
      port: 15672
      nodePort: 31607
    - name: queue-ssl
      port: 5671
      nodePort: 32719
---
# ReplicationController that keeps exactly one slave RabbitMQ pod running.
# The env block makes startclusternode.sh try to join rabbit@$CLUSTER_WITH
# as a RAM node.
apiVersion: v1
kind: ReplicationController
metadata:
  name: rabbitmq-controller-slave
  labels:
    app: queue-slave
spec:
  replicas: 1
  selector:
    app: queue-slave
  template:
    metadata:
      name: rabbitmq-pod
      # Must match spec.selector above and the rabbitmqsvc-slave selector.
      labels:
        app: queue-slave
    spec:
      # Schedule the slave only on machines labelled nodesize=small2.
      nodeSelector:
        nodesize: small2
      containers:
        - name: rabbitmq-slave
          image: 172.17.0.1:5000/queue-node
          env:
            # Quoted so the container receives the string "true".
            - name: CLUSTERED
              value: "true"
            # NOTE(review): hard-coded name of the current master pod. Pod
            # names created by a ReplicationController get a random suffix,
            # so this value goes stale whenever the master pod is recreated.
            - name: CLUSTER_WITH
              value: "rabbitmq-controller-2ll1s"
            - name: RAM_NODE
              value: "true"
            # NOTE(review): startclusternode.sh as shown never reads
            # HOST_NAME (it calls `hostname`); confirm this is used anywhere.
            - name: HOST_NAME
              value: "rabbit2"
          ports:
            - name: queue-rw-port
              containerPort: 5672
            - name: queue-mgt-port
              containerPort: 15672
            - name: queue-ssl
              containerPort: 5671
答案 0 :(得分:4)
在 Kubernetes 中无法使用 Docker 的主机名(hostname)和链接(links)。它们只在单台机器上有效,而 Kubernetes 是被设计为面向集群的解决方案。
请在集群中启用 DNS。先创建主节点的 Service(rabbitmqsvc,已经完成),然后在从节点的配置中使用主节点 Service 的 DNS 名称:
# Service DNS names have the form <service>.<namespace>.svc.<cluster-domain>.
# Assumes the Service lives in the `default` namespace and the cluster domain
# is `cluster.local` - TODO confirm. From a pod in the same namespace the
# short name `rabbitmqsvc` resolves as well.
- name: CLUSTER_WITH
  value: "rabbitmqsvc.default.svc.cluster.local"
您也可以使用环境变量(environment variables),但在我看来 DNS 是更好的选择。
答案 1 :(得分:2)
我们刚刚为kubernetes开源了一个部署就绪的rabbitmq集群。 它使用StatefulSets,因此它需要Kubernetes 1.5.X或更高版本。
您可以在此处找到它:https://github.com/nanit/kubernetes-rabbitmq-cluster