我的团队和我一直在与弹性搜索进行长时间的战斗,我想知道是否有人可以提供帮助。我们在kubernetes集群上的ubuntu docker容器上运行ES 2.1。所有节点都安装相同的驱动器以进行日志和数据存储。群集本身工作正常,直到节点发生故障,或者我们重建群集,然后它永远无法自行恢复。例如,现在我杀死了所有的实例,当它恢复时它将碎片恢复到44%并且不会再进一步了。通常我们可以通过删除某些节点并强制分配其他节点来获取所有分片
The logs only seem to yield: IllegalIndexShardStateException[CurrentState[RECOVERING] operations only allowed when shard state is one of [POST_RECOVERY, STARTED, RELOCATED]]
UnavailableShardsException[[.marvel-es-2016.03.15][0] Primary shard is not active or isn't assigned to a known node. T imeout: [1m], request: org.elasticsearch.action.bulk.BulkShardRequest@186bacfb]]
这是我的配置:
cluster.name: Alice
security.manager.enabled: false
cluster.routing.allocation.enable: all
cluster.routing.allocation.allow_rebalance: always
indices.store.throttle.type: none
path.data: /data/data
path.logs: /data/logs
network.host: 0.0.0.0
script.inline: on
script.indexed: on
script.file: on
cloud:
kubernetes:
service: elasticsearch-cluster
namespace: els
discovery:
type: kubernetes
discovery.zen.minimum_master_node: 2
我也在使用以下初始化脚本:
#!/bin/sh
#
# /etc/init.d/elasticsearch -- startup script for Elasticsearch
#
# Written by Miquel van Smoorenburg <miquels@cistron.nl>.
# Modified for Debian GNU/Linux by Ian Murdock <imurdock@gnu.ai.mit.edu>.
# Modified for Tomcat by Stefan Gybas <sgybas@debian.org>.
# Modified for Tomcat6 by Thierry Carrez <thierry.carrez@ubuntu.com>.
# Additional improvements by Jason Brittain <jason.brittain@mulesoft.com>.
# Modified by Nicolas Huray for Elasticsearch <nicolas.huray@gmail.com>.
#
### BEGIN INIT INFO
# Provides: elasticsearch
# Required-Start: $network $remote_fs $named
# Required-Stop: $network $remote_fs $named
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: Starts elasticsearch
# Description: Starts elasticsearch using start-stop-daemon
### END INIT INFO
PATH=/bin:/usr/bin:/sbin:/usr/sbin
NAME=elasticsearch
DESC="Elasticsearch Server"
DEFAULT=/etc/default/$NAME
if [ `id -u` -ne 0 ]; then
echo "You need root privileges to run this script"
exit 1
fi
. /lib/lsb/init-functions
if [ -r /etc/default/rcS ]; then
. /etc/default/rcS
fi
# The following variables can be overwritten in $DEFAULT
# Run Elasticsearch as this user ID and group ID
ES_USER=ubuntu
ES_GROUP=ubuntu
# Directory where the Elasticsearch binary distribution resides
ES_HOME=/opt/elasticsearch
# Heap size defaults to 256m min, 1g max
# Set ES_HEAP_SIZE to 50% of available RAM, but no more than 31g
ES_HEAP_SIZE=4g
# Heap new generation
#ES_HEAP_NEWSIZE=
# max direct memory
#ES_DIRECT_SIZE=
# Additional Java OPTS
#ES_JAVA_OPTS=
# Maximum number of open files
MAX_OPEN_FILES=65535
# Maximum amount of locked memory
#MAX_LOCKED_MEMORY=
# Elasticsearch log directory
LOG_DIR=/data/logs
# Elasticsearch data directory
DATA_DIR=/data/data
# Elasticsearch configuration directory
CONF_DIR=/opt/elasticsearch/config
# Maximum number of VMA (Virtual Memory Areas) a process can own
MAX_MAP_COUNT=262144
# Path to the GC log file
#ES_GC_LOG_FILE=/var/log/elasticsearch/gc.log
# Elasticsearch PID file directory
PID_DIR="/var/run/elasticsearch"
# End of variables that can be overwritten in $DEFAULT
# overwrite settings from default file
if [ -f "$DEFAULT" ]; then
. "$DEFAULT"
fi
# CONF_FILE setting was removed
if [ ! -z "$CONF_FILE" ]; then
echo "CONF_FILE setting is no longer supported. elasticsearch.yml must be placed in the config directory and cannot be renamed."
exit 1
fi
# Define other required variables
PID_FILE="$PID_DIR/$NAME.pid"
DAEMON=$ES_HOME/bin/elasticsearch
DAEMON_OPTS="-d -p $PID_FILE --default.path.home=$ES_HOME --default.path.logs=$LOG_DIR --default.path.data=$DATA_DIR --default.path.conf=$CONF_DIR"
export ES_HEAP_SIZE
export ES_HEAP_NEWSIZE
export ES_DIRECT_SIZE
export ES_JAVA_OPTS
export ES_GC_LOG_FILE
export JAVA_HOME
# Check DAEMON exists
test -x $DAEMON || exit 0
checkJava() {
if [ -x "$JAVA_HOME/bin/java" ]; then
JAVA="$JAVA_HOME/bin/java"
else
JAVA=`which java`
fi
if [ ! -x "$JAVA" ]; then
echo "Could not find any executable java binary. Please install java in your PATH or set JAVA_HOME"
exit 1
fi
}
case "$1" in
start)
checkJava
if [ -n "$MAX_LOCKED_MEMORY" -a -z "$ES_HEAP_SIZE" ]; then
log_failure_msg "MAX_LOCKED_MEMORY is set - ES_HEAP_SIZE must also be set"
exit 1
fi
log_daemon_msg "Starting $DESC"
pid=`pidofproc -p $PID_FILE elasticsearch`
if [ -n "$pid" ] ; then
log_begin_msg "Already running."
log_end_msg 0
exit 0
fi
# Prepare environment
mkdir -p "$LOG_DIR" "$DATA_DIR" && chown "$ES_USER":"$ES_GROUP" "$LOG_DIR" "$DATA_DIR"
# Ensure that the PID_DIR exists (it is cleaned at OS startup time)
if [ -n "$PID_DIR" ] && [ ! -e "$PID_DIR" ]; then
mkdir -p "$PID_DIR" && chown "$ES_USER":"$ES_GROUP" "$PID_DIR"
fi
if [ -n "$PID_FILE" ] && [ ! -e "$PID_FILE" ]; then
touch "$PID_FILE" && chown "$ES_USER":"$ES_GROUP" "$PID_FILE"
fi
if [ -n "$MAX_OPEN_FILES" ]; then
ulimit -n $MAX_OPEN_FILES
fi
if [ -n "$MAX_LOCKED_MEMORY" ]; then
ulimit -l $MAX_LOCKED_MEMORY
fi
if [ -n "$MAX_MAP_COUNT" -a -f /proc/sys/vm/max_map_count ]; then
sysctl -q -w vm.max_map_count=$MAX_MAP_COUNT
fi
# Start Daemon
start-stop-daemon -d $ES_HOME --start -b --user "$ES_USER" -c "$ES_USER" --pidfile "$PID_FILE" --exec $DAEMON -- $DAEMON_OPTS
return=$?
if [ $return -eq 0 ]; then
i=0
timeout=10
# Wait for the process to be properly started before exiting
until { cat "$PID_FILE" | xargs kill -0; } >/dev/null 2>&1
do
sleep 1
i=$(($i + 1))
if [ $i -gt $timeout ]; then
log_end_msg 1
exit 1
fi
done
fi
log_end_msg $return
exit $return
;;
stop)
log_daemon_msg "Stopping $DESC"
if [ -f "$PID_FILE" ]; then
start-stop-daemon --stop --pidfile "$PID_FILE" \
--user "$ES_USER" \
--quiet \
--retry forever/TERM/20 > /dev/null
if [ $? -eq 1 ]; then
log_progress_msg "$DESC is not running but pid file exists, cleaning up"
elif [ $? -eq 3 ]; then
PID="`cat $PID_FILE`"
log_failure_msg "Failed to stop $DESC (pid $PID)"
exit 1
fi
rm -f "$PID_FILE"
else
log_progress_msg "(not running)"
fi
log_end_msg 0
;;
status)
status_of_proc -p $PID_FILE elasticsearch elasticsearch && exit 0 || exit $?
;;
restart|force-reload)
if [ -f "$PID_FILE" ]; then
$0 stop
sleep 1
fi
$0 start
;;
*)
log_success_msg "Usage: $0 {start|stop|restart|force-reload|status}"
exit 1
;;
esac
exit 0
有人有什么想法吗?
答案 0 :(得分:0)
我很担心:
所有节点都安装了用于日志和数据存储的相同驱动器
也许我误解了你说的话,但你的节点不应该共享同一个数据文件夹。每个节点都需要写入自己的数据文件夹