我正在运行一个简单的DCOS群集用于测试,它有一个主节点和两个私有节点。第一天我安装了Kafka并做了一些测试,一切正常。 然而,在重新启动群集后,Kafka处于失败状态;再次重启之后它恢复了健康,但没有任何broker。过了一会儿,我发现这是因为第二个节点完全没有被使用。
当第一个节点的CPU使用率达到100%时,其余任务只是等待,而不会在第二个节点上运行,它们在Marathon中处于等待(Waiting)状态。第二个节点的CPU和RAM分配始终为0%,没有任何变化。所有任务都只在第一个节点上运行。 第二个节点处于健康状态且显示正确,我可以从主节点ssh到它,没有任何问题。重启也不起作用。
我该如何解决这个问题?我不知道该从哪里开始排查。
@edit:
kafka json:
{
"id": "/kafka",
"cmd": "export LD_LIBRARY_PATH=$MESOS_SANDBOX/libmesos-bundle/lib:$LD_LIBRARY_PATH && export MESOS_NATIVE_JAVA_LIBRARY=$(ls $MESOS_SANDBOX/libmesos-bundle/lib/libmesos-*.so) && export PATH=$(ls -d $MESOS_SANDBOX/jre*/bin):$PATH && ./scheduler/bin/kafka-scheduler server ./scheduler/conf/scheduler.yml",
"cpus": 1,
"mem": 3072,
"disk": 0,
"instances": 1,
"acceptedResourceRoles": [
"*"
],
"env": {
"KAFKA_OVERRIDE_LEADER_IMBALANCE_CHECK_INTERVAL_SECONDS": "300",
"KAFKA_OVERRIDE_OFFSET_METADATA_MAX_BYTES": "4096",
"KAFKA_OVERRIDE_PRODUCER_PURGATORY_PURGE_INTERVAL_REQUESTS": "1000",
"KAFKA_OVERRIDE_ZOOKEEPER_SESSION_TIMEOUT_MS": "6000",
"BROKER_STATSD_PORT": "0",
"KAFKA_OVERRIDE_LEADER_IMBALANCE_PER_BROKER_PERCENTAGE": "10",
"KAFKA_OVERRIDE_CONTROLLED_SHUTDOWN_MAX_RETRIES": "3",
"KAFKA_OVERRIDE_LOG_CLEANER_DEDUPE_BUFFER_SIZE": "134217728",
"LD_LIBRARY_PATH": "/opt/mesosphere/lib",
"KAFKA_OVERRIDE_CONTROLLER_SOCKET_TIMEOUT_MS": "30000",
"BROKER_JMX_REMOTE_AUTH": "false",
"KAFKA_OVERRIDE_OFFSETS_LOAD_BUFFER_SIZE": "5242880",
"BROKER_MEM": "2304",
"KAFKA_OVERRIDE_QUEUED_MAX_REQUESTS": "500",
"KAFKA_OVERRIDE_LOG_CLEANER_IO_BUFFER_LOAD_FACTOR": "0.9",
"KAFKA_OVERRIDE_OFFSETS_TOPIC_REPLICATION_FACTOR": "3",
"BROKER_COUNT": "1",
"KAFKA_OVERRIDE_QUOTA_CONSUMER_DEFAULT": "9223372036854775807",
"KAFKA_OVERRIDE_INTER_BROKER_PROTOCOL_VERSION": "0.10.0.0",
"BROKER_HEALTH_CHECK_MAX_FAILURES": "3",
"KAFKA_OVERRIDE_LOG_CLEANER_DELETE_RETENTION_MS": "86400000",
"KAFKA_OVERRIDE_OFFSETS_TOPIC_SEGMENT_BYTES": "104857600",
"BROKER_CPUS": "1",
"KAFKA_OVERRIDE_LOG_FLUSH_OFFSET_CHECKPOINT_INTERVAL_MS": "60000",
"KAFKA_OVERRIDE_OFFSETS_COMMIT_TIMEOUT_MS": "5000",
"JAVA_HOME": "./jre1.8.0_121",
"KAFKA_OVERRIDE_AUTO_CREATE_TOPICS_ENABLE": "true",
"KAFKA_OVERRIDE_LOG_CLEANER_THREADS": "1",
"JAVA_URI": "https://downloads.mesosphere.com/java/jre-8u121-linux-x64.tar.gz",
"KAFKA_OVERRIDE_DELETE_TOPIC_ENABLE": "false",
"KAFKA_OVERRIDE_MIN_INSYNC_REPLICAS": "1",
"PHASE_STRATEGY": "INSTALL",
"ENABLE_BROKER_HEALTH_CHECK": "true",
"BROKER_JMX_REMOTE_SSL": "false",
"KAFKA_OVERRIDE_QUOTA_PRODUCER_DEFAULT": "9223372036854775807",
"KAFKA_OVERRIDE_GROUP_MIN_SESSION_TIMEOUT_MS": "6000",
"KAFKA_OVERRIDE_SOCKET_RECEIVE_BUFFER_BYTES": "102400",
"KAFKA_OVERRIDE_COMPRESSION_TYPE": "producer",
"BROKER_HEALTH_CHECK_DELAY_SEC": "15",
"BROKER_JMX_REMOTE_SSL_NEED_CLIENT_AUTH": "false",
"KAFKA_ADVERTISE_HOST_IP": "true",
"KAFKA_OVERRIDE_REPLICA_SOCKET_TIMEOUT_MS": "30000",
"KAFKA_OVERRIDE_OFFSETS_RETENTION_CHECK_INTERVAL_MS": "600000",
"KAFKA_OVERRIDE_SOCKET_REQUEST_MAX_BYTES": "104857600",
"BROKER_HEALTH_CHECK_GRACE_SEC": "10",
"BROKER_PORT": "9100",
"KAFKA_OVERRIDE_OFFSETS_TOPIC_COMPRESSION_CODEC": "0",
"BROKER_JMX_ENABLE": "false",
"KAFKA_OVERRIDE_LOG_CLEANER_ENABLE": "true",
"KAFKA_OVERRIDE_DEFAULT_REPLICATION_FACTOR": "1",
"KAFKA_OVERRIDE_LOG_ROLL_JITTER_HOURS": "0",
"KAFKA_OVERRIDE_LOG_SEGMENT_DELETE_DELAY_MS": "60000",
"KAFKA_OVERRIDE_SOCKET_SEND_BUFFER_BYTES": "102400",
"KAFKA_OVERRIDE_BACKGROUND_THREADS": "10",
"KAFKA_OVERRIDE_LOG_CLEANER_IO_BUFFER_SIZE": "524288",
"KAFKA_OVERRIDE_NUM_REPLICA_FETCHERS": "1",
"BROKER_JMX_REMOTE_PORT": "9999",
"KAFKA_OVERRIDE_METRICS_NUM_SAMPLES": "2",
"OVERRIDER_URI": "https://downloads.mesosphere.com/kafka/assets/1.1.19-0.10.1.0/overrider.zip",
"BROKER_JMX_REMOTE_ENABLE": "false",
"KAFKA_OVERRIDE_AUTO_LEADER_REBALANCE_ENABLE": "true",
"KAFKA_OVERRIDE_UNCLEAN_LEADER_ELECTION_ENABLE": "true",
"KAFKA_OVERRIDE_NUM_RECOVERY_THREADS_PER_DATA_DIR": "1",
"BROKER_HEAP_MB": "2048",
"KAFKA_OVERRIDE_OFFSETS_TOPIC_NUM_PARTITIONS": "50",
"USER": "root",
"PLACEMENT_STRATEGY": "NODE",
"KAFKA_OVERRIDE_OFFSETS_RETENTION_MINUTES": "1440",
"KAFKA_OVERRIDE_LOG_FLUSH_SCHEDULER_INTERVAL_MS": "9223372036854775807",
"KAFKA_OVERRIDE_LOG_CLEANER_IO_MAX_BYTES_PER_SECOND": "1.7976931348623157E308",
"KAFKA_OVERRIDE_OFFSETS_COMMIT_REQUIRED_ACKS": "-1",
"BROKER_JMX_REMOTE_REGISTRY_SSL": "false",
"KAFKA_OVERRIDE_QUOTA_WINDOW_SIZE_SECONDS": "1",
"FRAMEWORK_PRINCIPAL": "kafka-principal",
"KAFKA_OVERRIDE_REPLICA_HIGH_WATERMARK_CHECKPOINT_INTERVAL_MS": "5000",
"KAFKA_OVERRIDE_LOG_RETENTION_HOURS": "168",
"RECOVERY_GRACE_PERIOD_SEC": "1200",
"BROKER_HEALTH_CHECK_INTERVAL_SEC": "10",
"KAFKA_OVERRIDE_CONNECTIONS_MAX_IDLE_MS": "600000",
"KAFKA_OVERRIDE_LOG_INDEX_INTERVAL_BYTES": "4096",
"KAFKA_OVERRIDE_RESERVED_BROKER_MAX_ID": "1000",
"KAFKA_OVERRIDE_LOG_CLEANER_BACKOFF_MS": "15000",
"REPLACE_DELAY_SEC": "600",
"KAFKA_OVERRIDE_MESSAGE_MAX_BYTES": "1000012",
"KAFKA_OVERRIDE_MAX_CONNECTIONS_PER_IP": "2147483647",
"KAFKA_OVERRIDE_LOG_CLEANER_MIN_CLEANABLE_RATIO": "0.5",
"KAFKA_OVERRIDE_LOG_ROLL_HOURS": "168",
"DISK_TYPE": "ROOT",
"KAFKA_OVERRIDE_LOG_INDEX_SIZE_MAX_BYTES": "10485760",
"KAFKA_OVERRIDE_QUOTA_WINDOW_NUM": "11",
"KAFKA_OVERRIDE_REPLICA_FETCH_MIN_BYTES": "1",
"KAFKA_OVERRIDE_REQUEST_TIMEOUT_MS": "30000",
"KAFKA_OVERRIDE_LOG_FLUSH_INTERVAL_MESSAGES": "9223372036854775807",
"KAFKA_OVERRIDE_LOG_RETENTION_CHECK_INTERVAL_MS": "300000",
"EXECUTOR_URI": "https://downloads.mesosphere.com/kafka/assets/1.1.19-0.10.1.0/executor.zip",
"KAFKA_OVERRIDE_ZOOKEEPER_SYNC_TIME_MS": "2000",
"KAFKA_OVERRIDE_REPLICA_LAG_TIME_MAX_MS": "10000",
"KAFKA_OVERRIDE_NUM_NETWORK_THREADS": "3",
"KAFKA_OVERRIDE_LOG_MESSAGE_FORMAT_VERSION": "0.10.0",
"FRAMEWORK_NAME": "kafka",
"KAFKA_URI": "https://downloads.mesosphere.com/kafka/assets/kafka_2.11-0.10.1.0.tgz",
"KAFKA_OVERRIDE_CONTROLLED_SHUTDOWN_RETRY_BACKOFF_MS": "5000",
"KAFKA_OVERRIDE_NUM_PARTITIONS": "1",
"BROKER_HEALTH_CHECK_TIMEOUT_SEC": "20",
"ENABLE_REPLACEMENT": "false",
"KAFKA_OVERRIDE_GROUP_MAX_SESSION_TIMEOUT_MS": "300000",
"KAFKA_OVERRIDE_REPLICA_FETCH_MAX_BYTES": "1048576",
"KAFKA_OVERRIDE_CONTROLLED_SHUTDOWN_ENABLE": "true",
"KAFKA_OVERRIDE_METRICS_SAMPLE_WINDOW_MS": "30000",
"KAFKA_OVERRIDE_NUM_IO_THREADS": "8",
"KAFKA_OVERRIDE_LOG_SEGMENT_BYTES": "1073741824",
"KAFKA_OVERRIDE_LOG_PREALLOCATE": "false",
"KAFKA_OVERRIDE_LOG_RETENTION_BYTES": "-1",
"KAFKA_OVERRIDE_REPLICA_FETCH_WAIT_MAX_MS": "500",
"KAFKA_OVERRIDE_FETCH_PURGATORY_PURGE_INTERVAL_REQUESTS": "1000",
"KAFKA_OVERRIDE_LOG_CLEANUP_POLICY": "delete",
"BROKER_DISK": "5000",
"KAFKA_ZOOKEEPER_URI": "master.mesos:2181",
"KAFKA_OVERRIDE_REPLICA_FETCH_BACKOFF_MS": "1000",
"KAFKA_OVERRIDE_REPLICA_SOCKET_RECEIVE_BUFFER_BYTES": "65536",
"KAFKA_VER_NAME": "kafka_2.11-0.10.1.0"
},
"healthChecks": [
{
"gracePeriodSeconds": 120,
"intervalSeconds": 30,
"timeoutSeconds": 5,
"maxConsecutiveFailures": 0,
"portIndex": 0,
"path": "/admin/healthcheck",
"protocol": "HTTP",
"ignoreHttp1xx": false
}
],
"labels": {
"DCOS_PACKAGE_RELEASE": "39",
"DCOS_SERVICE_SCHEME": "http",
"DCOS_PACKAGE_SOURCE": "https://universe.mesosphere.com/repo",
"DCOS_PACKAGE_COMMAND": "eyJwaXAiOlsiaHR0cHM6Ly9kb3dubG9hZHMubWVzb3NwaGVyZS5jb20va2Fma2EvYXNzZXRzLzEuMS4xOS0wLjEwLjEuMC9iaW5fd3JhcHBlci0wLjAuMS1weTIucHkzLW5vbmUtYW55LndobCJdfQ==",
"DCOS_PACKAGE_METADATA": "eyJwYWNrYWdpbmdWZXJzaW9uIjoiMy4wIiwibmFtZSI6ImthZmthIiwidmVyc2lvbiI6IjEuMS4xOS4xLTAuMTAuMS4wIiwibWFpbnRhaW5lciI6InN1cHBvcnRAbWVzb3NwaGVyZS5pbyIsImRlc2NyaXB0aW9uIjoiQXBhY2hlIEthZmthIHJ1bm5pbmcgb24gREMvT1MiLCJ0YWdzIjpbIm1lc3NhZ2UiLCJicm9rZXIiLCJwdWJzdWIiXSwic2VsZWN0ZWQiOmZhbHNlLCJmcmFtZXdvcmsiOnRydWUsInBvc3RJbnN0YWxsTm90ZXMiOiJEQy9PUyBLYWZrYSBTZXJ2aWNlIGlzIGJlaW5nIGluc3RhbGxlZC5cblxuXHREb2N1bWVudGF0aW9uOiBodHRwczovL2RvY3MubWVzb3NwaGVyZS5jb20vY3VycmVudC91c2FnZS9zZXJ2aWNlLWd1aWRlcy9rYWZrYS9cblx0SXNzdWVzOiBodHRwczovL2Rjb3NqaXJhLmF0bGFzc2lhbi5uZXQvcHJvamVjdHMvS0FGS0EvaXNzdWVzIiwicG9zdFVuaW5zdGFsbE5vdGVzIjoiREMvT1MgS2Fma2EgU2VydmljZSBoYXMgYmVlbiB1bmluc3RhbGxlZC5cblBsZWFzZSBmb2xsb3cgdGhlIGluc3RydWN0aW9ucyBhdCBodHRwczovL2RvY3MubWVzb3NwaGVyZS5jb20vY3VycmVudC91c2FnZS9zZXJ2aWNlLWd1aWRlcy9rYWZrYS91bmluc3RhbGwgdG8gcmVtb3ZlIGFueSBwZXJzaXN0ZW50IHN0YXRlIGlmIHJlcXVpcmVkLiIsImltYWdlcyI6eyJpY29uLXNtYWxsIjoiaHR0cHM6Ly9kb3dubG9hZHMubWVzb3NwaGVyZS5jb20vdW5pdmVyc2UvYXNzZXRzL2ljb24tc2VydmljZS1rYWZrYS1zbWFsbC5wbmciLCJpY29uLW1lZGl1bSI6Imh0dHBzOi8vZG93bmxvYWRzLm1lc29zcGhlcmUuY29tL3VuaXZlcnNlL2Fzc2V0cy9pY29uLXNlcnZpY2Uta2Fma2EtbWVkaXVtLnBuZyIsImljb24tbGFyZ2UiOiJodHRwczovL2Rvd25sb2Fkcy5tZXNvc3BoZXJlLmNvbS91bml2ZXJzZS9hc3NldHMvaWNvbi1zZXJ2aWNlLWthZmthLWxhcmdlLnBuZyJ9fQ==",
"DCOS_PACKAGE_REGISTRY_VERSION": "3.0",
"DCOS_SERVICE_NAME": "kafka",
"DCOS_PACKAGE_FRAMEWORK_NAME": "kafka",
"DCOS_SERVICE_PORT_INDEX": "1",
"DCOS_PACKAGE_VERSION": "1.1.19.1-0.10.1.0",
"DCOS_MIGRATION_API_PATH": "/v1/plan",
"DCOS_PACKAGE_NAME": "kafka",
"MARATHON_SINGLE_INSTANCE_APP": "true",
"DCOS_PACKAGE_IS_FRAMEWORK": "true",
"DCOS_MIGRATION_API_VERSION": "v1"
},
"portDefinitions": [
{
"port": 10001,
"protocol": "tcp",
"name": "health",
"labels": {}
},
{
"port": 10002,
"protocol": "tcp",
"name": "api",
"labels": {}
}
],
"uris": [
"https://downloads.mesosphere.com/java/jre-8u121-linux-x64.tar.gz",
"https://downloads.mesosphere.com/kafka/assets/1.1.19-0.10.1.0/scheduler.zip",
"https://downloads.mesosphere.com/kafka/assets/kafka_2.11-0.10.1.0.tgz",
"https://downloads.mesosphere.com/libmesos-bundle/libmesos-bundle-1.9-argus-1.1.x-3.tar.gz"
],
"fetch": [
{
"uri": "https://downloads.mesosphere.com/java/jre-8u121-linux-x64.tar.gz",
"extract": true,
"executable": false,
"cache": false
},
{
"uri": "https://downloads.mesosphere.com/kafka/assets/1.1.19-0.10.1.0/scheduler.zip",
"extract": true,
"executable": false,
"cache": false
},
{
"uri": "https://downloads.mesosphere.com/kafka/assets/kafka_2.11-0.10.1.0.tgz",
"extract": true,
"executable": false,
"cache": false
},
{
"uri": "https://downloads.mesosphere.com/libmesos-bundle/libmesos-bundle-1.9-argus-1.1.x-3.tar.gz",
"extract": true,
"executable": false,
"cache": false
}
],
"readinessChecks": [
{
"name": "kafkaUpdateProgress",
"protocol": "HTTP",
"path": "/v1/plan",
"portName": "api",
"intervalSeconds": 30,
"timeoutSeconds": 10,
"httpStatusCodesForReady": [
200
],
"preserveLastResponse": true
}
],
"upgradeStrategy": {
"minimumHealthCapacity": 0,
"maximumOverCapacity": 0
}
}
也无法在第二个节点上运行的示例应用程序:
{
"id": "/sleepscript",
"cmd": " while :; do echo 'Hit CTRL+C'; sleep 1; done",
"cpus": 1,
"mem": 128,
"disk": 0,
"instances": 0,
"acceptedResourceRoles": [
"*"
],
"portDefinitions": [
{
"port": 10000,
"protocol": "tcp",
"labels": {}
}
]
}
@ EDIT2:
这些是日志中一些值得注意的行。我尝试运行同一任务的多个实例,结果第一个节点(*.11节点)被100%使用,而第二个节点(*.12节点)是空闲的:
Sep 04 09:07:01 master-1 java[2406]: [2017-09-04 09:07:01,682] INFO Finished processing e74ab83d-b856-4c89-a5fb-0eccb4e74268-O1269 from 10.132.0.11. Matched 0 ops after 1 passes. ports(*) 1025->2180,2182->3887,3889->5049,5052->807
9,8082->8180,8182->9380,9383->29823,29825->32000; disk(*) 50100.0; disk(*) 6270.0; mem(*) 8639.0 left. (mesosphere.marathon.core.matcher.manager.impl.OfferMatcherManagerActor:marathon-akka.actor.default-dispatcher-81)
Sep 04 09:07:01 master-1 java[2406]: [2017-09-04 09:07:01,682] INFO Finished processing e74ab83d-b856-4c89-a5fb-0eccb4e74268-O1268 from 10.132.0.12. Matched 0 ops after 1 passes. ports(*) 1026->2180,2182->3887,3889->5049,5052->807
9,8082->8180,8182->9099,9101->32000; disk(*) 50100.0; disk(*) 1263.0; cpus(*) 0.5; mem(*) 9279.0 left. (mesosphere.marathon.core.matcher.manager.impl.OfferMatcherManagerActor:marathon-akka.actor.default-dispatcher-81)
Sep 04 09:07:01 master-1 mesos-master[1963]: I0904 09:07:01.683557 1994 master.cpp:4732] Processing DECLINE call for offers: [ e74ab83d-b856-4c89-a5fb-0eccb4e74268-O1269 ] for framework a5370699-baae-4e77-a48e-1283d36b1906-0000 (m
arathon) at scheduler-88e19c66-42c6-48be-be03-662a9312cd91@10.132.0.10:15101
Sep 04 09:07:01 master-1 mesos-master[1963]: I0904 09:07:01.683799 1994 master.cpp:4732] Processing DECLINE call for offers: [ e74ab83d-b856-4c89-a5fb-0eccb4e74268-O1268 ] for framework a5370699-baae-4e77-a48e-1283d36b1906-0000 (m
arathon) at scheduler-88e19c66-42c6-48be-be03-662a9312cd91@10.132.0.10:15101
在第二个offer中,有一项与第一个节点不同:cpus(*) 0.5; 我猜第一个节点现在没有显示CPU是因为全部都已被使用,但第二个节点显示0.5 CPU,这是否意味着只有半个CPU?
这些是最初的offer,其中*.11节点接受了一个offer并运行了一个实例:
Sep 04 09:06:56 master-1 mesos-master[1963]: I0904 09:06:56.671070 1998 master.cpp:7029] Sending 2 offers to framework a5370699-baae-4e77-a48e-1283d36b1906-0000 (marathon) at scheduler-88e19c66-42c6-48be-be03-662a9312cd91@10.132.0
.10:15101
Sep 04 09:06:56 master-1 java[2406]: [2017-09-04 09:06:56,675] INFO Offer [e74ab83d-b856-4c89-a5fb-0eccb4e74268-O1266]. Considering resources with roles {*} without resident reservation labels. Not all basic resources satisfied: c
pus NOT SATISFIED (1.0 > 0.5), mem SATISFIED (128.0 <= 128.0) (mesosphere.mesos.ResourceMatcher$:marathon-akka.actor.default-dispatcher-75)
Sep 04 09:06:56 master-1 java[2406]: [2017-09-04 09:06:56,675] INFO Offer [e74ab83d-b856-4c89-a5fb-0eccb4e74268-O1265]. Considering resources with roles {*} without resident reservation labels. Not all basic resources satisfied: c
pus NOT SATISFIED (1.0 > 0.0), mem SATISFIED (128.0 <= 128.0) (mesosphere.mesos.ResourceMatcher$:marathon-akka.actor.default-dispatcher-75)
Sep 04 09:06:56 master-1 java[2406]: [2017-09-04 09:06:56,676] INFO Finished processing e74ab83d-b856-4c89-a5fb-0eccb4e74268-O1266 from 10.132.0.12. Matched 0 ops after 1 passes. ports(*) 1026->2180,2182->3887,3889->5049,5052->807
9,8082->8180,8182->9099,9101->32000; disk(*) 50100.0; disk(*) 1263.0; cpus(*) 0.5; mem(*) 9279.0 left. (mesosphere.marathon.core.matcher.manager.impl.OfferMatcherManagerActor:marathon-akka.actor.default-dispatcher-75)
Sep 04 09:06:56 master-1 java[2406]: [2017-09-04 09:06:56,676] INFO Finished processing e74ab83d-b856-4c89-a5fb-0eccb4e74268-O1265 from 10.132.0.11. Matched 1 ops after 2 passes. ports(*) 1025->2180,2182->3887,3889->5049,5052->807
9,8082->8180,8182->9380,9383->29823,29825->32000; disk(*) 50100.0; disk(*) 6270.0; mem(*) 8639.0 left. (mesosphere.marathon.core.matcher.manager.impl.OfferMatcherManagerActor:marathon-akka.actor.default-dispatcher-75)
不过,它显示*.11节点似乎只有0个CPU,但最后一条日志显示该offer匹配成功,之后还有一些带有正在运行任务ID的日志。
@ EDIT3:
Bingo,在创建一个只使用0.1 CPU的任务之后,*.12节点终于被使用了,它总共只有0.5 CPU可用。该节点也无法同时运行两个以上0.2 CPU的任务,所以看起来正是这0.5 CPU限制了它。似乎有什么东西把它限制在0.5 CPU,但我不知道是什么。
@ edit4:
DCOS显示该节点共有2个CPU,Mesos UI也显示CPU总数为2,/proc/cpuinfo同样显示两个核心。
@ edit5:
我通过删除/var/lib/dcos/mesos-resources和/var/lib/mesos/slave/meta目录的全部内容“解决”了这个问题。但是我不知道最初是什么原因引起了这个问题。
@anotherEdit
mesos/master/slaves在正常工作时的内容:
{"slaves":[{"id":"6c620a26-7bc6-4287-b67a-de2b0eb8778c-S3","hostname":"10.132.0.12","port":5051,"attributes":{},"pid":"slave(1)@10.132.0.12:5051","registered_time":1504683660.37257,"reregistered_time":1504683660.37296,"resources":{"disk":52824.0,"mem":11839.0,"gpus":0.0,"cpus":2.0,"ports":"[1025-2180, 2182-3887, 3889-5049, 5052-8079, 8082-8180, 8182-32000]"},"used_resources":{"disk":50100.0,"mem":7168.0,"gpus":0.0,"cpus":1.3,"ports":"[7000-7001, 7199-7199, 9000-9001, 9042-9042, 9160-9160]"},"offered_resources":{"disk":0.0,"mem":0.0,"gpus":0.0,"cpus":0.0},"reserved_resources":{"cassandra-role":{"disk":50100.0,"mem":5376.0,"gpus":0.0,"cpus":1.5,"ports":"[7000-7001, 7199-7199, 9001-9001, 9042-9042, 9160-9160]"}},"unreserved_resources":{"disk":2724.0,"mem":6463.0,"gpus":0.0,"cpus":0.5,"ports":"[1025-2180, 2182-3887, 3889-5049, 5052-6999, 7002-7198, 7200-8079, 8082-8180, 8182-9000, 9002-9041, 9043-9159, 9161-32000]"},"active":true,"version":"1.2.2","reserved_resources_full":{"cassandra-role":[{"name":"cpus","type":"SCALAR","scalar":{"value":0.5},"role":"cassandra-role","reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"5827371e-2139-4278-99b5-85e7dd573f4c"}]}}},{"name":"mem","type":"SCALAR","scalar":{"value":4096.0},"role":"cassandra-role","reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"c5c881b0-45df-458b-9731-11cfb1b251c3"}]}}},{"name":"cpus","type":"SCALAR","scalar":{"value":0.5},"role":"cassandra-role","reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"1fed2809-f03e-4600-bfa7-b83a382dafbb"}]}}},{"name":"mem","type":"SCALAR","scalar":{"value":1024.0},"role":"cassandra-role","reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"3fa104ed-8fec-47ce-9702-6cde32bc97de"}]}}},{"name":"ports","type":"RANGES","ranges":{"range":[{"begin":9001,"end":9001}]},"role":"cassandra-role","reservati
on":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"072d9c13-4205-4ab0-bff2-59e2604774c1"}]}}},{"name":"cpus","type":"SCALAR","scalar":{"value":0.5},"role":"cassandra-role","reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"c06733c6-fe7e-43a1-a3b7-70371d7cae08"}]}}},{"name":"mem","type":"SCALAR","scalar":{"value":256.0},"role":"cassandra-role","reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"18b050cf-f45d-4e2d-8e32-b7d7028d4939"}]}}},{"name":"ports","type":"RANGES","ranges":{"range":[{"begin":7000,"end":7001},{"begin":7199,"end":7199},{"begin":9042,"end":9042},{"begin":9160,"end":9160}]},"role":"cassandra-role","reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"f49161b4-071f-45e6-9316-62b81c009f8a"}]}}},{"name":"disk","type":"SCALAR","scalar":{"value":50100.0},"role":"cassandra-role","reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"359a466c-1191-441e-8058-d77c84fbdf0f"}]}},"disk":{"persistence":{"id":"0df2d25d-8c64-42ac-ab65-1de63392b5e8","principal":"cassandra-principal"},"volume":{"mode":"RW","container_path":"volume"},"source":{"type":"MOUNT","mount":{"root":"\/dcos\/volume0"}}}}]},"used_resources_full":[{"name":"cpus","type":"SCALAR","scalar":{"value":0.5},"role":"cassandra-role","allocation_info":{"role":"cassandra-role"},"reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"1fed2809-f03e-4600-bfa7-b83a382dafbb"}]}}},{"name":"mem","type":"SCALAR","scalar":{"value":1024.0},"role":"cassandra-role","allocation_info":{"role":"cassandra-role"},"reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"3fa104ed-8fec-47ce-9702-6cde32bc97de"}]}}},{"name":"ports","type":"RANGES","ranges":{"range":[{"begin":9001,"end":9001}]},"role":"cassandra-role","allocati
on_info":{"role":"cassandra-role"},"reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"072d9c13-4205-4ab0-bff2-59e2604774c1"}]}}},{"name":"cpus","type":"SCALAR","scalar":{"value":0.5},"role":"cassandra-role","allocation_info":{"role":"cassandra-role"},"reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"c06733c6-fe7e-43a1-a3b7-70371d7cae08"}]}}},{"name":"mem","type":"SCALAR","scalar":{"value":4096.0},"role":"cassandra-role","allocation_info":{"role":"cassandra-role"},"reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"c5c881b0-45df-458b-9731-11cfb1b251c3"}]}}},{"name":"disk","type":"SCALAR","scalar":{"value":50100.0},"role":"cassandra-role","allocation_info":{"role":"cassandra-role"},"reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"359a466c-1191-441e-8058-d77c84fbdf0f"}]}},"disk":{"persistence":{"id":"0df2d25d-8c64-42ac-ab65-1de63392b5e8","principal":"cassandra-principal"},"volume":{"mode":"RW","container_path":"volume"},"source":{"type":"MOUNT","mount":{"root":"\/dcos\/volume0"}}}},{"name":"ports","type":"RANGES","ranges":{"range":[{"begin":7000,"end":7001},{"begin":7199,"end":7199},{"begin":9042,"end":9042},{"begin":9160,"end":9160}]},"role":"cassandra-role","allocation_info":{"role":"cassandra-role"},"reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"f49161b4-071f-45e6-9316-62b81c009f8a"}]}}},{"name":"cpus","type":"SCALAR","scalar":{"value":0.3},"role":"*","allocation_info":{"role":"slave_public"}},{"name":"mem","type":"SCALAR","scalar":{"value":2048.0},"role":"*","allocation_info":{"role":"slave_public"}},{"name":"ports","type":"RANGES","ranges":{"range":[{"begin":9000,"end":9000}]},"role":"*","allocation_info":{"role":"slave_public"}}],"offered_resources_full":[]},{"id":"6c620a26-7bc6-4287-b67a-de2b0eb8778c-S2","hostname":"10.1
32.0.11","port":5051,"attributes":{},"pid":"slave(1)@10.132.0.11:5051","registered_time":1504683660.32389,"reregistered_time":1504683660.32425,"resources":{"disk":56370.0,"mem":11839.0,"gpus":0.0,"cpus":2.0,"ports":"[1025-2180, 2182-3887, 3889-5049, 5052-8079, 8082-8180, 8182-32000]"},"used_resources":{"disk":512.0,"mem":2048.0,"gpus":0.0,"cpus":0.6,"ports":"[23803-23805]"},"offered_resources":{"disk":0.0,"mem":0.0,"gpus":0.0,"cpus":0.0},"reserved_resources":{"kafka-role":{"disk":5000.0,"mem":2560.0,"gpus":0.0,"cpus":1.0,"ports":"[1025-1025, 9100-9100]"}},"unreserved_resources":{"disk":51370.0,"mem":9279.0,"gpus":0.0,"cpus":1.0,"ports":"[1026-2180, 2182-3887, 3889-5049, 5052-8079, 8082-8180, 8182-9099, 9101-32000]"},"active":true,"version":"1.2.2","reserved_resources_full":{"kafka-role":[{"name":"cpus","type":"SCALAR","scalar":{"value":0.5},"role":"kafka-role","reservation":{"principal":"kafka-principal","labels":{"labels":[{"key":"resource_id","value":"b722808d-79a2-4871-8c97-e95a2c847fbd"}]}}},{"name":"mem","type":"SCALAR","scalar":{"value":256.0},"role":"kafka-role","reservation":{"principal":"kafka-principal","labels":{"labels":[{"key":"resource_id","value":"6eb2865d-f823-473f-8c69-49a72eb7dbf6"}]}}},{"name":"ports","type":"RANGES","ranges":{"range":[{"begin":1025,"end":1025}]},"role":"kafka-role","reservation":{"principal":"kafka-principal","labels":{"labels":[{"key":"dynamic_port","value":"API_PORT"},{"key":"resource_id","value":"1deb5417-7852-49fb-ac81-fceb073369a8"}]}}},{"name":"cpus","type":"SCALAR","scalar":{"value":0.5},"role":"kafka-role","reservation":{"principal":"kafka-principal","labels":{"labels":[{"key":"resource_id","value":"fe21a9d8-0ba0-4d82-9bae-ca252d72bf93"}]}}},{"name":"mem","type":"SCALAR","scalar":{"value":2304.0},"role":"kafka-role","reservation":{"principal":"kafka-principal","labels":{"labels":[{"key":"resource_id","value":"1beca7e2-bf10-43be-be28-0f93b03eac37"}]}}},{"name":"disk","type":"SCALAR","scalar":{"value":5000.0},"role":"kafka-
role","reservation":{"principal":"kafka-principal","labels":{"labels":[{"key":"resource_id","value":"d7e9cb00-2204-481a-a999-cec4fee5d248"}]}},"disk":{"persistence":{"id":"661b8d81-8d33-432d-bd47-5538718730f9","principal":"kafka-principal"},"volume":{"mode":"RW","container_path":"kafka-volume-11c8a735-646f-4c46-9af5-ac2cbc52b697"}}},{"name":"ports","type":"RANGES","ranges":{"range":[{"begin":9100,"end":9100}]},"role":"kafka-role","reservation":{"principal":"kafka-principal","labels":{"labels":[{"key":"resource_id","value":"99c8303c-1892-4fb8-bd0f-8618ad877654"}]}}}]},"used_resources_full":[{"name":"cpus","type":"SCALAR","scalar":{"value":0.6},"role":"*","allocation_info":{"role":"slave_public"}},{"name":"mem","type":"SCALAR","scalar":{"value":2048.0},"role":"*","allocation_info":{"role":"slave_public"}},{"name":"disk","type":"SCALAR","scalar":{"value":512.0},"role":"*","allocation_info":{"role":"slave_public"}},{"name":"ports","type":"RANGES","ranges":{"range":[{"begin":23803,"end":23805}]},"role":"*","allocation_info":{"role":"slave_public"}}],"offered_resources_full":[]}],"recovered_slaves":[]}