DCOS不分配资源

时间:2017-09-04 08:34:06

标签: marathon dcos

我正在运行一个简单的DCOS群集用于测试,它有一个主节点和两个私有节点。第一天我安装了Kafka并做了一些测试,一切正常。 然而,在重新启动群集后,Kafka处于失败状态;再次重启之后它恢复了健康,但没有任何broker。过了一会儿,我发现这是因为第二个节点完全没有被使用。

当第一个节点的CPU使用率达到100%时,其余任务只是等待,而不会在第二个节点上运行,它们在Marathon中处于等待状态。第二个节点的CPU和RAM分配始终为0%,没有任何变化。所有任务仅在第一个节点上运行。 第二个节点处于健康状态且显示正常,我可以从主节点通过ssh登录到它,没有任何问题。重启也不起作用。

我该如何解决这个问题?我不知道在哪里搜索。

@edit:

kafka json:

{
  "id": "/kafka",
  "cmd": "export LD_LIBRARY_PATH=$MESOS_SANDBOX/libmesos-bundle/lib:$LD_LIBRARY_PATH && export MESOS_NATIVE_JAVA_LIBRARY=$(ls $MESOS_SANDBOX/libmesos-bundle/lib/libmesos-*.so) && export PATH=$(ls -d $MESOS_SANDBOX/jre*/bin):$PATH && ./scheduler/bin/kafka-scheduler server ./scheduler/conf/scheduler.yml",
  "cpus": 1,
  "mem": 3072,
  "disk": 0,
  "instances": 1,
  "acceptedResourceRoles": [
    "*"
  ],
  "env": {
    "KAFKA_OVERRIDE_LEADER_IMBALANCE_CHECK_INTERVAL_SECONDS": "300",
    "KAFKA_OVERRIDE_OFFSET_METADATA_MAX_BYTES": "4096",
    "KAFKA_OVERRIDE_PRODUCER_PURGATORY_PURGE_INTERVAL_REQUESTS": "1000",
    "KAFKA_OVERRIDE_ZOOKEEPER_SESSION_TIMEOUT_MS": "6000",
    "BROKER_STATSD_PORT": "0",
    "KAFKA_OVERRIDE_LEADER_IMBALANCE_PER_BROKER_PERCENTAGE": "10",
    "KAFKA_OVERRIDE_CONTROLLED_SHUTDOWN_MAX_RETRIES": "3",
    "KAFKA_OVERRIDE_LOG_CLEANER_DEDUPE_BUFFER_SIZE": "134217728",
    "LD_LIBRARY_PATH": "/opt/mesosphere/lib",
    "KAFKA_OVERRIDE_CONTROLLER_SOCKET_TIMEOUT_MS": "30000",
    "BROKER_JMX_REMOTE_AUTH": "false",
    "KAFKA_OVERRIDE_OFFSETS_LOAD_BUFFER_SIZE": "5242880",
    "BROKER_MEM": "2304",
    "KAFKA_OVERRIDE_QUEUED_MAX_REQUESTS": "500",
    "KAFKA_OVERRIDE_LOG_CLEANER_IO_BUFFER_LOAD_FACTOR": "0.9",
    "KAFKA_OVERRIDE_OFFSETS_TOPIC_REPLICATION_FACTOR": "3",
    "BROKER_COUNT": "1",
    "KAFKA_OVERRIDE_QUOTA_CONSUMER_DEFAULT": "9223372036854775807",
    "KAFKA_OVERRIDE_INTER_BROKER_PROTOCOL_VERSION": "0.10.0.0",
    "BROKER_HEALTH_CHECK_MAX_FAILURES": "3",
    "KAFKA_OVERRIDE_LOG_CLEANER_DELETE_RETENTION_MS": "86400000",
    "KAFKA_OVERRIDE_OFFSETS_TOPIC_SEGMENT_BYTES": "104857600",
    "BROKER_CPUS": "1",
    "KAFKA_OVERRIDE_LOG_FLUSH_OFFSET_CHECKPOINT_INTERVAL_MS": "60000",
    "KAFKA_OVERRIDE_OFFSETS_COMMIT_TIMEOUT_MS": "5000",
    "JAVA_HOME": "./jre1.8.0_121",
    "KAFKA_OVERRIDE_AUTO_CREATE_TOPICS_ENABLE": "true",
    "KAFKA_OVERRIDE_LOG_CLEANER_THREADS": "1",
    "JAVA_URI": "https://downloads.mesosphere.com/java/jre-8u121-linux-x64.tar.gz",
    "KAFKA_OVERRIDE_DELETE_TOPIC_ENABLE": "false",
    "KAFKA_OVERRIDE_MIN_INSYNC_REPLICAS": "1",
    "PHASE_STRATEGY": "INSTALL",
    "ENABLE_BROKER_HEALTH_CHECK": "true",
    "BROKER_JMX_REMOTE_SSL": "false",
    "KAFKA_OVERRIDE_QUOTA_PRODUCER_DEFAULT": "9223372036854775807",
    "KAFKA_OVERRIDE_GROUP_MIN_SESSION_TIMEOUT_MS": "6000",
    "KAFKA_OVERRIDE_SOCKET_RECEIVE_BUFFER_BYTES": "102400",
    "KAFKA_OVERRIDE_COMPRESSION_TYPE": "producer",
    "BROKER_HEALTH_CHECK_DELAY_SEC": "15",
    "BROKER_JMX_REMOTE_SSL_NEED_CLIENT_AUTH": "false",
    "KAFKA_ADVERTISE_HOST_IP": "true",
    "KAFKA_OVERRIDE_REPLICA_SOCKET_TIMEOUT_MS": "30000",
    "KAFKA_OVERRIDE_OFFSETS_RETENTION_CHECK_INTERVAL_MS": "600000",
    "KAFKA_OVERRIDE_SOCKET_REQUEST_MAX_BYTES": "104857600",
    "BROKER_HEALTH_CHECK_GRACE_SEC": "10",
    "BROKER_PORT": "9100",
    "KAFKA_OVERRIDE_OFFSETS_TOPIC_COMPRESSION_CODEC": "0",
    "BROKER_JMX_ENABLE": "false",
    "KAFKA_OVERRIDE_LOG_CLEANER_ENABLE": "true",
    "KAFKA_OVERRIDE_DEFAULT_REPLICATION_FACTOR": "1",
    "KAFKA_OVERRIDE_LOG_ROLL_JITTER_HOURS": "0",
    "KAFKA_OVERRIDE_LOG_SEGMENT_DELETE_DELAY_MS": "60000",
    "KAFKA_OVERRIDE_SOCKET_SEND_BUFFER_BYTES": "102400",
    "KAFKA_OVERRIDE_BACKGROUND_THREADS": "10",
    "KAFKA_OVERRIDE_LOG_CLEANER_IO_BUFFER_SIZE": "524288",
    "KAFKA_OVERRIDE_NUM_REPLICA_FETCHERS": "1",
    "BROKER_JMX_REMOTE_PORT": "9999",
    "KAFKA_OVERRIDE_METRICS_NUM_SAMPLES": "2",
    "OVERRIDER_URI": "https://downloads.mesosphere.com/kafka/assets/1.1.19-0.10.1.0/overrider.zip",
    "BROKER_JMX_REMOTE_ENABLE": "false",
    "KAFKA_OVERRIDE_AUTO_LEADER_REBALANCE_ENABLE": "true",
    "KAFKA_OVERRIDE_UNCLEAN_LEADER_ELECTION_ENABLE": "true",
    "KAFKA_OVERRIDE_NUM_RECOVERY_THREADS_PER_DATA_DIR": "1",
    "BROKER_HEAP_MB": "2048",
    "KAFKA_OVERRIDE_OFFSETS_TOPIC_NUM_PARTITIONS": "50",
    "USER": "root",
    "PLACEMENT_STRATEGY": "NODE",
    "KAFKA_OVERRIDE_OFFSETS_RETENTION_MINUTES": "1440",
    "KAFKA_OVERRIDE_LOG_FLUSH_SCHEDULER_INTERVAL_MS": "9223372036854775807",
    "KAFKA_OVERRIDE_LOG_CLEANER_IO_MAX_BYTES_PER_SECOND": "1.7976931348623157E308",
    "KAFKA_OVERRIDE_OFFSETS_COMMIT_REQUIRED_ACKS": "-1",
    "BROKER_JMX_REMOTE_REGISTRY_SSL": "false",
    "KAFKA_OVERRIDE_QUOTA_WINDOW_SIZE_SECONDS": "1",
    "FRAMEWORK_PRINCIPAL": "kafka-principal",
    "KAFKA_OVERRIDE_REPLICA_HIGH_WATERMARK_CHECKPOINT_INTERVAL_MS": "5000",
    "KAFKA_OVERRIDE_LOG_RETENTION_HOURS": "168",
    "RECOVERY_GRACE_PERIOD_SEC": "1200",
    "BROKER_HEALTH_CHECK_INTERVAL_SEC": "10",
    "KAFKA_OVERRIDE_CONNECTIONS_MAX_IDLE_MS": "600000",
    "KAFKA_OVERRIDE_LOG_INDEX_INTERVAL_BYTES": "4096",
    "KAFKA_OVERRIDE_RESERVED_BROKER_MAX_ID": "1000",
    "KAFKA_OVERRIDE_LOG_CLEANER_BACKOFF_MS": "15000",
    "REPLACE_DELAY_SEC": "600",
    "KAFKA_OVERRIDE_MESSAGE_MAX_BYTES": "1000012",
    "KAFKA_OVERRIDE_MAX_CONNECTIONS_PER_IP": "2147483647",
    "KAFKA_OVERRIDE_LOG_CLEANER_MIN_CLEANABLE_RATIO": "0.5",
    "KAFKA_OVERRIDE_LOG_ROLL_HOURS": "168",
    "DISK_TYPE": "ROOT",
    "KAFKA_OVERRIDE_LOG_INDEX_SIZE_MAX_BYTES": "10485760",
    "KAFKA_OVERRIDE_QUOTA_WINDOW_NUM": "11",
    "KAFKA_OVERRIDE_REPLICA_FETCH_MIN_BYTES": "1",
    "KAFKA_OVERRIDE_REQUEST_TIMEOUT_MS": "30000",
    "KAFKA_OVERRIDE_LOG_FLUSH_INTERVAL_MESSAGES": "9223372036854775807",
    "KAFKA_OVERRIDE_LOG_RETENTION_CHECK_INTERVAL_MS": "300000",
    "EXECUTOR_URI": "https://downloads.mesosphere.com/kafka/assets/1.1.19-0.10.1.0/executor.zip",
    "KAFKA_OVERRIDE_ZOOKEEPER_SYNC_TIME_MS": "2000",
    "KAFKA_OVERRIDE_REPLICA_LAG_TIME_MAX_MS": "10000",
    "KAFKA_OVERRIDE_NUM_NETWORK_THREADS": "3",
    "KAFKA_OVERRIDE_LOG_MESSAGE_FORMAT_VERSION": "0.10.0",
    "FRAMEWORK_NAME": "kafka",
    "KAFKA_URI": "https://downloads.mesosphere.com/kafka/assets/kafka_2.11-0.10.1.0.tgz",
    "KAFKA_OVERRIDE_CONTROLLED_SHUTDOWN_RETRY_BACKOFF_MS": "5000",
    "KAFKA_OVERRIDE_NUM_PARTITIONS": "1",
    "BROKER_HEALTH_CHECK_TIMEOUT_SEC": "20",
    "ENABLE_REPLACEMENT": "false",
    "KAFKA_OVERRIDE_GROUP_MAX_SESSION_TIMEOUT_MS": "300000",
    "KAFKA_OVERRIDE_REPLICA_FETCH_MAX_BYTES": "1048576",
    "KAFKA_OVERRIDE_CONTROLLED_SHUTDOWN_ENABLE": "true",
    "KAFKA_OVERRIDE_METRICS_SAMPLE_WINDOW_MS": "30000",
    "KAFKA_OVERRIDE_NUM_IO_THREADS": "8",
    "KAFKA_OVERRIDE_LOG_SEGMENT_BYTES": "1073741824",
    "KAFKA_OVERRIDE_LOG_PREALLOCATE": "false",
    "KAFKA_OVERRIDE_LOG_RETENTION_BYTES": "-1",
    "KAFKA_OVERRIDE_REPLICA_FETCH_WAIT_MAX_MS": "500",
    "KAFKA_OVERRIDE_FETCH_PURGATORY_PURGE_INTERVAL_REQUESTS": "1000",
    "KAFKA_OVERRIDE_LOG_CLEANUP_POLICY": "delete",
    "BROKER_DISK": "5000",
    "KAFKA_ZOOKEEPER_URI": "master.mesos:2181",
    "KAFKA_OVERRIDE_REPLICA_FETCH_BACKOFF_MS": "1000",
    "KAFKA_OVERRIDE_REPLICA_SOCKET_RECEIVE_BUFFER_BYTES": "65536",
    "KAFKA_VER_NAME": "kafka_2.11-0.10.1.0"
  },
  "healthChecks": [
    {
      "gracePeriodSeconds": 120,
      "intervalSeconds": 30,
      "timeoutSeconds": 5,
      "maxConsecutiveFailures": 0,
      "portIndex": 0,
      "path": "/admin/healthcheck",
      "protocol": "HTTP",
      "ignoreHttp1xx": false
    }
  ],
  "labels": {
    "DCOS_PACKAGE_RELEASE": "39",
    "DCOS_SERVICE_SCHEME": "http",
    "DCOS_PACKAGE_SOURCE": "https://universe.mesosphere.com/repo",
    "DCOS_PACKAGE_COMMAND": "eyJwaXAiOlsiaHR0cHM6Ly9kb3dubG9hZHMubWVzb3NwaGVyZS5jb20va2Fma2EvYXNzZXRzLzEuMS4xOS0wLjEwLjEuMC9iaW5fd3JhcHBlci0wLjAuMS1weTIucHkzLW5vbmUtYW55LndobCJdfQ==",
    "DCOS_PACKAGE_METADATA": "eyJwYWNrYWdpbmdWZXJzaW9uIjoiMy4wIiwibmFtZSI6ImthZmthIiwidmVyc2lvbiI6IjEuMS4xOS4xLTAuMTAuMS4wIiwibWFpbnRhaW5lciI6InN1cHBvcnRAbWVzb3NwaGVyZS5pbyIsImRlc2NyaXB0aW9uIjoiQXBhY2hlIEthZmthIHJ1bm5pbmcgb24gREMvT1MiLCJ0YWdzIjpbIm1lc3NhZ2UiLCJicm9rZXIiLCJwdWJzdWIiXSwic2VsZWN0ZWQiOmZhbHNlLCJmcmFtZXdvcmsiOnRydWUsInBvc3RJbnN0YWxsTm90ZXMiOiJEQy9PUyBLYWZrYSBTZXJ2aWNlIGlzIGJlaW5nIGluc3RhbGxlZC5cblxuXHREb2N1bWVudGF0aW9uOiBodHRwczovL2RvY3MubWVzb3NwaGVyZS5jb20vY3VycmVudC91c2FnZS9zZXJ2aWNlLWd1aWRlcy9rYWZrYS9cblx0SXNzdWVzOiBodHRwczovL2Rjb3NqaXJhLmF0bGFzc2lhbi5uZXQvcHJvamVjdHMvS0FGS0EvaXNzdWVzIiwicG9zdFVuaW5zdGFsbE5vdGVzIjoiREMvT1MgS2Fma2EgU2VydmljZSBoYXMgYmVlbiB1bmluc3RhbGxlZC5cblBsZWFzZSBmb2xsb3cgdGhlIGluc3RydWN0aW9ucyBhdCBodHRwczovL2RvY3MubWVzb3NwaGVyZS5jb20vY3VycmVudC91c2FnZS9zZXJ2aWNlLWd1aWRlcy9rYWZrYS91bmluc3RhbGwgdG8gcmVtb3ZlIGFueSBwZXJzaXN0ZW50IHN0YXRlIGlmIHJlcXVpcmVkLiIsImltYWdlcyI6eyJpY29uLXNtYWxsIjoiaHR0cHM6Ly9kb3dubG9hZHMubWVzb3NwaGVyZS5jb20vdW5pdmVyc2UvYXNzZXRzL2ljb24tc2VydmljZS1rYWZrYS1zbWFsbC5wbmciLCJpY29uLW1lZGl1bSI6Imh0dHBzOi8vZG93bmxvYWRzLm1lc29zcGhlcmUuY29tL3VuaXZlcnNlL2Fzc2V0cy9pY29uLXNlcnZpY2Uta2Fma2EtbWVkaXVtLnBuZyIsImljb24tbGFyZ2UiOiJodHRwczovL2Rvd25sb2Fkcy5tZXNvc3BoZXJlLmNvbS91bml2ZXJzZS9hc3NldHMvaWNvbi1zZXJ2aWNlLWthZmthLWxhcmdlLnBuZyJ9fQ==",
    "DCOS_PACKAGE_REGISTRY_VERSION": "3.0",
    "DCOS_SERVICE_NAME": "kafka",
    "DCOS_PACKAGE_FRAMEWORK_NAME": "kafka",
    "DCOS_SERVICE_PORT_INDEX": "1",
    "DCOS_PACKAGE_VERSION": "1.1.19.1-0.10.1.0",
    "DCOS_MIGRATION_API_PATH": "/v1/plan",
    "DCOS_PACKAGE_NAME": "kafka",
    "MARATHON_SINGLE_INSTANCE_APP": "true",
    "DCOS_PACKAGE_IS_FRAMEWORK": "true",
    "DCOS_MIGRATION_API_VERSION": "v1"
  },
  "portDefinitions": [
    {
      "port": 10001,
      "protocol": "tcp",
      "name": "health",
      "labels": {}
    },
    {
      "port": 10002,
      "protocol": "tcp",
      "name": "api",
      "labels": {}
    }
  ],
  "uris": [
    "https://downloads.mesosphere.com/java/jre-8u121-linux-x64.tar.gz",
    "https://downloads.mesosphere.com/kafka/assets/1.1.19-0.10.1.0/scheduler.zip",
    "https://downloads.mesosphere.com/kafka/assets/kafka_2.11-0.10.1.0.tgz",
    "https://downloads.mesosphere.com/libmesos-bundle/libmesos-bundle-1.9-argus-1.1.x-3.tar.gz"
  ],
  "fetch": [
    {
      "uri": "https://downloads.mesosphere.com/java/jre-8u121-linux-x64.tar.gz",
      "extract": true,
      "executable": false,
      "cache": false
    },
    {
      "uri": "https://downloads.mesosphere.com/kafka/assets/1.1.19-0.10.1.0/scheduler.zip",
      "extract": true,
      "executable": false,
      "cache": false
    },
    {
      "uri": "https://downloads.mesosphere.com/kafka/assets/kafka_2.11-0.10.1.0.tgz",
      "extract": true,
      "executable": false,
      "cache": false
    },
    {
      "uri": "https://downloads.mesosphere.com/libmesos-bundle/libmesos-bundle-1.9-argus-1.1.x-3.tar.gz",
      "extract": true,
      "executable": false,
      "cache": false
    }
  ],
  "readinessChecks": [
    {
      "name": "kafkaUpdateProgress",
      "protocol": "HTTP",
      "path": "/v1/plan",
      "portName": "api",
      "intervalSeconds": 30,
      "timeoutSeconds": 10,
      "httpStatusCodesForReady": [
        200
      ],
      "preserveLastResponse": true
    }
  ],
  "upgradeStrategy": {
    "minimumHealthCapacity": 0,
    "maximumOverCapacity": 0
  }
}

也无法在第二个节点上运行的示例应用程序:

{
  "id": "/sleepscript",
  "cmd": " while :; do echo 'Hit CTRL+C'; sleep 1; done",
  "cpus": 1,
  "mem": 128,
  "disk": 0,
  "instances": 0,
  "acceptedResourceRoles": [
    "*"
  ],
  "portDefinitions": [
    {
      "port": 10000,
      "protocol": "tcp",
      "labels": {}
    }
  ]
}

@ EDIT2:

这些是日志中一些值得注意的行。我正在尝试运行同一任务的多个实例,因此第一个节点(*.11节点)的使用率为100%,而第二个节点(*.12节点)是空闲的:

Sep 04 09:07:01 master-1 java[2406]: [2017-09-04 09:07:01,682] INFO  Finished processing e74ab83d-b856-4c89-a5fb-0eccb4e74268-O1269 from 10.132.0.11. Matched 0 ops after 1 passes. ports(*) 1025->2180,2182->3887,3889->5049,5052->807
9,8082->8180,8182->9380,9383->29823,29825->32000; disk(*) 50100.0; disk(*) 6270.0; mem(*) 8639.0 left. (mesosphere.marathon.core.matcher.manager.impl.OfferMatcherManagerActor:marathon-akka.actor.default-dispatcher-81)
Sep 04 09:07:01 master-1 java[2406]: [2017-09-04 09:07:01,682] INFO  Finished processing e74ab83d-b856-4c89-a5fb-0eccb4e74268-O1268 from 10.132.0.12. Matched 0 ops after 1 passes. ports(*) 1026->2180,2182->3887,3889->5049,5052->807
9,8082->8180,8182->9099,9101->32000; disk(*) 50100.0; disk(*) 1263.0; cpus(*) 0.5; mem(*) 9279.0 left. (mesosphere.marathon.core.matcher.manager.impl.OfferMatcherManagerActor:marathon-akka.actor.default-dispatcher-81)
Sep 04 09:07:01 master-1 mesos-master[1963]: I0904 09:07:01.683557  1994 master.cpp:4732] Processing DECLINE call for offers: [ e74ab83d-b856-4c89-a5fb-0eccb4e74268-O1269 ] for framework a5370699-baae-4e77-a48e-1283d36b1906-0000 (m
arathon) at scheduler-88e19c66-42c6-48be-be03-662a9312cd91@10.132.0.10:15101
Sep 04 09:07:01 master-1 mesos-master[1963]: I0904 09:07:01.683799  1994 master.cpp:4732] Processing DECLINE call for offers: [ e74ab83d-b856-4c89-a5fb-0eccb4e74268-O1268 ] for framework a5370699-baae-4e77-a48e-1283d36b1906-0000 (m
arathon) at scheduler-88e19c66-42c6-48be-be03-662a9312cd91@10.132.0.10:15101

在第二个节点的日志行中,有一处与第一个节点不同:cpus(*) 0.5; 我猜第一个节点没有显示CPU是因为所有CPU都已被使用,但是第二个节点显示0.5 CPU,这是否意味着只有半个CPU?

这些是最初的资源offer,其中*.11节点接受了一个offer并运行了一个实例:

Sep 04 09:06:56 master-1 mesos-master[1963]: I0904 09:06:56.671070  1998 master.cpp:7029] Sending 2 offers to framework a5370699-baae-4e77-a48e-1283d36b1906-0000 (marathon) at scheduler-88e19c66-42c6-48be-be03-662a9312cd91@10.132.0
.10:15101
Sep 04 09:06:56 master-1 java[2406]: [2017-09-04 09:06:56,675] INFO  Offer [e74ab83d-b856-4c89-a5fb-0eccb4e74268-O1266]. Considering resources with roles {*} without resident reservation labels. Not all basic resources satisfied: c
pus NOT SATISFIED (1.0 > 0.5), mem SATISFIED (128.0 <= 128.0) (mesosphere.mesos.ResourceMatcher$:marathon-akka.actor.default-dispatcher-75)
Sep 04 09:06:56 master-1 java[2406]: [2017-09-04 09:06:56,675] INFO  Offer [e74ab83d-b856-4c89-a5fb-0eccb4e74268-O1265]. Considering resources with roles {*} without resident reservation labels. Not all basic resources satisfied: c
pus NOT SATISFIED (1.0 > 0.0), mem SATISFIED (128.0 <= 128.0) (mesosphere.mesos.ResourceMatcher$:marathon-akka.actor.default-dispatcher-75)
Sep 04 09:06:56 master-1 java[2406]: [2017-09-04 09:06:56,676] INFO  Finished processing e74ab83d-b856-4c89-a5fb-0eccb4e74268-O1266 from 10.132.0.12. Matched 0 ops after 1 passes. ports(*) 1026->2180,2182->3887,3889->5049,5052->807
9,8082->8180,8182->9099,9101->32000; disk(*) 50100.0; disk(*) 1263.0; cpus(*) 0.5; mem(*) 9279.0 left. (mesosphere.marathon.core.matcher.manager.impl.OfferMatcherManagerActor:marathon-akka.actor.default-dispatcher-75)
Sep 04 09:06:56 master-1 java[2406]: [2017-09-04 09:06:56,676] INFO  Finished processing e74ab83d-b856-4c89-a5fb-0eccb4e74268-O1265 from 10.132.0.11. Matched 1 ops after 2 passes. ports(*) 1025->2180,2182->3887,3889->5049,5052->807
9,8082->8180,8182->9380,9383->29823,29825->32000; disk(*) 50100.0; disk(*) 6270.0; mem(*) 8639.0 left. (mesosphere.marathon.core.matcher.manager.impl.OfferMatcherManagerActor:marathon-akka.actor.default-dispatcher-75)

不过,日志显示*.11节点似乎只有0个CPU,但最后一条日志显示它匹配成功了,之后还有一些带有正在运行的任务ID的日志。

@ EDIT3:

Bingo,在改为创建只使用0.1 CPU的任务之后,*.12节点终于被使用了,直到用完它拥有的全部0.5 CPU。该节点也无法同时运行两个以上的0.2 CPU任务,所以看起来正是这0.5 CPU限制了它。似乎有什么东西把它限制在0.5 CPU,但我不知道是什么。

@ edit4:

DCOS显示该节点共有2个CPU,Mesos UI也显示总共2个CPU,/proc/cpuinfo同样显示两个核心。

@ edit5:

我通过删除/var/lib/dcos/mesos-resources和/var/lib/mesos/slave/meta目录的全部内容“解决”了这个问题。但是我不知道是什么原因引起了这个问题。

@anotherEdit

mesos/master/slaves在正常工作时的内容:

{"slaves":[{"id":"6c620a26-7bc6-4287-b67a-de2b0eb8778c-S3","hostname":"10.132.0.12","port":5051,"attributes":{},"pid":"slave(1)@10.132.0.12:5051","registered_time":1504683660.37257,"reregistered_time":1504683660.37296,"resources":{"disk":52824.0,"mem":11839.0,"gpus":0.0,"cpus":2.0,"ports":"[1025-2180, 2182-3887, 3889-5049, 5052-8079, 8082-8180, 8182-32000]"},"used_resources":{"disk":50100.0,"mem":7168.0,"gpus":0.0,"cpus":1.3,"ports":"[7000-7001, 7199-7199, 9000-9001, 9042-9042, 9160-9160]"},"offered_resources":{"disk":0.0,"mem":0.0,"gpus":0.0,"cpus":0.0},"reserved_resources":{"cassandra-role":{"disk":50100.0,"mem":5376.0,"gpus":0.0,"cpus":1.5,"ports":"[7000-7001, 7199-7199, 9001-9001, 9042-9042, 9160-9160]"}},"unreserved_resources":{"disk":2724.0,"mem":6463.0,"gpus":0.0,"cpus":0.5,"ports":"[1025-2180, 2182-3887, 3889-5049, 5052-6999, 7002-7198, 7200-8079, 8082-8180, 8182-9000, 9002-9041, 9043-9159, 9161-32000]"},"active":true,"version":"1.2.2","reserved_resources_full":{"cassandra-role":[{"name":"cpus","type":"SCALAR","scalar":{"value":0.5},"role":"cassandra-role","reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"5827371e-2139-4278-99b5-85e7dd573f4c"}]}}},{"name":"mem","type":"SCALAR","scalar":{"value":4096.0},"role":"cassandra-role","reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"c5c881b0-45df-458b-9731-11cfb1b251c3"}]}}},{"name":"cpus","type":"SCALAR","scalar":{"value":0.5},"role":"cassandra-role","reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"1fed2809-f03e-4600-bfa7-b83a382dafbb"}]}}},{"name":"mem","type":"SCALAR","scalar":{"value":1024.0},"role":"cassandra-role","reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"3fa104ed-8fec-47ce-9702-6cde32bc97de"}]}}},{"name":"ports","type":"RANGES","ranges":{"range":[{"begin":9001,"end":9001}]},"role":"cassandra-role","reservati
on":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"072d9c13-4205-4ab0-bff2-59e2604774c1"}]}}},{"name":"cpus","type":"SCALAR","scalar":{"value":0.5},"role":"cassandra-role","reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"c06733c6-fe7e-43a1-a3b7-70371d7cae08"}]}}},{"name":"mem","type":"SCALAR","scalar":{"value":256.0},"role":"cassandra-role","reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"18b050cf-f45d-4e2d-8e32-b7d7028d4939"}]}}},{"name":"ports","type":"RANGES","ranges":{"range":[{"begin":7000,"end":7001},{"begin":7199,"end":7199},{"begin":9042,"end":9042},{"begin":9160,"end":9160}]},"role":"cassandra-role","reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"f49161b4-071f-45e6-9316-62b81c009f8a"}]}}},{"name":"disk","type":"SCALAR","scalar":{"value":50100.0},"role":"cassandra-role","reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"359a466c-1191-441e-8058-d77c84fbdf0f"}]}},"disk":{"persistence":{"id":"0df2d25d-8c64-42ac-ab65-1de63392b5e8","principal":"cassandra-principal"},"volume":{"mode":"RW","container_path":"volume"},"source":{"type":"MOUNT","mount":{"root":"\/dcos\/volume0"}}}}]},"used_resources_full":[{"name":"cpus","type":"SCALAR","scalar":{"value":0.5},"role":"cassandra-role","allocation_info":{"role":"cassandra-role"},"reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"1fed2809-f03e-4600-bfa7-b83a382dafbb"}]}}},{"name":"mem","type":"SCALAR","scalar":{"value":1024.0},"role":"cassandra-role","allocation_info":{"role":"cassandra-role"},"reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"3fa104ed-8fec-47ce-9702-6cde32bc97de"}]}}},{"name":"ports","type":"RANGES","ranges":{"range":[{"begin":9001,"end":9001}]},"role":"cassandra-role","allocati
on_info":{"role":"cassandra-role"},"reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"072d9c13-4205-4ab0-bff2-59e2604774c1"}]}}},{"name":"cpus","type":"SCALAR","scalar":{"value":0.5},"role":"cassandra-role","allocation_info":{"role":"cassandra-role"},"reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"c06733c6-fe7e-43a1-a3b7-70371d7cae08"}]}}},{"name":"mem","type":"SCALAR","scalar":{"value":4096.0},"role":"cassandra-role","allocation_info":{"role":"cassandra-role"},"reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"c5c881b0-45df-458b-9731-11cfb1b251c3"}]}}},{"name":"disk","type":"SCALAR","scalar":{"value":50100.0},"role":"cassandra-role","allocation_info":{"role":"cassandra-role"},"reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"359a466c-1191-441e-8058-d77c84fbdf0f"}]}},"disk":{"persistence":{"id":"0df2d25d-8c64-42ac-ab65-1de63392b5e8","principal":"cassandra-principal"},"volume":{"mode":"RW","container_path":"volume"},"source":{"type":"MOUNT","mount":{"root":"\/dcos\/volume0"}}}},{"name":"ports","type":"RANGES","ranges":{"range":[{"begin":7000,"end":7001},{"begin":7199,"end":7199},{"begin":9042,"end":9042},{"begin":9160,"end":9160}]},"role":"cassandra-role","allocation_info":{"role":"cassandra-role"},"reservation":{"principal":"cassandra-principal","labels":{"labels":[{"key":"resource_id","value":"f49161b4-071f-45e6-9316-62b81c009f8a"}]}}},{"name":"cpus","type":"SCALAR","scalar":{"value":0.3},"role":"*","allocation_info":{"role":"slave_public"}},{"name":"mem","type":"SCALAR","scalar":{"value":2048.0},"role":"*","allocation_info":{"role":"slave_public"}},{"name":"ports","type":"RANGES","ranges":{"range":[{"begin":9000,"end":9000}]},"role":"*","allocation_info":{"role":"slave_public"}}],"offered_resources_full":[]},{"id":"6c620a26-7bc6-4287-b67a-de2b0eb8778c-S2","hostname":"10.1
32.0.11","port":5051,"attributes":{},"pid":"slave(1)@10.132.0.11:5051","registered_time":1504683660.32389,"reregistered_time":1504683660.32425,"resources":{"disk":56370.0,"mem":11839.0,"gpus":0.0,"cpus":2.0,"ports":"[1025-2180, 2182-3887, 3889-5049, 5052-8079, 8082-8180, 8182-32000]"},"used_resources":{"disk":512.0,"mem":2048.0,"gpus":0.0,"cpus":0.6,"ports":"[23803-23805]"},"offered_resources":{"disk":0.0,"mem":0.0,"gpus":0.0,"cpus":0.0},"reserved_resources":{"kafka-role":{"disk":5000.0,"mem":2560.0,"gpus":0.0,"cpus":1.0,"ports":"[1025-1025, 9100-9100]"}},"unreserved_resources":{"disk":51370.0,"mem":9279.0,"gpus":0.0,"cpus":1.0,"ports":"[1026-2180, 2182-3887, 3889-5049, 5052-8079, 8082-8180, 8182-9099, 9101-32000]"},"active":true,"version":"1.2.2","reserved_resources_full":{"kafka-role":[{"name":"cpus","type":"SCALAR","scalar":{"value":0.5},"role":"kafka-role","reservation":{"principal":"kafka-principal","labels":{"labels":[{"key":"resource_id","value":"b722808d-79a2-4871-8c97-e95a2c847fbd"}]}}},{"name":"mem","type":"SCALAR","scalar":{"value":256.0},"role":"kafka-role","reservation":{"principal":"kafka-principal","labels":{"labels":[{"key":"resource_id","value":"6eb2865d-f823-473f-8c69-49a72eb7dbf6"}]}}},{"name":"ports","type":"RANGES","ranges":{"range":[{"begin":1025,"end":1025}]},"role":"kafka-role","reservation":{"principal":"kafka-principal","labels":{"labels":[{"key":"dynamic_port","value":"API_PORT"},{"key":"resource_id","value":"1deb5417-7852-49fb-ac81-fceb073369a8"}]}}},{"name":"cpus","type":"SCALAR","scalar":{"value":0.5},"role":"kafka-role","reservation":{"principal":"kafka-principal","labels":{"labels":[{"key":"resource_id","value":"fe21a9d8-0ba0-4d82-9bae-ca252d72bf93"}]}}},{"name":"mem","type":"SCALAR","scalar":{"value":2304.0},"role":"kafka-role","reservation":{"principal":"kafka-principal","labels":{"labels":[{"key":"resource_id","value":"1beca7e2-bf10-43be-be28-0f93b03eac37"}]}}},{"name":"disk","type":"SCALAR","scalar":{"value":5000.0},"role":"kafka-
role","reservation":{"principal":"kafka-principal","labels":{"labels":[{"key":"resource_id","value":"d7e9cb00-2204-481a-a999-cec4fee5d248"}]}},"disk":{"persistence":{"id":"661b8d81-8d33-432d-bd47-5538718730f9","principal":"kafka-principal"},"volume":{"mode":"RW","container_path":"kafka-volume-11c8a735-646f-4c46-9af5-ac2cbc52b697"}}},{"name":"ports","type":"RANGES","ranges":{"range":[{"begin":9100,"end":9100}]},"role":"kafka-role","reservation":{"principal":"kafka-principal","labels":{"labels":[{"key":"resource_id","value":"99c8303c-1892-4fb8-bd0f-8618ad877654"}]}}}]},"used_resources_full":[{"name":"cpus","type":"SCALAR","scalar":{"value":0.6},"role":"*","allocation_info":{"role":"slave_public"}},{"name":"mem","type":"SCALAR","scalar":{"value":2048.0},"role":"*","allocation_info":{"role":"slave_public"}},{"name":"disk","type":"SCALAR","scalar":{"value":512.0},"role":"*","allocation_info":{"role":"slave_public"}},{"name":"ports","type":"RANGES","ranges":{"range":[{"begin":23803,"end":23805}]},"role":"*","allocation_info":{"role":"slave_public"}}],"offered_resources_full":[]}],"recovered_slaves":[]}

0 个答案:

没有答案