System.getenv values and properties not read on the slave nodes of a Spark cluster

Date: 2016-10-11 16:51:09

Tags: apache-spark apache-kafka spark-streaming

I have a multi-node Spark cluster, and I submit my Spark program from the node where the master runs.

When the job runs on the slave nodes, the HOSTNAME parameter comes back empty. Below are the lines where the properties are read as null.

System.getenv("HOSTNAME") is not read on the slave nodes:

        System.out.println("line 76 System.getenv(HOSTNAME)=" + System.getenv("HOSTNAME"));

AUDIT_USER and AUDIT_PASSWORD are also null when read (both are in the properties file).

If I submit the job with a single node, I have no problem with these parameters. But when I submit the job in standalone mode across 6 nodes, I run into this issue.

I created the same folder for the properties file on all nodes.

Here is my code. Can you let me know why System.getenv returns null and why my properties are null?

package com.fb.cpd.myapp;

import java.io.Serializable;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.UUID;
import java.util.concurrent.Future;

import org.apache.commons.configuration.ConfigurationConverter;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.configuration.reloading.FileChangedReloadingStrategy;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.TaskContext;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;

import kafka.common.TopicAndPartition;
import kafka.message.MessageAndMetadata;
import kafka.serializer.DefaultDecoder;
import kafka.serializer.StringDecoder;

public class GenericLogic implements Serializable {
    /**
     * 
     */
    private static final long serialVersionUID = 1L;
    private static final Logger logger = LogManager.getLogger(GenericLogic.class);
    private PropertiesConfiguration props;
    private Producer<String, String> producer = null;
    private Future<RecordMetadata> receipt = null;
    private RecordMetadata receiptInfo = null;
    private ConnectToRDBMS auditor = null;
    private ConnectToRDBMS df = null;

    private static String myId = null;

    private Map<TopicAndPartition, Long> getOffsets(String topic) throws SQLException {
        String appName = "myapp";
        String TopicName = topic;
        Map<TopicAndPartition, Long> topicMap = new HashMap<>(); //
        System.out.println("line 64 before making connection");

        try {
            props = new PropertiesConfiguration("/app/lock/conf/empty.properties");
        } catch (ConfigurationException e) {
            // TODO Auto-generated catch block
            System.out.println("Line 70");
            e.printStackTrace();
        }

        try {
            System.out.println("line 76 System.getenv(HOSTNAME)=" + System.getenv("HOSTNAME"));
            auditor = new ConnectToRDBMS(System.getenv("HOSTNAME"), "lockSparkCollector", null, null, null, null, null,
                    0, props.getString("AUDIT_USER"), props.getString("AUDIT_PASSWORD"),
                    props.getString("AUDIT_DB_URL"));
        } catch (SQLException e) {
            logger.error("ASSERT: run() ERROR CONNECTING TO AUDIT DB " + e.getMessage());
        }
        System.out.println("line 64 after making connection");

        Statement stmt = null;

        String query = "select va_application, topic_name, partition_id, from_offset,until_offset from lock_spark_offsets where va_application = "
                + "'" + appName + "'" + " and topic_name= " + "'" + TopicName + "'";
        System.out.println("query" + query);
        System.out.println("before query exection");
        try {
            stmt = auditor.dbConnection.createStatement();
            System.out.println("line 81");

            ResultSet rs = stmt.executeQuery(query);
            System.out.println("line 83");
            while (rs.next()) {
                System.out.println("pass 1 of Resultset");
                System.out.println("getOffsets=" + topic.trim() + " " + rs.getInt("partition_id") + " "
                        + rs.getString("until_offset") + " " + rs.getString("until_offset"));
                Integer partition = rs.getInt("partition_id");

                TopicAndPartition tp = new TopicAndPartition(topic.trim(), partition);
                System.out.println("102");
                Long.parseLong(rs.getString("until_offset"));
                topicMap.put(tp, Long.parseLong(rs.getString("until_offset")));
                System.out.println("105");

            }
            System.out.println("after populating topic map");

        } catch (SQLException e) {
            System.out.println("printing exception");
            e.printStackTrace();
        } finally {
            if (stmt != null) {
                System.out.println("closing statement");
                stmt.close();
            }
        }
        return topicMap;
    }

    public void setDefaultProperties() {
        FileChangedReloadingStrategy strategy = new FileChangedReloadingStrategy();
        strategy.setRefreshDelay(10000);
        System.out.println("Line 45");
        // supply the properties file.
        try {
            props = new PropertiesConfiguration("/app/lock/conf/empty.properties");
        } catch (ConfigurationException e) {
            // TODO Auto-generated catch block
            System.out.println("Line 51");
            e.printStackTrace();
        }
        props.setReloadingStrategy(strategy);
        System.out.println("Line 56");

        // Producer configs
        if (!props.containsKey("acks")) {
            props.setProperty("acks", "1");
        }

        if (!props.containsKey("retries")) {
            props.setProperty("retries", "1000");
        }

        if (!props.containsKey("compression.type")) {
            props.setProperty("compression.type", "gzip");
        }

        if (!props.containsKey("request.timeout.ms")) {
            props.setProperty("request.timeout.ms", "600000");
        }

        if (!props.containsKey("batch.size")) {
            props.setProperty("batch.size", "32768");
        }

        if (!props.containsKey("buffer.memory")) {
            props.setProperty("buffer.memory", "134217728");
        }

        if (!props.containsKey("block.on.buffer.full")) {
            props.setProperty("block.on.buffer.full", "true");
        }

        if (!props.containsKey("SHUTDOWN")) {
            props.setProperty("SHUTDOWN", "false");
        }

        if (!props.containsKey("producer.topic")) {
            props.setProperty("producer.topic", "mytopic1");
        }

        Properties producer_props = ConfigurationConverter.getProperties(props);

        producer_props.setProperty("bootstrap.servers", props.getString("target.bootstrap.servers"));
        producer_props.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        producer_props.setProperty("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); // ????

        this.producer = new KafkaProducer<String, String>(producer_props);
        System.out.println("Line 107");

    }

    public void PublishMessages(String st) {

        try {
            System.out.println("Line 111");
            String key = UUID.randomUUID().toString().replace("-", "");
            System.out.println("Started Producing...");

            receipt = producer.send(new ProducerRecord<String, String>(props.getString("producer.topic"), key, // Key
                    st));
            System.out.println("After Completion of Producing Producing");
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("Exception in PublishMessages ");
        }

    }

    public void DBConnect() {
        try {
            auditor = new ConnectToRDBMS(System.getenv("HOSTNAME"), "myapp", props.getString("consumer.topic"), null,
                    null, null, null, 0, props.getString("AUDIT_USER"), props.getString("AUDIT_PASSWORD"),
                    props.getString("AUDIT_DB_URL"));
        } catch (SQLException e) {
            logger.error("ASSERT: run() ERROR CONNECTING TO AUDIT DB " + e.getMessage());
            return;
        }
    }

    private void writeToDB(Long startTime, Integer partnId, String fromOffset, String untilOffset, Integer count) {
        this.auditor.audit(startTime, partnId, fromOffset, untilOffset, count);

    }

    /**
     * 
     * @param jsc
     * @param topicSet
     * @throws Exception
     */
    public static void main(String[] args) {
        String topicNames = "MySourceTopic";
        GenericLogic ec = new GenericLogic();
        Map<TopicAndPartition, Long> topicMap = null;
        try {

            topicMap = ec.getOffsets("MySourceTopic");

        } catch (SQLException e1) {
            // TODO Auto-generated catch block
            e1.printStackTrace();
        }

        boolean clusterMode = false;

        Integer batchDuration = Integer.parseInt("30000");
        JavaSparkContext sparkConf = new JavaSparkContext("abcd.net:7077", "Kafka-Spark-Integration");

        sparkConf.getConf().set("spark.local.ip", "lock-dt-a4d.xyz.com");
        sparkConf.getConf().set("spark.eventLog.enabled", "false");
        sparkConf.getConf().set("spark.shuffle.blockTransferService", "nio");

        JavaStreamingContext jsc = new JavaStreamingContext(sparkConf, new Duration(10000));
        Map<String, String> kafkaParams = new HashMap<String, String>();
        String pollInterval = "10000";
        String zookeeper = "lock-dt-a5d.xyz.com:2181,lock-dt-a6d.xyz.com:2181";

        kafkaParams.put("metadata.broker.list", "lock-dt-a5d.xyz.com:9092,lock-dt-a6d.xyz.com:9092");
        kafkaParams.put("group.id", "Consumer");
        kafkaParams.put("client.id", "Consumer");
        kafkaParams.put("zookeeper.connect", zookeeper);

        JavaInputDStream<byte[]> directKafkaStream = KafkaUtils.createDirectStream(jsc, String.class, byte[].class,
                StringDecoder.class, DefaultDecoder.class, byte[].class, kafkaParams, topicMap,
                (Function<MessageAndMetadata<String, byte[]>, byte[]>) MessageAndMetadata::message);

        directKafkaStream.foreachRDD(rdd -> {
            if (rdd.isEmpty()) {
                System.out.println("No events polled in last " + pollInterval + " milli seconds");
                return;
            }

            rdd.foreachPartition(itr -> {
                Integer partnId = TaskContext.get().partitionId();
                Long systime = System.nanoTime();
                Map<String, String> hmap = new HashMap<String, String>();

                GenericLogic ec2 = new GenericLogic();
                ec2.setDefaultProperties();
                ec2.DBConnect();

                try {

                    while (itr.hasNext()) {
                        System.out.println("232");
                    }

                } catch (Exception e) {
                    logger.error(e.getMessage(), e);
                }

            });
        });
        jsc.start();
        jsc.awaitTermination();
    }

}

2 answers:

Answer 0 (score: 0)

Please tell us the operating system of all the nodes, and whether you have made sure HOSTNAME is exported on the master node. It will be easier to answer your question once you share your OS details.

It may not be relevant to your context, but just for information: System.getenv("HOSTNAME") may not return the hostname on all platforms (for example Ubuntu or Mac).

Better yet, why not export HOSTNAME explicitly?
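
In case it helps (a minimal sketch of my own, not from the original answer): if you cannot rely on HOSTNAME being exported on every worker, a portable fallback in Java is to resolve the local hostname through InetAddress instead of the environment variable.

import java.net.InetAddress;
import java.net.UnknownHostException;

public final class HostNameUtil {
    // Hypothetical helper: prefer the HOSTNAME env var, fall back to a JVM lookup.
    public static String resolveHostName() {
        String host = System.getenv("HOSTNAME"); // null when the variable is not exported
        if (host == null || host.isEmpty()) {
            try {
                host = InetAddress.getLocalHost().getHostName();
            } catch (UnknownHostException e) {
                host = "unknown-host"; // last-resort placeholder
            }
        }
        return host;
    }
}

You could then pass resolveHostName() to the ConnectToRDBMS constructor instead of System.getenv("HOSTNAME").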

Note: I assume you have already checked that props is not null or empty? If not, debug and check whether the properties file was loaded, and, if it was loaded, that it is not an empty properties file and has actually picked up the properties from the file.

Looking at your problem (it is not only the environment variable, the properties are not returned either), there may be an issue with the properties file or its relative location on the different machines. If it is not an exact copy placed on each machine, also check that it is valid for Linux (i.e. not written and edited on Windows and then dropped onto Linux).
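
One way to rule out a stale or missing copy on the workers (a sketch under my own assumptions, not part of the original answer): ship the properties file with the job using spark-submit --files /app/lock/conf/empty.properties and resolve its node-local path with SparkFiles, so every executor reads exactly the same copy.

import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.spark.SparkFiles;

// Executor-side check, e.g. inside foreachPartition.
// Assumes the job was submitted with: --files /app/lock/conf/empty.properties
String localPath = SparkFiles.get("empty.properties"); // path of the copy Spark shipped to this node
try {
    PropertiesConfiguration nodeProps = new PropertiesConfiguration(localPath);
    System.out.println("AUDIT_USER present on this node: " + nodeProps.containsKey("AUDIT_USER"));
} catch (ConfigurationException e) {
    e.printStackTrace(); // file missing or unreadable on this node
}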

Answer 1 (score: 0)

I started the slaves with start-slaves.sh. That was the problem. I had to start the workers by supplying the master address.
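
For reference (my note, assuming the standard Spark standalone scripts): this typically means running the per-node start script with the master URL on each worker machine, for example ./sbin/start-slave.sh spark://abcd.net:7077 (using the master address from the code above), rather than relying on start-slaves.sh alone.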