Hadoop: how do I maintain a counter of method executions per DataNode?

Time: 2019-01-31 00:17:22

Tags: java hadoop mapreduce

I would like to know how to keep track of the number of times a method is executed on a particular DataNode inside a MapReduce job.

Unfortunately, the code I have developed so far produces very undesirable results. The count variable transmitted via the "makeRequest" method behaves erratically (and produces a repeating pattern). The results can be viewed here: results

For any given MapReduce job, the "count" should only ever increase. It is worth noting that the main input file I am using simply contains the numbers 0-750,000, one number per line. The desired end result is that a count of 750,000 is reported to the server.

For some background: I am currently developing a prime-numbers MapReduce program that is meant to periodically (on a timer) relay the quantity of numbers processed so far to a server. The server is hosted on the master node and is intended to display the job results in real time.

Please forgive any blatant mistakes in my understanding of the Hadoop framework; unfortunately I am still quite new to it and still learning.

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.InetAddress;
import java.net.URL;
import java.util.Timer;
import java.util.TimerTask;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

/** Hadoop MapReduce program to compute the prime numbers based on a given range provided within the input file. */

public final class Primes {

public static int counter = 0; 
public static boolean created = false;

/**
 * Defines Job Configuration
 */
public final static void main(final String[] args) throws Exception {
    final Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://master:9000");
    conf.set("mapreduce.jobtracker.address", "master:5431");
    conf.set("mapreduce.framework.name", "yarn");
    conf.set("yarn.resourcemanager.address", "master:8050");
    final Job job = new Job(conf, "Primes");
    job.setJarByClass(Primes.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(PrimesMap.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);
}

/**
 * Creates a timer which processes a GET request to the
 * hosted server, repeating every five seconds.
 */
public final static void createRequestTimer(){
    Timer timer = new Timer(); 
    TimerTask request = new TimerTask() {
        @Override
        public void run(){ makeRequest(counter);}
    };

    int delay = 3000; 
    int period = 5000;
    timer.scheduleAtFixedRate(request, delay, period); 
}

public static final class PrimesMap extends Mapper<LongWritable, Text, NullWritable, IntWritable> {
    final NullWritable nw = NullWritable.get(); 
    public final void map(final LongWritable key, final Text value, final Context context)
            throws IOException, InterruptedException {
        final int number = Integer.parseInt(value.toString());

        /**
         * Creates a timer the first time this method is executed. Ensures that only one
         * timer will be maintained.
         */
        if(created == false){
            createRequestTimer(); 
            created = true; 
        }

        /**
         * Checks to see if the number is in fact prime
         */
        if(isPrime(number)) {
            context.write(nw, new IntWritable(number));
        }

    }
}

/**
 * Basic primality test
 */
private static final boolean isPrime(final int number) {
    //Function should be performed on all numbers, and therefore
    //Can be incremented within this function (as the first step)
    counter++;
    if(number <= 1){
        return false; 
    }

    if(number == 2) {
        return true; 
    }

    if(number % 2 == 0){
        return false; 
    }

    for(int i = 3; i <= Math.sqrt(number) + 1; i = i + 2){
        if(number % i == 0){
            return false; 
        }
    }
    return true;
}

/**
 * Based on the counter parameter, a get request will be made to the
 * server. This method is effectively used to relay the number of numbers
 * that have been processed by this particular node to the server (which 
 * then goes on to display results in real time). 
 */
private static void makeRequest(int counter){
    String url = "http://192.168.1.2:5000/add/1/" + counter ;

    try {
        String IP = InetAddress.getLocalHost().toString();
        if(IP.contains("192.168.1.3")){
            url = "http://192.168.1.2:5000/add/1/" + counter; 
        }
        else if(IP.contains("192.168.1.4")){
            url = "http://192.168.1.2:5000/add/2/" + counter;
        }
        else if(IP.contains("192.168.1.5")){
            url = "http://192.168.1.2:5000/add/3/" + counter;
        }
        else if(IP.contains("192.168.1.6")){
            url = "http://192.168.1.2:5000/add/4/" + counter;
        }
        else if(IP.contains("192.168.1.7")){
            url = "http://192.168.1.2:5000/add/5/" + counter;
        }
        else if(IP.contains("192.168.1.8")){
            url = "http://192.168.1.2:5000/add/6/" + counter;
        }
        else if(IP.contains("192.168.1.9")){
            url = "http://192.168.1.2:5000/add/7/" + counter;
        }
        else if(IP.contains("192.168.1.10")){
            url = "http://192.168.1.2:5000/add/8/" + counter;
        }

        URL myurl = new URL(url);         
        HttpURLConnection con = (HttpURLConnection) myurl.openConnection();            
        con.setRequestMethod("GET");
        con.getInputStream(); 
        con.disconnect();
    } catch (Exception e){
        e.printStackTrace();
    }
}

}

The current results show that the reported "count" value both increases and decreases, which is not correct.

The expected behaviour is that the value following "GET /add/1/" never decreases and only increases over the course of the MapReduce job (as many times as the "isPrime()" method is called). Any help would be greatly appreciated! :)

To reiterate, what I would like to know is: how can I count the number of times a particular method is executed on each DataNode within the Hadoop framework?

The error in the code provided lies solely in the "counter" and how it is incremented across the "Mapper" and the "isPrime" function. I am not sure why the "counter" variable decreases over the course of the program.
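(As an aside, Hadoop itself provides a built-in Counter API that the framework aggregates across task attempts and reports per task. Below is a minimal sketch of counting isPrime() calls with it; the group and counter names are just illustrative. These counters are surfaced through the job client and the web UI rather than pushed to a custom server, so they do not replace the HTTP reporting above.)

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/** Mapper that counts isPrime() calls with Hadoop's built-in counters. */
public final class CountingPrimesMap extends Mapper<LongWritable, Text, NullWritable, IntWritable> {

    private final NullWritable nw = NullWritable.get();

    @Override
    protected void map(final LongWritable key, final Text value, final Context context)
            throws IOException, InterruptedException {
        final int number = Integer.parseInt(value.toString());

        // Count every record processed (one isPrime() call per record). The
        // framework sums this counter over all map tasks and also reports it
        // per task attempt, which indirectly gives a per-node breakdown.
        context.getCounter("Primes", "isPrimeCalls").increment(1);

        if (isPrime(number)) {
            context.write(nw, new IntWritable(number));
        }
    }

    private static boolean isPrime(final int number) {
        if (number <= 1) return false;
        if (number == 2) return true;
        if (number % 2 == 0) return false;
        for (int i = 3; i <= Math.sqrt(number) + 1; i += 2) {
            if (number % i == 0) return false;
        }
        return true;
    }
}

After the job finishes, job.getCounters().findCounter("Primes", "isPrimeCalls").getValue() gives the job-wide total.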

1 answer:

Answer 0 (score: 0)

It has 3 threads.
If you are happy with the update interval being 5 seconds, and if it is possible to make these changes, this is what I would look into:
On the aggregator side: the server that accepts /add/1/..., have it start at 0 and add up the values that are sent to it.
On the worker side: reset the value each time it is sent. (A sketch of this change is shown below.)

Looking at the log statements from early in the run may also reveal more clearly what is happening.
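A minimal sketch of that worker-side change, assuming the counter is kept in a java.util.concurrent.atomic.AtomicInteger and the server simply accumulates the deltas it receives (the URL and node id below are placeholders based on the question):

import java.net.HttpURLConnection;
import java.net.URL;
import java.util.concurrent.atomic.AtomicInteger;

public final class DeltaReporter {

    // Shared between the mapper thread (which increments it) and the
    // timer thread (which reads and resets it).
    private static final AtomicInteger counter = new AtomicInteger(0);

    /** Called from isPrime() in place of "counter++". */
    public static void recordCall() {
        counter.incrementAndGet();
    }

    /**
     * Called from the timer task. Sends only the number of calls made since
     * the previous report, so the server can simply add up whatever it
     * receives and the displayed total never goes backwards.
     */
    public static void makeRequest(String nodeId) {
        final int delta = counter.getAndSet(0); // read and reset atomically
        if (delta == 0) {
            return; // nothing new to report
        }
        try {
            // Placeholder URL based on the question; adjust to your server.
            URL url = new URL("http://192.168.1.2:5000/add/" + nodeId + "/" + delta);
            HttpURLConnection con = (HttpURLConnection) url.openConnection();
            con.setRequestMethod("GET");
            con.getInputStream().close();
            con.disconnect();
        } catch (Exception e) {
            // If the request fails this delta is lost; re-adding it with
            // counter.addAndGet(delta) would be one way to avoid that.
            e.printStackTrace();
        }
    }
}

Each map task normally runs in its own JVM, so each task keeps its own counter; with this change every task only ever reports the work it has done since its last report, and the server-side sum can only grow.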