hadoop MapReduce仅按值排序

时间:2015-04-04 12:53:05

标签: hadoop mapreduce

有没有办法只按值对MapReduce输出进行排序,而不更改键和值的输出顺序?

原始输出类似于(按键排序):

A 1

B 2

C 1

D 3

我需要像这样的输出(按值排序):

D 3

B 2

A 1

C 1

我尝试通过InverseMapper再添加一个排序作业来交换键和值,使输出按值排序。这样确实可行,但输出变成了下面这样:

3 D

2 B

1 A

1 C

有没有办法把输出中键和值的位置再反转回来?

还是有其他方法只按值排序吗?

谢谢

3 个答案:

答案 0 :(得分:3)

M / R总是按键排序。如果要按值排序,则需要创建另一个作业,将要排序的值映射到键中。

答案 1 :(得分:1)

您可以使用自定义值类型并实现WritableComparable接口的compareTo()方法,或者继承WritableComparator类并重写compare()方法,二者任选其一。以下是自定义键类和自定义值类:
CustKey.java

package in.aniruddha.mapreduce.custFormat;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;

public class CustKey implements Writable {
    protected Text customerId;
    //default constructor
    public CustKey()
    {
        super();
        customerId=new Text();
    }
    public CustKey(Text customerId)
    {
        super();
        this.customerId=customerId;
    }
    public CustKey(String customerId)
    {
        super();
        this.customerId=new Text(customerId);
    }
    public CustKey(CustKey k)
    {
        super();
        this.customerId=k.customerId;
    }
    /**
     * @return the customerId
     */
    public Text getCustomerId() {
        return customerId;
    }
    /**
     * @param customerId the customerId to set
     */
    public void setCustomerId(Text customerId) {
        this.customerId = customerId;
    }
    public void setCustomerId(String customerId) {
        this.customerId = new Text(customerId);
    }

    public void readFields(DataInput arg0) throws IOException {
        this.customerId.readFields(arg0);
    }


    public void write(DataOutput arg0) throws IOException {
        this.customerId.write(arg0);
    }



    /* (non-Javadoc)
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result
                + ((customerId == null) ? 0 : customerId.hashCode());
        return result;
    }
    /* (non-Javadoc)
     * @see java.lang.Object#equals(java.lang.Object)
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        CustKey other = (CustKey) obj;
        if (customerId == null) {
            if (other.customerId != null)
                return false;
        } else if (!customerId.equals(other.customerId))
            return false;
        return true;
    }
}

同样,下面是自定义值类型 CustValue.java:

package in.aniruddha.mapreduce.custFormat;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;

/**
 * Hadoop record type describing a customer: id, first name, last name, age
 * and profession, all stored as {@code Text}.
 *
 * <p>Ordering ({@link #compareTo}) uses the customer id and then the age;
 * {@link #equals} and {@link #hashCode} use the same two fields so that they
 * stay consistent with the ordering.
 *
 * <p>The serialized field order is: custId, age, profession, lastName,
 * firstName. {@link #write} and {@link #readFields} must keep the same order.
 */
public class CustValue implements WritableComparable<CustValue> {
    protected CustKey custId;
    protected Text firstName,lastName,age,profession;

    /** Creates a record with all fields empty (needed for deserialization). */
    public CustValue() {
        custId = new CustKey();
        firstName = new Text();
        lastName = new Text();
        age = new Text();
        profession = new Text();
    }

    /**
     * Creates a record from existing Hadoop objects. Every argument is copied,
     * so the caller may reuse or mutate its instances afterwards.
     *
     * @param custId     the customer key; must not be null
     * @param firstName  the first name
     * @param lastName   the last name
     * @param age        the age, as text
     * @param profession the profession
     */
    public CustValue(CustKey custId, Text firstName, Text lastName, Text age,
            Text profession) {
        // Copy the id's Text explicitly rather than relying on how CustKey's
        // copy constructor treats its field.
        this.custId = new CustKey(copyOf(custId.getCustomerId()));
        this.firstName = copyOf(firstName);
        this.lastName = copyOf(lastName);
        this.age = copyOf(age);
        this.profession = copyOf(profession);
    }

    /**
     * Creates a record from plain strings.
     *
     * @param custId     the customer id
     * @param firstName  the first name
     * @param lastName   the last name
     * @param age        the age, as text
     * @param profession the profession
     */
    public CustValue(String custId, String firstName, String lastName, String age,
            String profession) {
        this.custId = new CustKey(custId);
        this.firstName = new Text(firstName);
        this.lastName = new Text(lastName);
        this.age = new Text(age);
        this.profession = new Text(profession);
    }

    /** Returns an independent copy of {@code source}, or null if it is null. */
    private static Text copyOf(Text source) {
        return (source == null) ? null : new Text(source);
    }

    /**
     * @return the custId
     */
    public CustKey getCustId() {
        return custId;
    }

    /**
     * @param custId the custId to set
     */
    public void setCustId(CustKey custId) {
        this.custId = custId;
    }

    /**
     * @return the firstName
     */
    public Text getFirstName() {
        return firstName;
    }

    /**
     * @param firstName the firstName to set
     */
    public void setFirstName(Text firstName) {
        this.firstName = firstName;
    }

    /**
     * @return the lastName
     */
    public Text getLastName() {
        return lastName;
    }

    /**
     * @param lastName the lastName to set
     */
    public void setLastName(Text lastName) {
        this.lastName = lastName;
    }

    /**
     * @return the age
     */
    public Text getAge() {
        return age;
    }

    /**
     * @param age the age to set
     */
    public void setAge(Text age) {
        this.age = age;
    }

    /**
     * @return the profession
     */
    public Text getProfession() {
        return profession;
    }

    /**
     * @param profession the profession to set
     */
    public void setProfession(Text profession) {
        this.profession = profession;
    }

    /**
     * Reads all fields from {@code in}, in the same order {@link #write} emits
     * them.
     *
     * @param in the stream to read from
     * @throws IOException if the underlying read fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.custId.readFields(in);
        this.age.readFields(in);
        this.profession.readFields(in);
        this.lastName.readFields(in);
        this.firstName.readFields(in);
    }

    /**
     * Writes all fields to {@code out}: custId, age, profession, lastName,
     * firstName.
     *
     * @param out the stream to write to
     * @throws IOException if the underlying write fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        this.custId.write(out);
        this.age.write(out);
        this.profession.write(out);
        this.lastName.write(out);
        this.firstName.write(out);
    }

    /**
     * Orders records by customer id, then by age.
     *
     * @param o the record to compare against
     * @return a negative, zero or positive value as this record sorts before,
     *         equal to, or after {@code o}
     */
    @Override
    public int compareTo(CustValue o) {
        int byId = this.custId.getCustomerId().compareTo(o.custId.getCustomerId());
        if (byId != 0) {
            return byId;
        }
        // NOTE(review): age is compared as Text, not as a number -- confirm
        // that textual ordering is what callers want.
        return this.age.compareTo(o.age);
    }

    /**
     * Two records are equal when their customer id and age are equal, matching
     * the fields used by {@link #compareTo}.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        CustValue other = (CustValue) obj;
        return java.util.Objects.equals(custId, other.custId)
                && java.util.Objects.equals(age, other.age);
    }

    /** Hash over the same fields as {@link #equals}: customer id and age. */
    @Override
    public int hashCode() {
        return java.util.Objects.hash(custId, age);
    }

    /**
     * @return the fields joined by tabs in the order custId, firstName,
     *         lastName, age, profession
     */
    @Override
    public String toString() {
        Text id = (custId == null) ? null : custId.getCustomerId();
        return id + "\t" + firstName + "\t" + lastName + "\t" + age + "\t" + profession;
    }
}

在这里,如果您不想按键排序、而只想按值排序,就让自定义的Value类实现WritableComparable接口;由于排序依据的是值,Key类只需要实现Writable接口即可。

<hr/> 如果您还有疑问,请回复我。谢谢 :)

答案 2 :(得分:0)

您可以使用辅助排序根据值进行排序。定义复合键并覆盖SortComparator以根据值进行排序。这将在reducer中提供排序值。