有没有办法只按值对MapReduce输出进行排序,而不更改键和值的输出顺序?
原始输出类似于(按键排序):
A 1
B 2
C 1
D 3
我需要像这样的输出(按值排序):
D 3
B 2
A 1
C 1
我尝试再添加一个使用 InverseMapper 的排序作业来交换键和值,使输出按值排序。这样确实有效,但输出变成了下面这样:
3 D
2 B
1 A
1 C
有没有办法把输出中键和值的位置再反转回来?
还是有其他方法只按值排序吗?
谢谢
答案 0 :(得分:3)
M / R总是按键排序。如果要按值排序,则需要创建另一个作业,将要排序的值映射到键中。
答案 1 :(得分:1)
您可以使用自定义值类并实现 WritableComparable 接口的 compareTo() 方法,或者继承 WritableComparator 类并重写 compare() 方法,两种方式任选其一。以下是自定义键类和自定义值类:
**CustKey.java**
package in.aniruddha.mapreduce.custFormat;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
public class CustKey implements Writable {
protected Text customerId;
//default constructor
public CustKey()
{
super();
customerId=new Text();
}
public CustKey(Text customerId)
{
super();
this.customerId=customerId;
}
public CustKey(String customerId)
{
super();
this.customerId=new Text(customerId);
}
public CustKey(CustKey k)
{
super();
this.customerId=k.customerId;
}
/**
* @return the customerId
*/
public Text getCustomerId() {
return customerId;
}
/**
* @param customerId the customerId to set
*/
public void setCustomerId(Text customerId) {
this.customerId = customerId;
}
public void setCustomerId(String customerId) {
this.customerId = new Text(customerId);
}
public void readFields(DataInput arg0) throws IOException {
this.customerId.readFields(arg0);
}
public void write(DataOutput arg0) throws IOException {
this.customerId.write(arg0);
}
/* (non-Javadoc)
* @see java.lang.Object#hashCode()
*/
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result
+ ((customerId == null) ? 0 : customerId.hashCode());
return result;
}
/* (non-Javadoc)
* @see java.lang.Object#equals(java.lang.Object)
*/
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
CustKey other = (CustKey) obj;
if (customerId == null) {
if (other.customerId != null)
return false;
} else if (!customerId.equals(other.customerId))
return false;
return true;
}
}
同样,自定义值格式如下:**CustValue.java**
package in.aniruddha.mapreduce.custFormat;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
/**
 * Hadoop value type describing one customer record: the customer key plus
 * first name, last name, age and profession, all held as {@code Text}.
 *
 * <p>Instances are ordered by customer id first and by {@code age} second
 * (see {@link #compareTo(CustValue)}).
 */
public class CustValue implements WritableComparable<CustValue> {

    protected CustKey custId;
    protected Text firstName;
    protected Text lastName;
    protected Text age;
    protected Text profession;

    /** Creates a record whose fields are all empty, ready for {@link #readFields}. */
    public CustValue() {
        this.custId = new CustKey();
        this.firstName = new Text();
        this.lastName = new Text();
        this.age = new Text();
        this.profession = new Text();
    }

    /**
     * Creates a record from existing Hadoop objects. The key is passed through
     * the {@code CustKey} copy constructor; the {@code Text} arguments are
     * stored as given.
     *
     * @param custId     the customer key
     * @param firstName  the first name
     * @param lastName   the last name
     * @param age        the age
     * @param profession the profession
     */
    public CustValue(CustKey custId, Text firstName, Text lastName, Text age,
            Text profession) {
        this.custId = new CustKey(custId);
        this.firstName = firstName;
        this.lastName = lastName;
        this.age = age;
        this.profession = profession;
    }

    /**
     * Creates a record from plain strings, wrapping each one in a new object.
     *
     * @param custId     the customer id
     * @param firstName  the first name
     * @param lastName   the last name
     * @param age        the age
     * @param profession the profession
     */
    public CustValue(String custId, String firstName, String lastName, String age,
            String profession) {
        this.custId = new CustKey(custId);
        this.firstName = new Text(firstName);
        this.lastName = new Text(lastName);
        this.age = new Text(age);
        this.profession = new Text(profession);
    }

    /** @return the customer key */
    public CustKey getCustId() {
        return this.custId;
    }

    /** @param custId the customer key to set */
    public void setCustId(CustKey custId) {
        this.custId = custId;
    }

    /** @return the first name */
    public Text getFirstName() {
        return this.firstName;
    }

    /** @param firstName the first name to set */
    public void setFirstName(Text firstName) {
        this.firstName = firstName;
    }

    /** @return the last name */
    public Text getLastName() {
        return this.lastName;
    }

    /** @param lastName the last name to set */
    public void setLastName(Text lastName) {
        this.lastName = lastName;
    }

    /** @return the age */
    public Text getAge() {
        return this.age;
    }

    /** @param age the age to set */
    public void setAge(Text age) {
        this.age = age;
    }

    /** @return the profession */
    public Text getProfession() {
        return this.profession;
    }

    /** @param profession the profession to set */
    public void setProfession(Text profession) {
        this.profession = profession;
    }

    /**
     * Reads the fields in the same order {@link #write} emits them:
     * key, age, profession, last name, first name.
     */
    @Override
    public void readFields(DataInput arg0) throws IOException {
        custId.readFields(arg0);
        age.readFields(arg0);
        profession.readFields(arg0);
        lastName.readFields(arg0);
        firstName.readFields(arg0);
    }

    /**
     * Writes the fields in the order key, age, profession, last name,
     * first name. {@link #readFields} must consume them in the same order.
     */
    @Override
    public void write(DataOutput arg0) throws IOException {
        custId.write(arg0);
        age.write(arg0);
        profession.write(arg0);
        lastName.write(arg0);
        firstName.write(arg0);
    }

    /**
     * Compares by customer id and, when the ids are equal, by age.
     *
     * <p>NOTE(review): age is compared as {@code Text}, not as a number —
     * confirm that this ordering is what callers expect.
     */
    @Override
    public int compareTo(CustValue o) {
        final int byCustomer = custId.customerId.compareTo(o.custId.customerId);
        return (byCustomer != 0) ? byCustomer : age.compareTo(o.age);
    }
}
这里,如果您不想按键排序、而只想按值排序,就在自定义的 Value 类中实现 WritableComparable;又因为您希望根据值来排序,所以 Key 必须实现 Writable 接口。
<hr/> 如果您有疑问,请回复我。谢谢 :)
答案 2 :(得分:0)
您可以使用辅助排序根据值进行排序。定义复合键并覆盖SortComparator以根据值进行排序。这将在reducer中提供排序值。