我正在尝试在应用一些过滤器后从HBase获取行中的选定列。考虑如下表:
ename:fname ename:lname salary:gross salary:da salary:ta
我想获得所有工资 > 1500 的员工列表。为此,我写了以下代码。我面临的问题是:当我对列进行过滤时,输出中只包含被过滤的那一列——这是合理的,因为这些过滤器本来就是为此设计的;但如果我想获取所需的多个列,却只根据某一个特定列进行过滤,该怎么办?就像我刚才提到的——所有工资 > 1500 的员工列表。
输出应该是以下一组列:
lname、fname、salary:gross、salary:ta。到目前为止的代码如下:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.QualifierFilter;
import org.apache.hadoop.hbase.filter.FamilyFilter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Scans the "emp" table and prints every cell of column salary:gross whose
 * value compares greater than or equal to "1500".
 *
 * <p>The family, qualifier and value filters are combined with MUST_PASS_ALL,
 * so only the cells that satisfy all three are returned. The other columns of
 * a matching row (for example ename:fname) are therefore absent from the output.
 */
public class MyQualifierFilterExample {

    /**
     * Runs the scan and prints one line per matching cell.
     *
     * @param args unused
     * @throws IOException if the table cannot be opened, scanned or closed
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "emp");
        try {
            List<Filter> filters = new ArrayList<Filter>();

            Filter famFilter = new FamilyFilter(CompareFilter.CompareOp.EQUAL,
                    new BinaryComparator(Bytes.toBytes("salary")));
            filters.add(famFilter);

            Filter colFilter = new QualifierFilter(CompareFilter.CompareOp.EQUAL,
                    new BinaryComparator(Bytes.toBytes("gross")));
            filters.add(colFilter);

            // NOTE: the comparison is made on the bytes of the string "1500", so it
            // is lexicographic rather than numeric (e.g. "200" sorts after "1500").
            Filter valFilter = new ValueFilter(CompareFilter.CompareOp.GREATER_OR_EQUAL,
                    new BinaryComparator(Bytes.toBytes("1500")));
            filters.add(valFilter);

            FilterList fl = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);
            Scan scan = new Scan();
            scan.setFilter(fl);

            ResultScanner scanner = table.getScanner(scan);
            try {
                System.out.println("Scanning table... ");
                for (Result result : scanner) {
                    for (KeyValue kv : result.raw()) {
                        System.out.println("kv:" + kv + ", Key: " + Bytes.toString(kv.getRow())
                                + ", Value: " + Bytes.toString(kv.getValue()));
                    }
                }
            } finally {
                // Release the scanner even if iterating or printing fails.
                scanner.close();
            }
            System.out.println("Completed ");
        } finally {
            // Release the table's client-side resources on every exit path.
            table.close();
        }
    }
}
Scanning table...
kv:101/salary:gross/1339876269770/Put/vlen=4, Key: 101, Value: 2000
kv:102/salary:gross/1339876277659/Put/vlen=4, Key: 102, Value: 2400
kv:105/salary:gross/1339876300585/Put/vlen=4, Key: 105, Value: 2300
kv:106/salary:gross/1339876310004/Put/vlen=4, Key: 106, Value: 2900
Completed
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.QualifierFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.FamilyFilter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class MyQualifierFilterExample {
public static void main(String[] args) throws IOException {
Configuration conf = HBaseConfiguration.create();
HTable table = new HTable(conf, "emp");
List<Filter> filters = new ArrayList<Filter>();
SingleColumnValueFilter colValFilter = new SingleColumnValueFilter(Bytes.toBytes("salary"), Bytes.toBytes("gross")
, CompareFilter.CompareOp.GREATER_OR_EQUAL, new BinaryComparator(Bytes.toBytes("1300")));
colValFilter.setFilterIfMissing(false);
filters.add(colValFilter);
Filter colValFilter2 = new SingleColumnValueFilter(Bytes.toBytes("salary"), Bytes.toBytes("da")
, CompareFilter.CompareOp.GREATER_OR_EQUAL, new BinaryComparator(Bytes.toBytes("150")));
filters.add(colValFilter2);
//Filter colValFilter3 = new SingleColumnValueFilter(Bytes.toBytes("ename"), Bytes.toBytes("fname")
// , CompareFilter.CompareOp.GREATER_OR_EQUAL, new SubstringComparator("jack"));
//filters.add(colValFilter3);
FilterList fl = new FilterList( FilterList.Operator.MUST_PASS_ALL, filters);
Scan scan = new Scan();
scan.setFilter(fl);
scan.addColumn(Bytes.toBytes("ename"), Bytes.toBytes("fname"));
scan.addColumn(Bytes.toBytes("ename"), Bytes.toBytes("lname"));
scan.addColumn(Bytes.toBytes("salary"), Bytes.toBytes("gross"));
scan.addColumn(Bytes.toBytes("salary"), Bytes.toBytes("da"));
ResultScanner scanner = table.getScanner(scan);
String key = new String("~");
String keyFlag = new String("~");
System.out.println("Scanning table... ");
for (Result result : scanner) {
//System.out.println("getRow:"+Bytes.toString(result.getRow()));
key = "~";
for (KeyValue kv : result.raw()) {
if (key.compareTo(keyFlag)==0)
{
key = Bytes.toString(kv.getRow());
System.out.print("Key: " + key);
}
//System.out.print("Family - "+Bytes.toString(kv.getFamily()));
//System.out.print(", Buffer - "+Bytes.toString(kv.getBuffer() ));
//System.out.print(", FamilyOffset - " + kv.getFamilyOffset() );
System.out.print(", "+Bytes.toString(kv.getFamily())+"."+Bytes.toString(kv.getQualifier()));
System.out.print("=" +Bytes.toString(kv.getValue()));
}
System.out.println("");
System.out.println("-------------------");
}
scanner.close();
System.out.println("Completed ");
}
}
Scanning table...
Key: 103, ename.fname=peter, ename.lname=parker, salary.da=190, salary.gross=1400
-------------------
Key: 105, ename.fname=harry, ename.lname=potter, salary.da=154, salary.gross=2300
-------------------
Completed
答案 0 :(得分:1)
您应该将 SingleColumnValueFilter 与 addFamily(或 addColumn)结合使用。
见下文(我目前无法在我这边进行测试):
// Keep only the rows whose salary:gross value compares greater than "1500".
// The value is compared as the bytes of the string "1500".
SingleColumnValueFilter filter = new SingleColumnValueFilter(
    Bytes.toBytes("salary"),
    Bytes.toBytes("gross"),
    CompareFilter.CompareOp.GREATER,
    Bytes.toBytes("1500")
);
// To prevent the entire row from being emitted
// if the column is not found on a row.
// This setting belongs to the filter, not to the Scan.
filter.setFilterIfMissing(true);

Scan scan = new Scan();
scan.setFilter(filter);
// Columns to return for each row that passes the filter.
scan.addFamily(Bytes.toBytes("ename"));
scan.addColumn(Bytes.toBytes("salary"), Bytes.toBytes("da"));
scan.addColumn(Bytes.toBytes("salary"), Bytes.toBytes("gross"));
答案 1 :(得分:0)
**ValueFilter**:此过滤器使结果中只包含具有特定值的列。
这就是为什么您只获得过滤器中指定的列。
如果我理解错了请告诉我:您想做的是在工资 > 1500 时检索该行的所有列,对吗?
答案 2 :(得分:0)
您的要求是关系型的。因此,我建议您使用HBase上的包装器来简化生活。
考虑使用 Apache Phoenix:它是构建在 HBase 之上的高性能 SQL 层,使用它可以运行如下查询:select * from emp where salary > 1500。