我已经填充了一个Hbase表,其中包含有关tweet的rowid和vrious信息,如clean-text,url,hashtag等,如下所示
902221655086211073 column=clean-tweet:clean-text-cta, timestamp=1514793745304, value=democrat mayor order hurricane harvey stand houston
然而,在填充时我注意到有些行是空的,如
902487280543305728 column=clean-tweet:clean-text-cta, timestamp=1514622371008, value=
现在如何找到拥有数据的行数?
请帮助我
答案 0 :(得分:0)
目前在HBase shell中没有规定这样做。也许你可以使用像这样的简单代码来获得一些没有提供列限定符值的记录。
CountAndFilter [tableName] [columnFamily] [columnQualifier]
import java.io.IOException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
public class CountAndFilter {
private static Connection conn;
private static int recordsWithoutValue = 0;
public static Admin getConnection() throws IOException {
if (conn == null) {
conn = ConnectionFactory.createConnection(HBaseConfiguration.create());
}
return conn.getAdmin();
}
public static void main(String args[]) throws IOException {
getConnection();
scan(args[0], args[1], args[2]);
System.out.println("Records with empty value : " + recordsWithoutValue);
}
public static void scan(String tableName, String columnFamily, String columnQualifier) throws IOException {
Table table = conn.getTable(TableName.valueOf(tableName));
ResultScanner rs = table.getScanner(new Scan().addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(columnQualifier)));
Result res = null;
try {
while ((res = rs.next()) != null) {
if (res.containsEmptyColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(columnQualifier))){
recordsWithoutValue++;
}
}
} finally {
rs.close();
}
}
}