I am just a beginner with Spark. I am trying to connect to my HBase database table named mimic3, which has a column family called sepsiscategories containing many columns, and I want to fetch the "subject_id" column. Here is my code:
import org.apache.hadoop.conf.Configuration
import org.apache.spark.sql.SparkSession
import org.apache.hadoop.fs.{FileSystem,FSDataInputStream,Path}
import java.net.URI
import java.io.File
import java.util.Properties
import java.sql.DriverManager
import org.apache.spark.sql.{Row,SaveMode}
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.conf.Configuration._
import spark.implicits._
import spark.sql
val sqlContext = new org.apache.spark.sql.SQLContext(sc)

// Point TableInputFormat at the HBase table and read it as an RDD of (row key, Result).
val mimic_table_conf = HBaseConfiguration.create()
mimic_table_conf.set(TableInputFormat.INPUT_TABLE, "mimic3")
val mimic_PatternsFromHbase = spark.sparkContext.newAPIHadoopRDD(mimic_table_conf, classOf[TableInputFormat], classOf[ImmutableBytesWritable], classOf[Result])
// Extract the row key and the sepsiscategories:admissiontype cell from each Result and build a DataFrame.
val sepsiscategories = mimic_PatternsFromHbase.mapPartitions(f => f.map(row1 => (
  Bytes.toString(row1._2.getRow),
  Bytes.toString(row1._2.getValue(Bytes.toBytes("sepsiscategories"), Bytes.toBytes("admissiontype")))
))).toDF("id", "admissiontype")

sepsiscategories.createOrReplaceTempView("sepsiscategories")
spark.sql("select * from sepsiscategories").show
The last line, spark.sql("select * from sepsiscategories").show, gives me this error:
org.apache.hadoop.hbase.DoNotRetryIOException: java.lang.NoClassDefFoundError: com/yammer/metrics/core/Gauge
    at org.apache.hadoop.hbase.client.RpcRetryingCaller.translateException(RpcRetryingCaller.java:248)
    at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:221)
    at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas$RetryingRPC.call(ScannerCallableWithReplicas.java:388)
    at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas$RetryingRPC.call(ScannerCallableWithReplicas.java:362)
    at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithRetries(RpcRetryingCaller.java:142)
    at org.apache.hadoop.hbase.client.ResultBoundedCompletionService$QueueingFuture.run(ResultBoundedCompletionService.java:80)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NoClassDefFoundError: com/yammer/metrics/core/Gauge
    at org.apache.hadoop.hbase.ipc.AbstractRpcClient.callMethod(AbstractRpcClient.java:401)
    at org.apache.hadoop.hbase.ipc.AbstractRpcClient.callBlockingMethod(AbstractRpcClient.java:327)
    at org.apache.hadoop.hbase.ipc.AbstractRpcClient.access$200(AbstractRpcClient.java:94)
    at org.apache.hadoop.hbase.ipc.AbstractRpcClient$BlockingRpcChannelImplementation.callBlockingMethod(AbstractRpcClient.java:571)
    at org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$BlockingStub.scan(ClientProtos.java:37059)
    at org.apache.hadoop.hbase.client.ScannerCallable.openScanner(ScannerCallable.java:405)
    at org.apache.hadoop.hbase.client.ScannerCallable.call(ScannerCallable.java:274)
    at org.apache.hadoop.hbase.client.ScannerCallable.call(ScannerCallable.java:62)
    at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:219)
    ... 7 more
Caused by: java.lang.ClassNotFoundException: com.yammer.metrics.core.Gauge
    at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:349)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
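To narrow this down, one thing I can check from the same spark-shell session is which jars were actually shipped to it (just a diagnostic sketch, using the Spark 2.x SparkContext API):

// List the jars explicitly added to this Spark session (e.g. via --jars or sc.addJar).
// Jars that only sit on the default classpath (e.g. spark.driver.extraClassPath) will not show up here.
spark.sparkContext.listJars().foreach(println)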
Here is the result of running a scan command on the mimic3 table:
scan "mimic3"
ROW COLUMN+CELL
100 column=sepsiscategories:admissiontype, timestamp=1573115453362, value=ScheduledSurgical
100 column=sepsiscategories:age, timestamp=1573115453362, value=71.94
100 column=sepsiscategories:bicarbonate_max, timestamp=1573115453362, value=22.0
100 column=sepsiscategories:bicarbonate_min, timestamp=1573115453362, value=22.0
100 column=sepsiscategories:bun_max, timestamp=1573115453362, value=21.0
100 column=sepsiscategories:bun_min, timestamp=1573115453362, value=16.0
100 column=sepsiscategories:explicit_sepsis, timestamp=1573115453362, value=0
100 column=sepsiscategories:hadm_id, timestamp=1573115453362, value=153952
100 column=sepsiscategories:has_choc_sepsis, timestamp=1573115453362, value=0
100 column=sepsiscategories:has_sepsis, timestamp=1573115453362, value=0
100 column=sepsiscategories:has_severe_sepsis, timestamp=1573115453362, value=0
100 column=sepsiscategories:has_sirs, timestamp=1573115453362, value=0
100 column=sepsiscategories:heartrate_max, timestamp=1573115453362, value=104.0
100 column=sepsiscategories:heartrate_min, timestamp=1573115453362, value=62.0
100 column=sepsiscategories:icustay_id, timestamp=1573115453362, value=221100
100 column=sepsiscategories:infection, timestamp=1573115453362, value=0
100 column=sepsiscategories:intime, timestamp=1573115453362, value=2157-08-10 12:16:00.0
100 column=sepsiscategories:mingcs, timestamp=1573115453362, value=15.0
100 column=sepsiscategories:organ_dysfunction, timestamp=1573115453362, value=0
100 column=sepsiscategories:outtime, timestamp=1573115453362, value=2157-08-12 15:04:00.0
100 column=sepsiscategories:pco2, timestamp=1573115453362, value=37.0
100 column=sepsiscategories:potassium_max, timestamp=1573115453362, value=6.9
100 column=sepsiscategories:potassium_min, timestamp=1573115453362, value=3.0
100 column=sepsiscategories:resprate_max, timestamp=1573115453362, value=42.0
100 column=sepsiscategories:sodium_max, timestamp=1573115453362, value=143.0
100 column=sepsiscategories:sodium_min, timestamp=1573115453362, value=138.0
100 column=sepsiscategories:sysbp_max, timestamp=1573115453362, value=131.0
100 column=sepsiscategories:sysbp_min, timestamp=1573115453362, value=84.0
100 column=sepsiscategories:tempc_max, timestamp=1573115453362, value=37.70000076293945
100 column=sepsiscategories:tempc_min, timestamp=1573115453362, value=35.20000076293945
100 column=sepsiscategories:urineoutput, timestamp=1573115453362, value=2650.0
100 column=sepsiscategories:wbc_max, timestamp=1573115453362, value=7.9
100 column=sepsiscategories:wbc_min, timestamp=1573115453362, value=7.9
What could be causing this error? PS: the metrics jar I am using is metrics-core-3.1.2.jar.
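For what it's worth, metrics-core 3.x ships its classes under the com.codahale.metrics package, while the class named in the stack trace, com.yammer.metrics.core.Gauge, comes from the older metrics-core 2.2.x line that the HBase client here seems to expect. A small sketch (run in the same spark-shell) to see which jar, if any, actually provides that class on the driver classpath:

// Ask the driver's classloader where the missing class would be loaded from, if anywhere.
// metrics-core-3.1.2.jar cannot satisfy this lookup, since it only contains com.codahale.metrics.* classes.
val gaugeResource = Option(getClass.getClassLoader.getResource("com/yammer/metrics/core/Gauge.class"))
gaugeResource match {
  case Some(url) => println(s"com.yammer.metrics.core.Gauge found in: $url")
  case None      => println("com.yammer.metrics.core.Gauge is not on the driver classpath")
}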