test = LOAD 'hdfs://192.168.1.195:9000/vivek/flume_data/flume.1520589885576' USING TextLoader AS (line:chararray);
log = FOREACH test GENERATE FLATTEN(REGEX_EXTRACT_ALL(line,'^(\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] "(.+?)" (\\S+) (\\S+) "([^"]*)" "([^"]*)"')) AS (address_ip: chararray, logname: chararray, user: chararray, timestamp: chararray, req_line: chararray, status: int, bytes: int, referer: chararray, userAgent: chararray);
STORE log INTO 'hbase://Access_Logs' USING org.apache.pig.backend.hadoop.hbase.HBaseStorage('cf:address_ip, cf:logname, cf:user, cf:timestamp, cf:req_line, cf:status, cf:bytes, cf:referer, cf:userAgent');
2018-03-10 10:52:03,636 [main] INFO org.apache.hadoop.conf.Configuration.deprecation - fs.default.name is deprecated. Instead, use fs.defaultFS
2018-03-10 10:52:03,840 [main] INFO org.apache.hadoop.conf.Configuration.deprecation - fs.default.name is deprecated. Instead, use fs.defaultFS
2018-03-10 10:52:03,840 [main] INFO org.apache.hadoop.hbase.mapreduce.TableOutputFormat - Created table instance for Access_Logs
2018-03-10 10:52:03,843 [main] INFO org.apache.pig.tools.pigstats.ScriptState - Pig features used in the script: UNKNOWN
2018-03-10 10:52:03,859 [main] INFO org.apache.hadoop.conf.Configuration.deprecation - fs.default.name is deprecated. Instead, use fs.defaultFS
2018-03-10 10:52:03,860 [main] INFO org.apache.pig.data.SchemaTupleBackend - Key [pig.schematuple] was not set... will not generate code.
2018-03-10 10:52:03,860 [main] INFO org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer - {RULES_ENABLED=[AddForEach, ColumnMapKeyPrune, ConstantCalculator, GroupByConstParallelSetter, LimitOptimizer, LoadTypeCastInserter, MergeFilter, MergeForEach, PartitionFilterOptimizer, PredicatePushdownOptimizer, PushDownForEachFlatten, PushUpFilter, SplitFilter, StreamTypeCastInserter]}
2018-03-10 10:52:03,890 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompiler - File concatenation threshold: 100 optimistic? false
2018-03-10 10:52:03,890 [main] INFO org.apache.hadoop.conf.Configuration.deprecation - fs.default.name is deprecated. Instead, use fs.defaultFS
2018-03-10 10:52:03,891 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer - MR plan size before optimization: 1
2018-03-10 10:52:03,891 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer - MR plan size after optimization: 1
2018-03-10 10:52:03,897 [main] INFO org.apache.hadoop.conf.Configuration.deprecation - fs.default.name is deprecated. Instead, use fs.defaultFS
2018-03-10 10:52:03,898 [main] INFO org.apache.hadoop.yarn.client.RMProxy - Connecting to ResourceManager at /192.168.1.195:8050
2018-03-10 10:52:03,899 [main] INFO org.apache.pig.tools.pigstats.mapreduce.MRScriptState - Pig script settings are added to the job
2018-03-10 10:52:03,899 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - mapred.job.reduce.markreset.buffer.percent is not set, set to default 0.3
2018-03-10 10:52:03,900 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - This job cannot be converted run in-process
2018-03-10 10:52:03,981 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Added jar file:/usr/local/pig/pig-0.15.0-core-h2.jar to DistributedCache through /tmp/temp1710369540/tmp1282565307/pig-0.15.0-core-h2.jar
2018-03-10 10:52:04,013 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Added jar file:/usr/local/hbase/lib/htrace-core-2.04.jar to DistributedCache through /tmp/temp1710369540/tmp520067094/htrace-core-2.04.jar
2018-03-10 10:52:04,067 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Added jar file:/usr/local/hadoop/share/hadoop/common/lib/guava-11.0.2.jar to DistributedCache through /tmp/temp1710369540/tmp946538428/guava-11.0.2.jar
2018-03-10 10:52:04,123 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Added jar file:/usr/local/hbase/lib/hbase-common-0.98.8-hadoop2.jar to DistributedCache through /tmp/temp1710369540/tmp468949353/hbase-common-0.98.8-hadoop2.jar
2018-03-10 10:52:04,144 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Added jar file:/usr/local/hbase/lib/hbase-hadoop-compat-0.98.8-hadoop2.jar to DistributedCache through /tmp/temp1710369540/tmp113887319/hbase-hadoop-compat-0.98.8-hadoop2.jar
2018-03-10 10:52:04,200 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Added jar file:/usr/local/hbase/lib/hbase-server-0.98.8-hadoop2.jar to DistributedCache through /tmp/temp1710369540/tmp682998180/hbase-server-0.98.8-hadoop2.jar
2018-03-10 10:52:04,256 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Added jar file:/usr/local/hbase/lib/hbase-client-0.98.8-hadoop2.jar to DistributedCache through /tmp/temp1710369540/tmp-1958170360/hbase-client-0.98.8-hadoop2.jar
2018-03-10 10:52:04,317 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Added jar file:/usr/local/hbase/lib/hbase-protocol-0.98.8-hadoop2.jar to DistributedCache through /tmp/temp1710369540/tmp-892814021/hbase-protocol-0.98.8-hadoop2.jar
2018-03-10 10:52:04,363 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Added jar file:/usr/local/hadoop/share/hadoop/common/lib/zookeeper-3.4.5.jar to DistributedCache through /tmp/temp1710369540/tmp-830858682/zookeeper-3.4.5.jar
2018-03-10 10:52:04,396 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Added jar file:/usr/local/hadoop/share/hadoop/common/lib/protobuf-java-2.5.0.jar to DistributedCache through /tmp/temp1710369540/tmp-420530468/protobuf-java-2.5.0.jar
2018-03-10 10:52:04,432 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Added jar file:/usr/local/hbase/lib/high-scale-lib-1.1.1.jar to DistributedCache through /tmp/temp1710369540/tmp-1046507224/high-scale-lib-1.1.1.jar
2018-03-10 10:52:04,474 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Added jar file:/usr/local/hadoop/share/hadoop/common/lib/netty-3.6.2.Final.jar to DistributedCache through /tmp/temp1710369540/tmp309001480/netty-3.6.2.Final.jar
2018-03-10 10:52:04,489 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Added jar file:/usr/local/pig/lib/automaton-1.11-8.jar to DistributedCache through /tmp/temp1710369540/tmp964502237/automaton-1.11-8.jar
2018-03-10 10:52:04,507 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Added jar file:/usr/local/pig/lib/antlr-runtime-3.4.jar to DistributedCache through /tmp/temp1710369540/tmp-1680308848/antlr-runtime-3.4.jar
2018-03-10 10:52:04,528 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Added jar file:/usr/local/pig/lib/joda-time-2.5.jar to DistributedCache through /tmp/temp1710369540/tmp813805284/joda-time-2.5.jar
2018-03-10 10:52:04,539 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Setting up single store job
2018-03-10 10:52:04,544 [main] INFO org.apache.pig.data.SchemaTupleFrontend - Key [pig.schematuple] is false, will not generate code.
2018-03-10 10:52:04,544 [main] INFO org.apache.pig.data.SchemaTupleFrontend - Starting process to move generated code to distributed cacche
2018-03-10 10:52:04,544 [main] INFO org.apache.pig.data.SchemaTupleFrontend - Setting key [pig.schematuple.classes] with classes to deserialize []
2018-03-10 10:52:04,555 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - 1 map-reduce job(s) waiting for submission.
2018-03-10 10:52:04,557 [JobControl] INFO org.apache.hadoop.yarn.client.RMProxy - Connecting to ResourceManager at /192.168.1.195:8050
2018-03-10 10:52:04,596 [JobControl] INFO org.apache.hadoop.conf.Configuration.deprecation - fs.default.name is deprecated. Instead, use fs.defaultFS
2018-03-10 10:52:04,597 [JobControl] INFO org.apache.hadoop.hbase.mapreduce.TableOutputFormat - Created table instance for Access_Logs
2018-03-10 10:52:04,625 [JobControl] WARN org.apache.hadoop.mapreduce.JobSubmitter - No job jar file set. User classes may not be found. See Job or Job#setJar(String).
2018-03-10 10:52:04,742 [JobControl] INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat - Total input paths to process : 1
2018-03-10 10:52:04,742 [JobControl] INFO org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil - Total input paths to process : 1
2018-03-10 10:52:04,748 [JobControl] INFO org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil - Total input paths (combined) to process : 1
2018-03-10 10:52:04,786 [JobControl] INFO org.apache.hadoop.mapreduce.JobSubmitter - number of splits:1
2018-03-10 10:52:04,828 [JobControl] INFO org.apache.hadoop.mapreduce.JobSubmitter - Submitting tokens for job: job_1519576410629_0017
2018-03-10 10:52:04,832 [JobControl] INFO org.apache.hadoop.mapred.YARNRunner - Job jar is not present. Not adding any jar to the list of resources.
2018-03-10 10:52:04,908 [JobControl] INFO org.apache.hadoop.yarn.client.api.impl.YarnClientImpl - Submitted application application_1519576410629_0017
2018-03-10 10:52:04,910 [JobControl] INFO org.apache.hadoop.mapreduce.Job - The url to track the job: http://server2.linux.com:8088/proxy/application_1519576410629_0017/
2018-03-10 10:52:05,056 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - HadoopJobId: job_1519576410629_0017
2018-03-10 10:52:05,056 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Processing aliases log,test
2018-03-10 10:52:05,056 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - detailed locations: M: test[16,7],log[-1,-1] C: R:
2018-03-10 10:52:05,064 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - 0% complete
2018-03-10 10:52:05,064 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Running jobs are [job_1519576410629_0017]
2018-03-10 10:53:02,248 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - 50% complete
2018-03-10 10:53:02,249 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Running jobs are [job_1519576410629_0017]
2018-03-10 10:53:05,259 [main] WARN org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Ooops! Some job has failed! Specify -stop_on_failure if you want Pig to stop immediately on failure.
2018-03-10 10:53:05,259 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - job job_1519576410629_0017 has failed! Stop running all dependent jobs
2018-03-10 10:53:05,259 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - 100% complete
2018-03-10 10:53:05,260 [main] INFO org.apache.hadoop.yarn.client.RMProxy - Connecting to ResourceManager at /192.168.1.195:8050
2018-03-10 10:53:05,274 [main] INFO org.apache.hadoop.mapred.ClientServiceDelegate - Application state is completed. FinalApplicationStatus=FAILED. Redirecting to job history server
2018-03-10 10:53:05,789 [main] ERROR org.apache.pig.tools.pigstats.PigStats - ERROR 0: java.lang.ClassCastException: java.lang.String cannot be cast to java.lang.Integer
2018-03-10 10:53:05,789 [main] ERROR org.apache.pig.tools.pigstats.mapreduce.MRPigStatsUtil - 1 map reduce job(s) failed!
2018-03-10 10:53:05,789 [main] INFO org.apache.pig.tools.pigstats.mapreduce.SimplePigStats - Script Statistics:
HadoopVersion PigVersion UserId StartedAt FinishedAt Features
2.4.1 0.15.0 hadoop 2018-03-10 10:52:03 2018-03-10 10:53:05 UNKNOWN
Failed!
Failed Jobs:
JobId Alias Feature Message Outputs
job_1519576410629_0017 log,test MAP_ONLY Message: Job failed! hbase://Access_Logs,
Input(s):
Failed to read data from "hdfs://192.168.1.195:9000/vivek/flume_data/flume.1520589885576"
Output(s):
Failed to produce result in "hbase://Access_Logs"
Counters:
Total records written : 0
Total bytes written : 0
Spillable Memory Manager spill count : 0
Total bags proactively spilled: 0
Total records proactively spilled: 0
Job DAG:
job_1519576410629_0017
2018-03-10 10:53:05,789 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Failed!
但在做的时候运行得很好 转储日志
答案 0 :(得分:0)
PIG脚本无法为列status: int, bytes: int
加载数据。
错误说
java.lang.ClassCastException: java.lang.String cannot be cast to java.lang.Integer
这意味着,当PIG期望它为String
时,REGEX解析器会带来Integer
个数据。
要进行调试,请尝试更改PIG
命令中的数据类型,然后尝试打印输出。完成所有设置后,您可以尝试保存到hbase
。