Hive RegexSerDe多行日志匹配在一行后显示NULL值

时间:2018-02-22 05:22:57

标签: regex tomcat logging hive regexserde

尝试加载具有多行的Apache Tomcat日志用于单行,但是它只加载单行并显示其余行的NULL值,直到它到达下一条记录。

我从早期的帖子中尝试过正则表达式,但它们不起作用,这里的测试工具中使用的相同正则表达式是其中一个的链接: http://rubular.com/r/nVrWhuwg1c

正确识别线条并将它们分成正确的组,但是当我在蜂巢中尝试它时,它们的代码相同。

样本记录:

12-Dec-2013 12:03:26.988 WARNING [localhost-startStop-1] org.apache.tomcat.util.scan.StandardJarScanner.scan Failed to scan [file:/usr/share/java/jsp-api-2.3.jar] from classloader hierarchy
 java.io.FileNotFoundException: /usr/share/java/jsp-api-2.3.jar (No such file or directory)
    at java.util.zip.ZipFile.open(Native Method)
    at java.util.zip.ZipFile.<init>(ZipFile.java:225)
    at java.util.zip.ZipFile.<init>(ZipFile.java:155)
    at java.util.jar.JarFile.<init>(JarFile.java:166)
    at java.util.jar.JarFile.<init>(JarFile.java:130)
    at org.apache.tomcat.util.scan.JarFileUrlJar.<init>(JarFileUrlJar.java:60)
    at org.apache.tomcat.util.scan.JarFactory.newInstance(JarFactory.java:49)
    at org.apache.tomcat.util.scan.StandardJarScanner.process(StandardJarScanner.java:338)
    at org.apache.tomcat.util.scan.StandardJarScanner.scan(StandardJarScanner.java:288)
    at org.apache.catalina.startup.ContextConfig.processJarsForWebFragments(ContextConfig.java:1898)
    at org.apache.catalina.startup.ContextConfig.webConfig(ContextConfig.java:1126)
    at org.apache.catalina.startup.ContextConfig.configureStart(ContextConfig.java:775)
    at org.apache.catalina.startup.ContextConfig.lifecycleEvent(ContextConfig.java:299)
    at org.apache.catalina.util.LifecycleBase.fireLifecycleEvent(LifecycleBase.java:94)
    at org.apache.catalina.core.StandardContext.startInternal(StandardContext.java:5105)
    at org.apache.catalina.util.LifecycleBase.start(LifecycleBase.java:150)
    at org.apache.catalina.core.ContainerBase.addChildInternal(ContainerBase.java:752)
    at org.apache.catalina.core.ContainerBase.addChild(ContainerBase.java:728)
    at org.apache.catalina.core.StandardHost.addChild(StandardHost.java:734)
    at org.apache.catalina.startup.HostConfig.deployDescriptor(HostConfig.java:596)
    at org.apache.catalina.startup.HostConfig$DeployDescriptor.run(HostConfig.java:1805)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)

13-Dec-2016 12:03:24.988 WARNING [localhost-startStop-1] org.apache.tomcat.util.scan.StandardJarScanner.scan Failed to scan [file:/usr/share/java/el-api-3.0.jar] from classloader hierarchy
 java.io.FileNotFoundException: /usr/share/java/el-api-3.0.jar (No such file or directory)
    at java.util.zip.ZipFile.open(Native Method)
    at java.util.zip.ZipFile.<init>(ZipFile.java:225)
    at java.util.zip.ZipFile.<init>(ZipFile.java:155)
    at java.util.jar.JarFile.<init>(JarFile.java:166)
    at java.util.jar.JarFile.<init>(JarFile.java:130)
    at org.apache.tomcat.util.scan.JarFileUrlJar.<init>(JarFileUrlJar.java:60)
    at org.apache.tomcat.util.scan.JarFactory.newInstance(JarFactory.java:49)
    at org.apache.tomcat.util.scan.StandardJarScanner.process(StandardJarScanner.java:338)
    at org.apache.tomcat.util.scan.StandardJarScanner.scan(StandardJarScanner.java:288)
    at org.apache.jasper.servlet.TldScanner.scanJars(TldScanner.java:262)
    at org.apache.jasper.servlet.TldScanner.scan(TldScanner.java:104)
    at org.apache.jasper.servlet.JasperInitializer.onStartup(JasperInitializer.java:101)
    at org.apache.catalina.core.StandardContext.startInternal(StandardContext.java:5196)
    at org.apache.catalina.util.LifecycleBase.start(LifecycleBase.java:150)
    at org.apache.catalina.core.ContainerBase.addChildInternal(ContainerBase.java:752)
    at org.apache.catalina.core.ContainerBase.addChild(ContainerBase.java:728)
    at org.apache.catalina.core.StandardHost.addChild(StandardHost.java:734)
    at org.apache.catalina.startup.HostConfig.deployDescriptor(HostConfig.java:596)
    at org.apache.catalina.startup.HostConfig$DeployDescriptor.run(HostConfig.java:1805)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)


Create Table Query which i am trying 

CREATE EXTERNAL TABLE apache_cat_log_test 
(
    logdatetime STRING, 
    logtype STRING, 
    requestid STRING, 
    verbosedata STRING,
    msg string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
  "input.regex" = "^(\\d{2}-\\w{3}-\\d{4})\\s+(.*?)\\s+(\\w+)\\s*(.*?)\\s*$((?:\\s*(?!\\d{2}).*?$)*)"
 )
STORED AS TEXTFILE
location '/catlogs';

0 个答案:

没有答案