我是Hadoop世界的新手,我的最终数据遇到了一些问题。
我的目的是使用水槽从Facebook页面(我使用restfb API)提取数据,然后将数据传送到HDFS,HIVE将使用它来整合最终数据。每小时都会发生这种情这一切都在HUE。
我不知道为什么,但有时候我会成功地提取数据。有些日子,我只能从几个小时内提取数据。
这是来自Flume的数据:
正如你所看到的,在03/21我只能提取当天的前4小时。在03/22,我可以提取洞日。
更多信息。
My Flume配置。来自Cloudera经理
FacebookAgent.sources = FacebookPageFansCity FacebookPageFansGenderAge FacebookPageFans FacebookPagePosts FacebookPageViews
FacebookAgent.channels = MemoryChannelFacebookPageFansCity MemoryChannelFacebookPageFansGenderAge MemoryChannelFacebookPageFans MemoryChannelFacebookPagePosts MemoryChannelFacebookPageViews
FacebookAgent.sinks = HDFSFacebookPageFansCity HDFSFacebookPageFansGenderAge HDFSFacebookPageFans HDFSFacebookPagePosts HDFSFacebookPageViews
# FacebookPageFansCity
FacebookAgent.sources.FacebookPageFansCity.type = br.com.tsystems.hadoop.flume.source.restfb.FacebookPageFansCitySource
FacebookAgent.sources.FacebookPageFansCity.channels = MemoryChannelFacebookPageFansCity
FacebookAgent.sources.FacebookPageFansCity.appId = null
FacebookAgent.sources.FacebookPageFansCity.appSecret = null
FacebookAgent.sources.FacebookPageFansCity.accessToken = *confidential*
FacebookAgent.sources.FacebookPageFansCity.pageId = *confidential*
FacebookAgent.sources.FacebookPageFansCity.proxyEnabled = false
FacebookAgent.sources.FacebookPageFansCity.proxyHost = null
FacebookAgent.sources.FacebookPageFansCity.proxyPort = -1
FacebookAgent.sources.FacebookPageFansCity.refreshInterval = 3600
FacebookAgent.sinks.HDFSFacebookPageFansCity.channel = MemoryChannelFacebookPageFansCity
FacebookAgent.sinks.HDFSFacebookPageFansCity.type = hdfs
FacebookAgent.sinks.HDFSFacebookPageFansCity.hdfs.path = hdfs://hdoop01:8020/user/flume/pocfacebook/pagefanscity/%Y%m%d%H
FacebookAgent.sinks.HDFSFacebookPageFansCity.hdfs.fileType = DataStream
FacebookAgent.sinks.HDFSFacebookPageFansCity.hdfs.writeFormat = Text
FacebookAgent.sinks.HDFSFacebookPageFansCity.hdfs.batchSize = 1000
FacebookAgent.sinks.HDFSFacebookPageFansCity.hdfs.rollSize = 0
FacebookAgent.sinks.HDFSFacebookPageFansCity.hdfs.rollCount = 10000
FacebookAgent.channels.MemoryChannelFacebookPageFansCity.type = memory
FacebookAgent.channels.MemoryChannelFacebookPageFansCity.capacity = 10000
FacebookAgent.channels.MemoryChannelFacebookPageFansCity.transactionCapacity = 1000
# FacebookPageFansGenderAge
FacebookAgent.sources.FacebookPageFansGenderAge.type = br.com.tsystems.hadoop.flume.source.restfb.FacebookPageFansGenderAgeSource
FacebookAgent.sources.FacebookPageFansGenderAge.channels = MemoryChannelFacebookPageFansGenderAge
FacebookAgent.sources.FacebookPageFansGenderAge.appId = null
FacebookAgent.sources.FacebookPageFansGenderAge.appSecret = null
FacebookAgent.sources.FacebookPageFansGenderAge.accessToken = *confidential*
FacebookAgent.sources.FacebookPageFansGenderAge.pageId = *confidential*
FacebookAgent.sources.FacebookPageFansGenderAge.proxyEnabled = false
FacebookAgent.sources.FacebookPageFansGenderAge.proxyHost = null
FacebookAgent.sources.FacebookPageFansGenderAge.proxyPort = -1
FacebookAgent.sources.FacebookPageFansGenderAge.refreshInterval = 3600
FacebookAgent.sinks.HDFSFacebookPageFansGenderAge.channel = MemoryChannelFacebookPageFansGenderAge
FacebookAgent.sinks.HDFSFacebookPageFansGenderAge.type = hdfs
FacebookAgent.sinks.HDFSFacebookPageFansGenderAge.hdfs.path = hdfs://hdoop01:8020/user/flume/pocfacebook/pagefansgenderage/%Y%m%d%H
FacebookAgent.sinks.HDFSFacebookPageFansGenderAge.hdfs.fileType = DataStream
FacebookAgent.sinks.HDFSFacebookPageFansGenderAge.hdfs.writeFormat = Text
FacebookAgent.sinks.HDFSFacebookPageFansGenderAge.hdfs.batchSize = 1000
FacebookAgent.sinks.HDFSFacebookPageFansGenderAge.hdfs.rollSize = 0
FacebookAgent.sinks.HDFSFacebookPageFansGenderAge.hdfs.rollCount = 10000
FacebookAgent.channels.MemoryChannelFacebookPageFansGenderAge.type = memory
FacebookAgent.channels.MemoryChannelFacebookPageFansGenderAge.capacity = 10000
FacebookAgent.channels.MemoryChannelFacebookPageFansGenderAge.transactionCapacity = 1000
# FacebookPageFans
FacebookAgent.sources.FacebookPageFans.type = br.com.tsystems.hadoop.flume.source.restfb.FacebookPageFansSource
FacebookAgent.sources.FacebookPageFans.channels = MemoryChannelFacebookPageFans
FacebookAgent.sources.FacebookPageFans.appId = null
FacebookAgent.sources.FacebookPageFans.appSecret = null
FacebookAgent.sources.FacebookPageFans.accessToken = *confidential*
FacebookAgent.sources.FacebookPageFans.pageId = *confidential*
FacebookAgent.sources.FacebookPageFans.proxyEnabled = false
FacebookAgent.sources.FacebookPageFans.proxyHost = null
FacebookAgent.sources.FacebookPageFans.proxyPort = -1
FacebookAgent.sources.FacebookPageFans.refreshInterval = 3600
FacebookAgent.sinks.HDFSFacebookPageFans.channel = MemoryChannelFacebookPageFans
FacebookAgent.sinks.HDFSFacebookPageFans.type = hdfs
FacebookAgent.sinks.HDFSFacebookPageFans.hdfs.path = hdfs://hdoop01:8020/user/flume/pocfacebook/pagefans/%Y%m%d%H
FacebookAgent.sinks.HDFSFacebookPageFans.hdfs.fileType = DataStream
FacebookAgent.sinks.HDFSFacebookPageFans.hdfs.writeFormat = Text
FacebookAgent.sinks.HDFSFacebookPageFans.hdfs.batchSize = 1000
FacebookAgent.sinks.HDFSFacebookPageFans.hdfs.rollSize = 0
FacebookAgent.sinks.HDFSFacebookPageFans.hdfs.rollCount = 10000
FacebookAgent.channels.MemoryChannelFacebookPageFans.type = memory
FacebookAgent.channels.MemoryChannelFacebookPageFans.capacity = 10000
FacebookAgent.channels.MemoryChannelFacebookPageFans.transactionCapacity = 1000
# FacebookPagePosts
FacebookAgent.sources.FacebookPagePosts.type = br.com.tsystems.hadoop.flume.source.restfb.FacebookPagePostsSource
FacebookAgent.sources.FacebookPagePosts.channels = MemoryChannelFacebookPagePosts
FacebookAgent.sources.FacebookPagePosts.appId = null
FacebookAgent.sources.FacebookPagePosts.appSecret = null
FacebookAgent.sources.FacebookPagePosts.accessToken = *confidential*
FacebookAgent.sources.FacebookPagePosts.pageId = *confidential*
FacebookAgent.sources.FacebookPagePosts.proxyEnabled = false
FacebookAgent.sources.FacebookPagePosts.proxyHost = null
FacebookAgent.sources.FacebookPagePosts.proxyPort = -1
FacebookAgent.sources.FacebookPagePosts.refreshInterval = 3600
FacebookAgent.sinks.HDFSFacebookPagePosts.channel = MemoryChannelFacebookPagePosts
FacebookAgent.sinks.HDFSFacebookPagePosts.type = hdfs
FacebookAgent.sinks.HDFSFacebookPagePosts.hdfs.path = hdfs://hdoop01:8020/user/flume/pocfacebook/pageposts/%Y%m%d%H
FacebookAgent.sinks.HDFSFacebookPagePosts.hdfs.fileType = DataStream
FacebookAgent.sinks.HDFSFacebookPagePosts.hdfs.writeFormat = Text
FacebookAgent.sinks.HDFSFacebookPagePosts.hdfs.batchSize = 1000
FacebookAgent.sinks.HDFSFacebookPagePosts.hdfs.rollSize = 0
FacebookAgent.sinks.HDFSFacebookPagePosts.hdfs.rollCount = 10000
FacebookAgent.channels.MemoryChannelFacebookPagePosts.type = memory
FacebookAgent.channels.MemoryChannelFacebookPagePosts.capacity = 10000
FacebookAgent.channels.MemoryChannelFacebookPagePosts.transactionCapacity = 5000
# FacebookPageViews
FacebookAgent.sources.FacebookPageViews.type = br.com.tsystems.hadoop.flume.source.restfb.FacebookPageViewsSource
FacebookAgent.sources.FacebookPageViews.channels = MemoryChannelFacebookPageViews
FacebookAgent.sources.FacebookPageViews.appId = null
FacebookAgent.sources.FacebookPageViews.appSecret = null
FacebookAgent.sources.FacebookPageViews.accessToken = *confidential*
FacebookAgent.sources.FacebookPageViews.pageId = *confidential*
FacebookAgent.sources.FacebookPageViews.proxyEnabled = false
FacebookAgent.sources.FacebookPageViews.proxyHost = null
FacebookAgent.sources.FacebookPageViews.proxyPort = -1
FacebookAgent.sources.FacebookPageViews.refreshInterval = 3600
FacebookAgent.sinks.HDFSFacebookPageViews.channel = MemoryChannelFacebookPageViews
FacebookAgent.sinks.HDFSFacebookPageViews.type = hdfs
FacebookAgent.sinks.HDFSFacebookPageViews.hdfs.path = hdfs://hdoop01:8020/user/flume/pocfacebook/pageviews/%Y%m%d%H
FacebookAgent.sinks.HDFSFacebookPageViews.hdfs.fileType = DataStream
FacebookAgent.sinks.HDFSFacebookPageViews.hdfs.writeFormat = Text
FacebookAgent.sinks.HDFSFacebookPageViews.hdfs.batchSize = 1000
FacebookAgent.sinks.HDFSFacebookPageViews.hdfs.rollSize = 0
FacebookAgent.sinks.HDFSFacebookPageViews.hdfs.rollCount = 10000
FacebookAgent.channels.MemoryChannelFacebookPageViews.type = memory
FacebookAgent.channels.MemoryChannelFacebookPageViews.capacity = 10000
FacebookAgent.channels.MemoryChannelFacebookPageViews.transactionCapacity = 1000
有人能帮助我吗?
更新
Oozie的协调员