我正在 CDH Quickstart VM 5.13.0 中学习 Apache Flume。Agent 可以正常运行,处理完的文件也会被加上 .COMPLETED 后缀,但看起来 Flume 仍在把同一个文件重复发送到 HDFS。按理说每个文件应该只上传一次。我的配置有什么地方不正确吗?
# Flume agent "agent1": spooling-directory source -> file channel -> HDFS sink.

# --- Component declarations -------------------------------------------------
agent1.sinks = hdfs-sink1
agent1.sources = source1
agent1.channels = fileChannel1

# --- Channel ----------------------------------------------------------------
agent1.channels.fileChannel1.type = file
# These two keys previously targeted "fileChannel" (missing the trailing "1"),
# a component that is never declared, so they were not applied to this channel.
agent1.channels.fileChannel1.capacity = 2000
agent1.channels.fileChannel1.transactionCapacity = 100

# --- Source -----------------------------------------------------------------
agent1.sources.source1.type = spooldir
agent1.sources.source1.spoolDir = /home/cloudera/app/flume/data
agent1.sources.source1.fileHeader = false
# Suffix appended to a spooled file once it has been fully ingested.
agent1.sources.source1.fileSuffix = .COMPLETED

# --- Sink -------------------------------------------------------------------
agent1.sinks.hdfs-sink1.type = hdfs
agent1.sinks.hdfs-sink1.hdfs.path = hdfs://quickstart.cloudera:8020/tmp/data/
# The sink takes up to batchSize events per channel transaction, so it must
# not exceed the channel's transactionCapacity (100). It was 1000.
agent1.sinks.hdfs-sink1.hdfs.batchSize = 100
# NOTE(review): rollSize is in bytes. With a ~2.6 KB threshold the sink closes
# the current HDFS file and opens a new one roughly every 2.7 KB, which is the
# likely reason /tmp/data contains many ~2.8 KB FlumeData.* files. That is one
# input split into pieces, not the same file uploaded repeatedly. Raise
# rollSize (or set it to 0 and roll on rollInterval/rollCount instead) if
# fewer, larger files are wanted.
agent1.sinks.hdfs-sink1.hdfs.rollSize = 2684
# 0 disables time-based rolling.
agent1.sinks.hdfs-sink1.hdfs.rollInterval = 0
agent1.sinks.hdfs-sink1.hdfs.rollCount = 5000
agent1.sinks.hdfs-sink1.hdfs.writeFormat = Text
agent1.sinks.hdfs-sink1.hdfs.fileType = DataStream

# --- Wiring -----------------------------------------------------------------
agent1.sources.source1.channels = fileChannel1
agent1.sinks.hdfs-sink1.channel = fileChannel1
HDFS中的文件:
-rw-r--r-- 1 cloudera supergroup 2826 2019-09-25 11:28 /tmp/data/FlumeData.1569436091407
-rw-r--r-- 1 cloudera supergroup 2824 2019-09-25 11:28 /tmp/data/FlumeData.1569436091408
-rw-r--r-- 1 cloudera supergroup 2809 2019-09-25 11:28 /tmp/data/FlumeData.1569436091409
-rw-r--r-- 1 cloudera supergroup 2802 2019-09-25 11:28 /tmp/data/FlumeData.1569436091410
Flume 命令:
# Start the Flume agent named "agent1" using the given configuration file.
flume-ng agent --name agent1 --conf-file /home/cloudera/app/flume/Flume1.conf