我已经进步到下面,使用scala比较2个文件。我正在尝试打印不常见的行,但readHDFSFile上的foreach似乎无法正常工作。对此有任何帮助。
import java.io.{BufferedReader, FileInputStream, InputStreamReader}
import java.net.URI
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FSDataInputStream, FileSystem, Path}
import scala.util.{Failure, Success, Try}
object DRCompareHDFSFiles {
def main(args: Array[String]): Unit = {
println("DBMigrate Main")
val hdfs = FileSystem.get(new URI("hdfs://localhost:8020/"), new Configuration())
val path1 = new Path(args(0))
val path2 = new Path(args(1))
readHDFSFile(hdfs, path1, path2)
}
// Accept a parameter which implements a close method
def using[A <: { def close(): Unit }, B](resource: A)(f: A => B): B =
try {
f(resource)
} finally {
resource.close()
}
def readHDFSFile(hdfs: FileSystem, path1: Path, path2: Path): Option[Stream[(String,String)]] = {
Try(
using(hdfs.open(path1))(readFileStream))
} match {
case Success(result) => {
Try(
using(hdfs.open(path2))(readFileStream))
} match {
case Success(result1) => {
val notEqualLines = for {
line1 <- result
line2 <- result1
if (line1 != line2)
} yield (line1,line2)
Some(notEqualLines)
}
case Failure(ex) =>
println(s"Could not read file $path2, detail ${ex.getClass.getName}:${ex.getMessage}")
None
}
case Failure(ex) =>
println(s"Could not read file $path1, detail ${ex.getClass.getName}:${ex.getMessage}")
None
}
def readFileStream(fis: FSDataInputStream)= {
val inFile = new BufferedReader(new InputStreamReader(fis))
def readLines = Stream.cons(inFile.readLine(), Stream.continually(inFile.readLine()))
readLines
}
}
在readHDFSFile中获取共同行后,如何获取行。 foreach似乎不起作用 感谢