我需要列出某个文件夹中的所有文件,并使用 Spark 根据文件名把它们保存到不同的文件夹中。我已经写了下面的代码,但在调用 split 方法时编译报错:
split 不是 org.apache.hadoop.fs.Path 的成员。
下面是我的代码,请问该如何消除或解决这个错误?
import org.apache.spark.sql.SparkSession
import scala.io.Source
import org.apache.hadoop.conf.Configuration
import scala.io.Source
import org.apache.spark.sql.functions.col
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.hadoop.fs.FileSystem
import org.apache.spark.sql.functions._
/**
 * Lists the entries of a local folder through the Hadoop `FileSystem` API and, for each
 * entry, chooses one of two output locations from the length of the file-name prefix
 * (the part of the file name before the first '.' or '-').
 */
object Three extends App {
  val spark = SparkSession.builder
    .master("local[*]")
    .appName("ListFile")
    .getOrCreate()
  val sqlContext = spark.sqlContext
  val sc = spark.sparkContext
  import spark.implicits._
  import org.apache.hadoop.fs.{FileSystem, Path}

  val files = FileSystem
    .get(sc.hadoopConfiguration)
    .listStatus(new Path("C:\\Users\\ayush.gupta\\Desktop\\Newfolder25"))

  for (x <- files) {
    val z = x.getPath
    println(z)

    // `Path` is not a String and has no `split` method; `getName` already returns the
    // last path component (the file name) as a String.
    val fileName = z.getName

    // Prefix up to the first '.' or '-'. `takeWhile` yields an empty string instead of
    // throwing when the name starts with one of those characters.
    val prefix = fileName.takeWhile(c => c != '.' && c != '-')
    val l = prefix.length

    // NOTE(review): `df` is not defined anywhere in this snippet — presumably the data
    // read from the current file elsewhere; define it before this point. Also verify that
    // writing several times to the same output location is intended.
    if (l == 2)
      df.saveAsTextFile("C:\\Users\\ayush.gupta\\Desktop\\a36.txt")
    else
      df.saveAsTextFile("C:\\Users\\ayush.gupta\\Desktop\\a37.txt")
  }
}
答案 0 :(得分:1)
可以使用 getName 方法代替 split
来直接获取文件名。
import org.apache.hadoop.fs.{FileSystem, Path}
// Hadoop configuration of the SparkContext; `sc` must already be in scope.
val conf = sc.hadoopConfiguration
// Explicit type: a bare `???` is inferred as `Nothing`, which leaves the overloaded
// `new Path(...)` call below without a single best match.
val path: String = ??? // your path
val files = FileSystem.get(conf).listStatus(new Path(path))
// `getName` returns only the last component of each listed path.
val fileNames: Array[String] = files.map(_.getPath.getName)
您还可以使用 filter 方法,
通过谓词按文件名进行筛选。
// Keep only the listed entries whose file-name length equals the wanted value
// (replace `???` with that value).
val filteredFiles = files.filter { status =>
  status.getPath.getName.length == ???
}
答案 1 :(得分:0)
使用Scala,以下是根据文件名将文件从一个文件夹移动到其他文件夹的一种方法。
import java.io.File
import java.util.regex.Pattern
import java.io.File
import java.nio.file.{ Files, Path, StandardCopyOption }
/**
 * Moves every regular file of a source folder into a sub-folder of an output folder.
 * The sub-folder is named after the file's base name, i.e. the file name without its
 * last extension (`report.xlsx` goes to `<output>/report/report.xlsx`).
 */
object SegregateFilesToFolders {

  // Group 1 captures the file name without its last extension, group 2 the extension (if any).
  // Compiled once instead of once per file.
  private val NameAndExtension = Pattern.compile("(.+?)(\\.[^.]*$|$)")

  private val DefaultSourceDir =
    "C:\\Users\\User1\\Desktop\\All\\Data\\ExcelFilesComparison\\files"
  private val DefaultOutputDir = "C:\\Users\\User1\\Desktop\\"

  /**
   * @param args optional overrides: `args(0)` is the source folder and `args(1)` the output
   *             folder; the hard-coded default locations are used for whichever is omitted
   */
  def main(args: Array[String]): Unit = {
    val path = args.lift(0).getOrElse(DefaultSourceDir)
    val out_path = args.lift(1).getOrElse(DefaultOutputDir)

    // `File.list` returns null when `path` does not exist or is not a directory; treat that
    // as an empty listing. Sub-directories are skipped so that only files are moved.
    val files = Option(new File(path).list)
      .fold(List.empty[String])(_.toList)
      .filter(name => new File(path, name).isFile)
    println(files)

    for (f <- files) {
      val m = NameAndExtension.matcher(f)
      if (m.find()) {
        // `new File(parent, child)` joins with the platform separator instead of a literal "\\".
        val source = new File(path, f).toPath
        val targetDir = new File(out_path, m.group(1))
        // No-op when the folder already exists; otherwise the move below would fail.
        Files.createDirectories(targetDir.toPath)
        Files.move(source, new File(targetDir, f).toPath, StandardCopyOption.ATOMIC_MOVE)
      }
    }
  }
}