在一次采访中,技术负责人说我的scala代码就像java代码,但是使用scala api,他希望我改进它。
我是一名3年的Java开发人员,我在Coursera上跟随MOOC开始进行scala编码。
有谁能告诉我这是什么问题,我该如何改进呢?
我得到了这份工作,因为我的Java认可,但这项工作是基于scala,编码风格是在试用期间要解决的一件事。
object Extraction {
// IntelliJ use .idea/modules as current working directory
val FilePathPre = "../../src/main/resources/"
val UserIdFile = "lookup_user.csv"
val ProductIdFile = "lookup_product.csv"
val RatingFile = "agg_ratings.csv"
def readFile(file: String): Iterator[((String, String), String, String)] = {
val Splitter = ","
Source.fromInputStream(this.getClass.getResourceAsStream(file)).getLines()
.map(_.split(Splitter))
.filter(_.size >= 4) // in case line is not valid
.map(x => ((x(0), x(1)), x(2), x(3))) // (userId, ItemId), rating, time
}
def filePrinter(fileName: String, lines: mutable.Map[String, Int]) = {
val file = new File(fileName)
val bw = new BufferedWriter(new FileWriter(file))
lines.toArray.sortWith((a, b) => a._2 < b._2)
.map(x => x._1 + "," + x._2.toString + "\n")
.foreach(bw.write)
bw.close()
}
def aggFilePrinter(fileName: String, lines: mutable.Map[(Int, Int), Float]) = {
val file = new File(fileName)
val bw = new BufferedWriter(new FileWriter(file))
lines.foreach(x => {
val line = x._1._1.toString + "," + x._1._2.toString + "," + (math.round(x._2 * 100.0) / 100.0).toFloat + "\n"
bw.write(line)
})
bw.close()
}
/**
* * une pénalité multiplicative de 0.95 est appliquée au rating
* pour chaque jour d'écart avec le timestamp maximal de input.csv
*
* @param nowTime maximal timestamp at input.csv
* @param pastTime current rating time
* @param rating original rating
* @return final rating multiplied by 0.95 for every day interval from the maximal timestamp
*/
def finalRating(nowTime: String, pastTime: String, rating: String): Float = {
val now =
LocalDateTime.ofInstant(Instant.ofEpochMilli(nowTime.toLong), ZoneId.systemDefault())
val past =
LocalDateTime.ofInstant(Instant.ofEpochMilli(pastTime.toLong), ZoneId.systemDefault())
val diff = ChronoUnit.DAYS.between(past, now)
(math.pow(0.95, diff) * rating.toFloat).toFloat
}
/**
*
* @param file file to extract
*/
def fileDispatcher(file: String) = {
/**
* get idIndice or increment to idIndice and put it to id map
* @param id id in String
* @param idIndice id in Int
* @param idMap userIdMap or productIdMap
* @return (indice for id, max idIndice)
*/
def getIndice(id: String, idIndice: Int, idMap: mutable.Map[String, Int]): (Int, Int) = {
idMap.get(id) match {
case Some(i) => (i, idIndice)
case None => {
val indice = idIndice + 1
idMap += (id -> indice)
(indice, indice)
}
}
}
// 1. scan the file the find the max time
val maxTime = readFile(file).reduce((a, b) => if(a._3 > b._3) a else b)._3
// 2. apply rating condition, calculate rating and return only valid rating lines
val validLines = readFile(file).map(x => (x._1, finalRating(maxTime.toString, x._3, x._2))).filter(_._2 > 0.01)
// 3. loop file lines, sum ratings by (userId, productId), and combine id_String and id_Int
val userIdMap = mutable.Map[String, Int]() // (userId, userIdAsInt)
val productIdMap = mutable.Map[String, Int]() // (productId, productIdAsInt)
val userProductRatingMap = mutable.Map[(Int, Int), Float]() // (userIdAsInt, productIdAsInt, ratingSum)
var userIdIndice = -1
var productIdIndice = -1
validLines.foreach(x => {
val userIdString = x._1._1
val userId = getIndice(userIdString, userIdIndice, userIdMap)
userIdIndice = userId._2
val productIdString = x._1._2
val productId = getIndice(productIdString, productIdIndice, productIdMap)
productIdIndice = productId._2
val key = (userId._1, productId._1)
userProductRatingMap.get(key) match {
case Some(i) => userProductRatingMap += (key -> (i + x._2))
case None => userProductRatingMap += (key -> x._2)
}
})
filePrinter(FilePathPre + UserIdFile, userIdMap)
filePrinter(FilePathPre + ProductIdFile, productIdMap)
aggFilePrinter(FilePathPre + RatingFile, userProductRatingMap)
}
}
答案 0 :(得分:0)
除了 javish 代码之外,您还有代码样式问题,建议在开始时阅读https://docs.scala-lang.org/style/(这不是最终指南,但是开始就可以了)。避免在元组上使用match { case (a, b, c) => ... }
,而是使用mutable.
。
主要问题是你使用了可变结构,因此在scala中,默认情况下每个结构都是不可变的,除非你有充分的理由让它变得可变,否则它应该保持不变。它更多地是关于函数式编程,从一个角度来试图避免可变性和副作用,你可以更多地谷歌搜索这个主题。
从代码中删除foreach
并将foldLeft
替换为例如。 immutable.Map
在每次迭代时获得新创建的NULL
,而不是修改现有的。{/ p>