Question

假设我有一个函数：

package BIGDATA

/**
* @author ${user.name}
*/

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.types.{ArrayType, StringType, StructField, StructType}
import org.apache.spark.ml.feature.StopWordsRemover
import org.apache.spark.ml.feature.{HashingTF, IDF, Tokenizer}
import org.apache.spark.ml.feature.Word2Vec
import org.apache.spark.ml.clustering.KMeans
import org.apache.spark.mllib.linalg.{VectorUDT, Vectors}


/**
 * Small Spark driver that cleans three sample sentences with a regex,
 * tokenizes them, removes stop words and fits a Word2Vec model on the result.
 *
 * Intermediate and final DataFrames are printed with `show()`.
 */
object App {

  /**
   * Entry point.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {

    val conf = new SparkConf()
      .setAppName("SEMANTIC ANALYSIS - TEST")

    val sc = new SparkContext(conf)

    // Stop the context even if a stage below throws, so the application
    // does not leave a dangling SparkContext behind.
    try {
      val hiveContext = new HiveContext(sc)
      import hiveContext.implicits._

      println("====================================================")
      println("READING DATA")
      println("====================================================")

      // Alternatives, in order:
      //   1. something@something built from word chars, dots and dashes
      //   2. X/A/x/a followed by six digits
      //   3. MA + 7 digits (optionally one word char) or FR + 8 digits + one word char
      //   4. one or more 'w', a dot, anything, then ".com" or "fr"
      //   5. a (possibly empty) run of the listed punctuation characters
      // Alternative 5 can match the empty string; that is harmless here because
      // every match is replaced by "".
      // NOTE(review): alternative 4 presumably targets web addresses, but as
      // written it accepts any number of 'w' and a greedy ".*" — confirm intent.
      val pattern: scala.util.matching.Regex = "(([\\w\\.-]+@[\\w\\.-]+)|((X|A|x|a)\\d{6})|(MA\\d{7}\\w|MA\\d{7}|FR\\d{8}\\w)|(w+\\..*(\\.com|fr))|([|\\[\\]!\\(\\)?,;:@&*#_=\\/]*))".r

      // Builds a UDF that deletes every match of `pattern` from a string column.
      // A null cell is passed through as null instead of raising a
      // NullPointerException inside the regex engine.
      def extractPattern(pattern: scala.util.matching.Regex) = udf(
        (title: String) => Option(title).map(pattern.replaceAllIn(_, "")).orNull
      )

      val df = Seq(
        (8, "Hi I heard about Spark x163021. Now, let’s use trained model by loading it. We need to import KMeansModel in order to use it for loading the model from file."),
        (64, "I wish Java could use case classes. Above is a very naive example in which we use training dataset as input data too. In real world we will train a model, save it and later use it for predicting clusters of input data."),
        (-27, "Logistic regression models are neat. Here is how you can save a trained model and later load it for prediction.")
      ).toDF("number", "word").select($"number", $"word",
        extractPattern(pattern)($"word").alias("NewWord"))

      println("====================================================")
      println("FEATURE TRANSFORMERS")
      println("====================================================")

      // Split the cleaned text column into tokens.
      val tokenizer = new Tokenizer()
        .setInputCol("NewWord")
        .setOutputCol("FeauturesEntities")

      val tokenizedDataFrame = tokenizer.transform(df)

      // Drop stop words from the token column.
      val remover = new StopWordsRemover()
        .setInputCol("FeauturesEntities")
        .setOutputCol("FilteredFeauturesEntities")

      val cleanedTokenizedDataFrame = remover.transform(tokenizedDataFrame)

      cleanedTokenizedDataFrame.show()

      println("====================================================")
      println("WORD2VEC : LEARN A MAPPING FROM WORDS TO VECTORS")
      println("====================================================")

      // Learn a mapping from words to Vectors.
      val word2Vec = new Word2Vec()
        .setMinCount(2)
        .setInputCol("FilteredFeauturesEntities")
        .setOutputCol("Word2VecFeatures")
        .setVectorSize(1000)

      val model = word2Vec.fit(cleanedTokenizedDataFrame)
      val word2VecDataFrame = model.transform(cleanedTokenizedDataFrame)

      word2VecDataFrame.show()
    } finally {
      sc.stop()
    }
  }

}

如何在另一个函数中用此函数遍历一个列表：

quotes:{[ticker;x;y]
output: ....
}

Answer 1

您可以将uj与/结合使用来执行此操作：

uj/[list of tables]

在你的情况下，这可能看起来像：

uj/[quotes[;x;y]each tickerList]

如果 quotes 函数始终输出具有相同架构（schema）的表，则可以改用 raze：

raze quotes[;x;y]each tickerList

raze 和 uj 都是连接函数 `,` 的实现，但 raze 要求所有表的架构都相同。

如何将参数传递给函数（q/kdb+）？

1 个答案: