我想知道如何从全名中解析出名字和姓氏,同时去掉中间名、开头的称谓(先生、女士)以及末尾的后缀(例如 Jr)。
/**
 * Reads the users text file, registers it as the "Users" temporary view, and then
 * registers a second temporary view, "Geo", holding the distinct zip codes together
 * with a `state` column produced by the `getstate` UDF.
 *
 * @param spark the session used to read the file, run SQL and register the views
 */
private def loaduserdata(spark: SparkSession): Unit = {
  import spark.implicits._

  // Each input line is split on "::" into its raw fields.
  val rawFields = spark.sparkContext
    .textFile("examples/src/main/resources/users.csv")
    .map(line => line.split("::"))

  // Fields 0 and 2 are parsed as integers; the remaining fields are passed through as-is.
  val userDF = rawFields
    .map { f => users(f(0).trim.toInt, f(1), f(2).trim.toInt, f(3), f(4)) }
    .toDF()

  // Expose the users to Spark SQL.
  userDF.createOrReplaceTempView("Users")

  // Distinct zip codes; `state` initially holds the first five characters of the zip code.
  val zipcodesDF = spark.sql("SELECT distinct zipcode, substr(zipcode,1,5) as state FROM Users ORDER BY zipcode desc")

  // Keep every column unchanged except `state`, which is passed through the getstate UDF
  // and re-aliased so the output schema keeps the same column names and order.
  val projection = zipcodesDF.columns.map { name =>
    val column = zipcodesDF.col(name)
    if (name == "state") getstate(column).as("state") else column
  }

  val geoDF = zipcodesDF.select(projection: _*)
  geoDF.createOrReplaceTempView("Geo")
}
/**
 * Spark UDF that looks up a zip code through the Google Geocoding JSON endpoint and
 * returns the `short_name` of the address component whose `types` are exactly
 * `administrative_area_level_1` and `political`.
 *
 * Returns "N/A" when the input is null or when the response contains no such component.
 *
 * The lookup is a blocking HTTP request performed once per input row; network failures
 * propagate as exceptions to the Spark task.
 * NOTE(review): the endpoint is called over plain HTTP without an API key — confirm the
 * service still accepts such requests.
 */
val getstate = udf { (zipcode: String) =>
  // Spark hands null to the UDF for missing values; answer directly instead of
  // querying the service for the literal text "null".
  Option(zipcode).fold("N/A") { zip =>
    // Encode the value so spaces or reserved characters cannot produce a malformed URL.
    val url = "http://maps.googleapis.com/maps/api/geocode/json?address=" +
      java.net.URLEncoder.encode(zip, "UTF-8")

    // Always release the underlying connection, even if reading the body fails.
    val source = scala.io.Source.fromURL(url)
    val result = try source.mkString finally source.close()

    val address = parse(result)
    val statenm = for {
      JObject(statename) <- address
      JField("types", JArray(types)) <- statename
      JField("short_name", JString(short_name)) <- statename
      // Structural comparison rather than matching on the list's toString rendering.
      if types == List(JString("administrative_area_level_1"), JString("political"))
    } yield short_name

    // The final expression is the UDF's result: first match, or "N/A" if none was found.
    statenm.headOption.getOrElse("N/A")
  }
}
谢谢
答案 0 :(得分:0)
首先:SELECT LEFT(name, CHARINDEX(' ', name)), SUBSTRING(name, CHARINDEX(' ', name) + 1, LEN(name) - (CHARINDEX(' ', name) - 1)) FROM table-name;
答案 1 :(得分:0)
我找到了一个名为 PARSE_NAME_UDF 的很好用的函数(见此处)。
我在数据迁移和集成工作中不时使用此函数:
以下是一些返回两列First和Last名称的示例:
-- Example: each SELECT calls dbo.PARSE_NAME_UDF twice on the same full name, once with
-- format 'F' (aliased as the 'First:' column) and once with format 'L' (aliased as the
-- 'Last:' column); UNION ALL stacks the three sample names into one result set.
-- Presumably 'F' and 'L' select the first and last name — confirm against the UDF source.
SELECT 'First:' = dbo.PARSE_NAME_UDF('Jon Doe Jr', 'F'), 'Last:' = dbo.PARSE_NAME_UDF('Jon Doe Jr', 'L')
UNION ALL SELECT 'First:' = dbo.PARSE_NAME_UDF('Jon J Doe', 'F'), 'Last:' = dbo.PARSE_NAME_UDF('Jon J Doe', 'L')
UNION ALL SELECT 'First:' = dbo.PARSE_NAME_UDF('Jon Doe', 'F'), 'Last:' = dbo.PARSE_NAME_UDF('Jon Doe', 'L')
以下是更多示例:
-- Examples of multi-part format strings passed as the second argument. Going by the
-- result noted on each line: 'S' yields the spelled-out suffix ("Junior") while 's'
-- keeps the abbreviation ("Jr"), lowercase 'f' yields only the first initial, and
-- literal punctuation such as '.' is carried into the output.
-- NOTE(review): these meanings are inferred from the listed results only — confirm
-- against the UDF definition.
SELECT dbo.PARSE_NAME_UDF('Fred J Muggs Jr', 'F M L S') -- Returns Fred J Muggs Junior
SELECT dbo.PARSE_NAME_UDF('Fred J Muggs Jr', 'F M L s') -- Returns Fred J Muggs Jr
SELECT dbo.PARSE_NAME_UDF('Fred J Muggs Jr', 'f. M. L s') -- Returns F. J. Muggs Jr