我有一个DataFrame,其架构(schema)如下:
a.printSchema()
root
|-- fieldA: long (nullable = false)
|-- fieldB: struct (nullable = true)
| |-- _1: string (nullable = true)
| |-- _2: long (nullable = false)
我想通过fieldB的_2值来查询。
我知道如何根据fieldA的值进行查询:a.where("fieldA = 1234").show()
但我不知道如何通过struct里面的值进行查询。
答案 0 :(得分:0)
根据您的情况,您可以使用a.where("fieldB._2 = 123")。
可以用点号(.)访问 struct 中的元素,例如:
val df = Seq((1L, ("a", 2L)), (2L, ("b", 3L))).toDF("A", "B")
// df: org.apache.spark.sql.DataFrame = [A: bigint, B: struct<_1: string, _2: bigint>]
df.printSchema
root
|-- A: long (nullable = false)
|-- B: struct (nullable = true)
| |-- _1: string (nullable = true)
| |-- _2: long (nullable = false)
df.where("B._2 = 3").show
+---+-----+
| A| B|
+---+-----+
| 2|[b,3]|
+---+-----+
示例:
def mark_ngrams(txt, ngrams, open_mark='<', close_mark='>'):
    """Wrap every stretch of `txt` covered by any of `ngrams` in markers.

    Overlapping or adjacent n-gram occurrences are merged into a single
    marked stretch.

    Args:
        txt: the text to annotate.
        ngrams: iterable of substrings to look for in `txt`.
        open_mark: string inserted before each covered stretch.
        close_mark: string inserted after each covered stretch.

    Returns:
        A new string equal to `txt` with the markers inserted.
    """
    size = len(txt)

    # First pass: flag every character position covered by an n-gram.
    covered = [False] * size
    for ng in ngrams:
        width = len(ng)
        if width == 0:
            # An empty n-gram covers no characters.
            continue
        # "+ 1" so that an n-gram ending exactly at the end of txt is found.
        for start in range(size - width + 1):
            if txt[start:start + width] == ng:
                for pos in range(start, start + width):
                    covered[pos] = True

    # Second pass: emit the text, opening/closing a marker whenever the
    # covered flag flips.
    pieces = []
    inside = False
    for ch, hit in zip(txt, covered):
        if hit and not inside:
            pieces.append(open_mark)
            inside = True
        elif inside and not hit:
            pieces.append(close_mark)
            inside = False
        pieces.append(ch)
    if inside:
        # The text ended while still inside a covered stretch: close it.
        pieces.append(close_mark)
    return "".join(pieces)


txt = "how does this work"
ngrams = ["ow ", "his", "s w"]
# Parenthesised form works under both Python 2 and Python 3.
print(mark_ngrams(txt, ngrams))