我正尝试将json文件加载到pyspark中,仅使用以下特定列
import { Component, OnInit } from '@angular/core';
import { Hero } from '../hero';
import { NgForm } from '@angular/forms'
import { UserService } from './user.service';
/**
 * Hero registration view.
 *
 * Holds a `Hero` form model pre-filled with sample values. On initialisation
 * it calls `UserService.registerUser()` and reports the outcome via `alert`.
 */
@Component({
  selector: 'app-heroes',
  templateUrl: './heroes.component.html',
  styleUrls: ['./heroes.component.css'],
})
export class HeroesComponent implements OnInit {
  /** Form model; starts out populated with placeholder data. */
  hero: Hero = {
    FirstName: 'Racj',
    LastName: 'xlkcj',
    Email: 'rd@gmail.com',
    Password: '103aa56',
    Phone: '90632512',
    CountryId: '1',
  };

  constructor(private userService: UserService) {}

  /**
   * Lifecycle hook: triggers the registration request and alerts either a
   * success message or the first entry of the response's `Errors` list.
   *
   * NOTE(review): `hero` is not passed to `registerUser()` here — confirm the
   * service obtains the payload elsewhere.
   */
  ngOnInit(): void {
    const registration$ = this.userService.registerUser();

    registration$.subscribe((response: any) => {
      // NOTE(review): loose `==` kept on purpose to preserve behaviour;
      // confirm whether the API always returns a strict boolean.
      if (response.Succeeded == true) {
        alert('User registration successful');
        return;
      }

      alert(response.Errors[0]);
    });
  }

  /** Clears every field of the form model back to an empty string. */
  OnReset(): void {
    this.hero = {
      FirstName: '',
      LastName: '',
      Phone: '',
      Password: '',
      Email: '',
      CountryId: '',
    };
  }
}
所以我开始根据下面的主架构,编写读取时使用的输入架构(schema):
# Load JSON from sample/json/, passing the user-defined `schema` to the reader.
df = spark.read.json("sample/json/", schema=schema)
我能够写出简单的字符串类型字段,但不知道如何定义数组(array)和结构(struct)类型的字段
|-- test_name: string (nullable = true)
|-- test_file: string (nullable = true)
|-- test_id: string (nullable = true)
|-- test_type: string (nullable = true)
|-- test_url: string (nullable = true)
|-- test_ids: array (nullable = true)
| |-- element: string (containsNull = true)
|-- value: struct (nullable = true)
| |-- ct: long (nullable = true)
| |-- dimmingSetting: long (nullable = true)
| |-- hue: double (nullable = true)
| |-- modeId: string (nullable = true)
如何扩展下面的架构,使其包含上述数组和结构类型的字段?
schema = StructType([
StructField('test_name', StringType()),
StructField('test_file', StringType()),
StructField('test_id', StringType()),
StructField('test_type', StringType()),
StructField('test_url', StringType()),
])
答案 0 :(得分:2)
扩展版本应为
from pyspark.sql.types import StructType, StructField, StringType, ArrayType, LongType, DoubleType
# Explicit read schema matching the printSchema() tree shown in the question:
# five string columns, an array of strings, and a nested struct.
schema = StructType([
    StructField('test_name', StringType(), nullable=True),
    StructField('test_file', StringType(), nullable=True),
    StructField('test_id', StringType(), nullable=True),
    StructField('test_type', StringType(), nullable=True),
    StructField('test_url', StringType(), nullable=True),
    # Array column: element type is string, null elements allowed.
    StructField(
        'test_ids',
        ArrayType(StringType(), containsNull=True),
        nullable=True,
    ),
    # Nested struct column: declared as a StructType inside a StructField.
    StructField(
        'value',
        StructType([
            StructField('ct', LongType(), nullable=True),
            StructField('dimmingSetting', LongType(), nullable=True),
            StructField('hue', DoubleType(), nullable=True),
            StructField('modeId', StringType(), nullable=True),
        ]),
        nullable=True,
    ),
])
我希望答案会有所帮助