我有一个只包含数据、没有列名的CSV文件。现在我想用这个CSV文件中的数据在Spark中创建一个数据框(DataFrame),并为其定义模式(schema,即列名和数据类型)。我的代码如下:
import 'package:flutter/foundation.dart';
import 'package:flutter/material.dart';
import 'package:intl/intl.dart';
import 'package:vartest/sqlitedatabase/DatabaseBloc.dart';
import 'carts.dart';
import 'new_cart_form.dart';
import 'item_storage.dart';
import 'shopping_list.dart';
/// Application entry point: mounts a [MaterialApp] titled 'Einkaufsliste'
/// whose home is a [MyApp] backed by a newly created [ItemStorage].
void main() {
  final Widget root = MaterialApp(
    title: 'Einkaufsliste',
    home: MyApp(storage: ItemStorage()),
  );
  runApp(root);
}
/// Root stateful widget of the app; its state is managed by [MyAppState].
class MyApp extends StatefulWidget {
  /// Storage handle supplied by the caller (annotated as required).
  final ItemStorage storage;

  MyApp({Key key, @required this.storage}) : super(key: key);

  @override
  MyAppState createState() => MyAppState();
}
/// State for [MyApp]: renders the carts emitted by a [CartsBloc] stream as a
/// swipe-to-delete list and offers a button that opens the new-cart form.
class MyAppState extends State<MyApp> {
  /// Bloc whose `carts` stream feeds the list built in [build].
  final bloc = CartsBloc();

  /// Second bloc instance; within this class it is only created and disposed.
  final blocCartItems = CartsBloc();

  // NOTE(review): `storage` and `id` are never read or written in this class.
  ItemStorage storage;
  int id;

  /// Amount shown in each tile's leading slot; reset to 0.0 for every item.
  double sum;

  @override
  void dispose() {
    bloc.dispose();
    blocCartItems.dispose();
    super.dispose();
  }

  // NOTE(review): not attached to any widget in this class.
  var refreshKey = GlobalKey<RefreshIndicatorState>();

  @override
  void initState() {
    super.initState();
    // Ask the bloc for the carts so the StreamBuilder in build() has data.
    bloc.getCarts();
    sum = 0.0;
  }

  /// Builds the scaffold: app bar, cart list (or a spinner while the stream
  /// has no data yet) and the floating button that opens the new-cart form.
  @override
  Widget build(BuildContext context) {
    final title = 'Einkaufsliste';
    return MaterialApp(
      title: title,
      theme: ThemeData(
        primarySwatch: Colors.blue,
      ),
      home: Scaffold(
        appBar: AppBar(
          title: Text(title),
        ),
        body: StreamBuilder<List<Carts>>(
          stream: bloc.carts,
          builder: (BuildContext context, AsyncSnapshot<List<Carts>> snapshot) {
            if (snapshot.hasData) {
              return ListView.builder(
                itemCount: snapshot.data.length,
                itemBuilder: (context, index) {
                  Carts cart = snapshot.data[index];
                  sum = 0.0;
                  return Dismissible(
                    key: UniqueKey(),
                    // Swiping a row away deletes the cart and confirms it
                    // with a snack bar on the enclosing Scaffold.
                    onDismissed: (direction) {
                      bloc.delete(cart.id);
                      Scaffold.of(context).showSnackBar(
                          SnackBar(content: Text(cart.shopName + " vom entfernt")));
                    },
                    background: Container(color: Colors.red),
                    child: ListTile(
                      leading: Text(sum.toStringAsFixed(2)),
                      title: Text(cart.shopName),
                      // createdDate is interpreted as milliseconds since epoch
                      // and rendered in local time.
                      subtitle: Text(DateFormat('dd.MM.yyyy').format(
                          new DateTime.fromMillisecondsSinceEpoch(cart.createdDate,
                              isUtc: false))),
                      onTap: () async {
                        await Navigator.of(context).push(MaterialPageRoute(
                          builder: (context) => ShoppingList(cart: cart),
                        ));
                      },
                    ),
                  );
                },
              );
            } else {
              return Center(child: CircularProgressIndicator());
            }
          },
        ),
        floatingActionButton: FloatingActionButton(
          onPressed: _showNewCartForm,
          // NOTE(review): tooltip text does not describe the action (opens the
          // new-cart form); left unchanged because it is user-facing text.
          tooltip: 'Increment',
          backgroundColor: Colors.green,
          child: Icon(
            Icons.camera,
          ),
        ),
      ),
    );
  }

  /// Pushes [AddCartFormPage] and, if it pops with a cart, adds that cart to
  /// the bloc and requests the cart list again.
  Future<void> _showNewCartForm() async {
    Carts newCart = await Navigator.of(context).push(
      MaterialPageRoute(
        builder: (BuildContext context) {
          return AddCartFormPage();
        },
      ),
    );
    // The state may have been disposed while the form was open; calling
    // setState (or using the disposed bloc) after that point is an error.
    if (!mounted || newCart == null) {
      return;
    }
    setState(() {
      bloc.add(newCart);
      bloc.getCarts();
    });
  }
}
我想知道我需要在代码中添加哪些命令以在数据框中构建模式?
答案 0 :(得分:0)
没有提供模式时,Spark会自动将列命名为c0、c1、c2,依此类推。因此,读取CSV时必须提供模式。请查看此链接,它会帮助您解决问题。
答案 1 :(得分:0)
如果要提供模式,就必须提供列名。因此,能够动态推断的只有数据类型。至于列名,我认为只能手动写死:除了自己逐一指定之外,没有其他办法添加有意义的列名。
您只需要一个StructType,并通过它的构造函数传入由StructField组成的序列(或类似的集合)。
import org.apache.spark.sql._
// StructType, StructField and the concrete data types live in the `types` package.
import org.apache.spark.sql.types._

// Explicit schema for a header-less CSV: one StructField per column, giving
// the column name, its data type, and whether null values are allowed.
// Pass it to the reader, e.g. spark.read.schema(yourSchema).csv(path).
val yourSchema =
  StructType(
    StructField("colA", IntegerType, nullable = true) ::
    StructField("colB", LongType, nullable = false) ::
    StructField("colC", BooleanType, nullable = false) :: Nil)