我有一些我想要组合在一起的数据:
Serial_Num Latitude Longitude
1950004S11059 -11.1 59.1
1950004S11059 -11.6 57.8
1950004S11059 -12.4 56
1950004S11059 -13.2 54.6
1950004S11059 -13.8 53.8
1950004S11059 -14.8 52.7
1950004S11059 -15.9 52
1950004S11059 -18.3 52.4
1950004S11059 -20 54
1950004S11059 -22.1 55.9
1950004S11059 -26.2 59.8
1950012S14150 -14 146.9
1950012S14150 -14.4 145.8
1950012S14150 -14.9 145.4
1950012S14150 -15.8 145.6
1950012S14150 -18.9 149.1
1950012S14150 -22.3 152.5
1950013S14139 -16 139
1950013S14139 -16.3 139
简单地说,对于每个唯一的Serial_Num
,我想要坐标。我期待类似的东西:
1950004S11059: {"GPS": (-11.1 , 59.1) , (-11.6, 57.8) , (-12.4, 56), ..., (-26.2, 59.8)}
然后我可以遍历每个Serial_Num
的GPS坐标并绘制。
我有一些我在其他地方使用的脚本,但主要依赖于.csv数据用于设置字典,Serial_Num
为关键字。
但是,csv中的数据是顺序的,序列很重要。
如何为每个Serial_Num
输出一个坐标列表,并且保持它们在CSV中出现的顺序?
编辑:我现在正在看Pandas,因为它有groupby
方法可能会有所帮助。
答案 0 :(得分:2)
<强>鉴于强>
名为fileprivate struct DateWrapper: Decodable {
    /// The decoded date value.
    var date: Date

    /// Key used when the date arrives wrapped in a keyed container (`"datetime"`).
    private enum TimeCodingKeys: String, CodingKey {
        case datetime
    }

    /// Decodes a date from one of three shapes:
    /// a keyed container holding a string under `datetime`, a bare string,
    /// or a value that `Date` can decode by itself.
    ///
    /// - Throws: `DecodingError.dataCorrupted` when a string was found but neither
    ///   `Utils.date(from:)` nor `Utils.date(from:with:)` (format `"yyyy-MM-dd"`)
    ///   produced a date; also rethrows any error raised by the underlying containers.
    init(from decoder: Decoder) throws {
        let rawValue: String

        if let keyed = try? decoder.container(keyedBy: TimeCodingKeys.self) {
            rawValue = try keyed.decode(String.self, forKey: .datetime)
        } else {
            let single = try decoder.singleValueContainer()
            guard let text = try? single.decode(String.self) else {
                // Not a string: let `Date` decode itself and finish early.
                date = try single.decode(Date.self)
                return
            }
            rawValue = text
        }

        // NOTE(review): `Utils` is defined elsewhere; presumably the one-argument
        // overload applies a default format — confirm against its definition.
        if let parsed = Utils.date(from: rawValue) {
            date = parsed
            return
        }
        if let parsed = Utils.date(from: rawValue, with: "yyyy-MM-dd") {
            date = parsed
            return
        }

        throw DecodingError.dataCorrupted(
            DecodingError.Context(
                codingPath: decoder.codingPath,
                debugDescription: "Date format was not parseable."
            )
        )
    }
}
extension KeyedDecodingContainer {
    /// Decodes a `Date` for `key` by routing through `DateWrapper`.
    func decode(_ type: Date.Type, forKey key: K) throws -> Date {
        let wrapper = try decode(DateWrapper.self, forKey: key)
        return wrapper.date
    }

    /// Decodes an array of `Date` for `key`, reading the nested unkeyed
    /// container one element at a time until it is exhausted.
    func decode(_ type: [Date].Type, forKey key: K) throws -> [Date] {
        var elements = try nestedUnkeyedContainer(forKey: key)
        var result = [Date]()
        while elements.isAtEnd == false {
            let next = try elements.decode(Date.self)
            result.append(next)
        }
        return result
    }
}
extension UnkeyedDecodingContainer {
    /// Decodes the next element as a `Date` by routing through `DateWrapper`.
    mutating func decode(_ type: Date.Type) throws -> Date {
        let wrapper = try decode(DateWrapper.self)
        return wrapper.date
    }
}
的文件:
foo.csv
一些代码将数据解析为(序列,坐标)的元组:
Serial_Num Latitude Longitude
1950004S11059 -11.1 59.1
1950004S11059 -11.6 57.8
1950004S11059 -12.4 56
1950004S11059 -13.2 54.6
1950004S11059 -13.8 53.8
1950004S11059 -14.8 52.7
1950004S11059 -15.9 52
1950004S11059 -18.3 52.4
1950004S11059 -20 54
1950004S11059 -22.1 55.9
1950004S11059 -26.2 59.8
1950012S14150 -14 146.9
1950012S14150 -14.4 145.8
1950012S14150 -14.9 145.4
1950012S14150 -15.8 145.6
1950012S14150 -18.9 149.1
1950012S14150 -22.3 152.5
1950013S14139 -16 139
1950013S14139 -16.3 139
<强>代码强>
我们构建了defaultdicts的嵌套:
import csv
import collections as ct
def read_file(fname):
    """Yield ``(serial, coords)`` tuples from a delimited text file.

    The first row is treated as a header and skipped. Each remaining row
    yields its first field as the serial and all following fields,
    converted to ``float``, as a tuple of coordinates. Rows are yielded in
    file order.

    Both comma-delimited rows and whitespace-delimited rows (such as
    ``1950004S11059 -11.1 59.1``) are accepted. Blank rows are skipped and
    an empty file yields nothing.

    Raises:
        ValueError: if a coordinate field cannot be converted to ``float``.
    """
    # newline="" is what the csv module expects for files it reads.
    with open(fname, newline="") as f:
        reader = csv.reader(f)
        # A default avoids StopIteration escaping the generator (which
        # surfaces as RuntimeError) when the file is empty.
        next(reader, None)
        for line in reader:
            # No comma found: the row is whitespace-delimited, split it here.
            if len(line) == 1:
                line = line[0].split()
            if not line:
                continue
            yield line[0], tuple(map(float, line[1:]))
输出
# Group coordinates by serial number, keeping them in file order.
# Each serial maps to its own defaultdict(list), so a coordinate is always
# appended under the serial of its own row, even when rows for one serial
# are not contiguous in the file.
data = ct.defaultdict(lambda: ct.defaultdict(list))
for serial, coords in read_file("foo.csv"):
    data[serial]["GPS"].append(coords)
dict(data)
答案 1 :(得分:2)
这是一种方式。下面每个步骤的细分。
import pandas as pd
# Read the whitespace-delimited file; sep=r'\s+' replaces the deprecated
# delim_whitespace=True keyword.
df = pd.read_csv('file.csv', sep=r'\s+')
# Pair each row's latitude and longitude into a single tuple column.
df['GPS'] = list(zip(df.Latitude, df.Longitude))
# Collect the tuples of every serial number into a list and return a dict.
df.groupby('Serial_Num')['GPS'].apply(list).to_dict()
阅读数据
# sep=r'\s+' splits on runs of whitespace; it replaces the deprecated
# delim_whitespace=True keyword.
df = pd.read_csv('file.csv', sep=r'\s+')
# Serial_Num Latitude Longitude
# 0 1950004S11059 -11.1 59.1
# 1 1950004S11059 -11.6 57.8
# 2 1950004S11059 -12.4 56.0
# 3 1950004S11059 -13.2 54.6
# 4 1950004S11059 -13.8 53.8
# 5 1950004S11059 -14.8 52.7
制作元组列
# Build one (latitude, longitude) tuple per row and store it in a new column.
coordinate_pairs = zip(df['Latitude'], df['Longitude'])
df['GPS'] = list(coordinate_pairs)
# Serial_Num Latitude Longitude GPS
# 0 1950004S11059 -11.1 59.1 (-11.1, 59.1)
# 1 1950004S11059 -11.6 57.8 (-11.6, 57.8)
# 2 1950004S11059 -12.4 56.0 (-12.4, 56.0)
# 3 1950004S11059 -13.2 54.6 (-13.2, 54.6)
# 4 1950004S11059 -13.8 53.8 (-13.8, 53.8)
# 5 1950004S11059 -14.8 52.7 (-14.8, 52.7)
创建字典
# Gather the GPS tuples of each serial number into a list, then convert the
# resulting Series into a plain dict keyed by serial number.
gps_by_serial = df.groupby('Serial_Num')['GPS']
gps_by_serial.apply(list).to_dict()
# {'1950004S11059': [(-11.1, 59.100000000000001),
# (-11.6, 57.799999999999997),
# (-12.4, 56.0),
# (-13.199999999999999, 54.600000000000001),
# (-13.800000000000001, 53.799999999999997),
# (-14.800000000000001, 52.700000000000003),
# (-15.9, 52.0),
# (-18.300000000000001, 52.399999999999999),
# (-20.0, 54.0),
# (-22.100000000000001, 55.899999999999999),
# (-26.199999999999999, 59.799999999999997)],
# '1950012S14150': [(-14.0, 146.90000000000001),
# (-14.4, 145.80000000000001),
# (-14.9, 145.40000000000001),
# (-15.800000000000001, 145.59999999999999),
# (-18.899999999999999, 149.09999999999999),
# (-22.300000000000001, 152.5)],
# '1950013S14139': [(-16.0, 139.0), (-16.300000000000001, 139.0)]}