我是cassandra python模块的新手,所以我正在尝试使用batchstatement将json文件插入到表中,但是出现了'KeyError:0'错误,我知道所提供的文档太多了。但是我几乎尝试了所有一切,但不理解我的错误。请帮忙!!!!!!
import json
import logging
from cassandra.cluster import Cluster
import os
from uuid import uuid4
from cassandra.cluster import Cluster, BatchStatement
from cassandra import ConsistencyLevel
from myencoder import MyEncoder
import logging
import re
import ast
def parsing():
with open('dfs.json', 'r', encoding="utf8") as json_file:
data = json.load(json_file)
aboutlegacy = data['aboutLegacy']
accomplishments = data['accomplishments']
profilealternative = data['profileAlternative']
educations = data['educations']
profileLegacy = data['profileLegacy']
peopleAlsoviewed = data['peopleAlsoViewed']
positions = data['positions']
skills = data['skills']
recommendations = data['recommendations']
volunteerExperience = data['volunteerExperience']
profile = data['profile']
idd = uuid4()
query = """
INSERT INTO profile (id,profilelegacy,profilealternative,aboutlegacy,positions,educations,skills,recommendations,accomplishments,peoplealsoviewed,volunteerExperience,profile)
VALUES (?,?,?,?,?,?,?,?,?,?,?,?);"""
insert_user = session.prepare(query)
batch = BatchStatement(consistency_level=ConsistencyLevel.ONE)
batch.add(insert_user, (idd, profileLegacy, profilealternative, aboutlegacy, positions, educations,
skills, recommendations, accomplishments, peopleAlsoviewed, volunteerExperience, profile,))
log = logging.getLogger()
log.info('Batch Insert Completed')
session.execute(batch)
if __name__ == "__main__":
cluster = Cluster(['127.0.0.1'], port=9042)
session = cluster.connect('profiles', wait_for_all_pools=True)
session.execute('USE profiles')
parsing()
此错误产生了:
File "cassandratest2.py", line 61, in <module>
parsing()
File "cassandratest2.py", line 51, in parsing
skills, recommendations, accomplishments, peopleAlsoviewed, volunteerExperience, profile,))
File "C:\Python\Python37\lib\site-packages\cassandra\query.py", line 815, in add
bound_statement = statement.bind(() if parameters is None else parameters)
File "C:\Python\Python37\lib\site-packages\cassandra\query.py", line 501, in bind
return BoundStatement(self).bind(values)
File "C:\Python\Python37\lib\site-packages\cassandra\query.py", line 627, in bind
self.values.append(col_spec.type.serialize(value, proto_version))
File "C:\Python\Python37\lib\site-packages\cassandra\cqltypes.py", line 723, in serialize
return cls.serialize_safe(val, protocol_version)
File "C:\Python\Python37\lib\site-packages\cassandra\cqltypes.py", line 942, in serialize_safe
item = val[i]
KeyError: 0
这是json文件,只是格式,因为它包含机密信息
{
"profileLegacy": {
"name": "",
"headline": "",
"location": "",
"connections": 0,
"summary": ""
},
"profileAlternative": {
"name": "",
"headline": "",
"location": "",
"connections": 0
},
"aboutLegacy": {
"text": ""
},
"positions": [{
"org": "",
"title": "",
"end": "",
"start": "",
"desce": ""
}],
"educations": [{
"major": "",
"end": "",
"name": "",
"degree": "Maestr\u00eda en Finanzas",
"start": "",
"desce": ""
}],
"skills": [
"Key Account Development",
"Strategic Planning"
],
"recommendations": {
"givenCount": "0",
"receivedCount": "0",
"given": [],
"received": []
},
"accomplishments": [],
"peopleAlsoViewed": [{
"url": "",
"id": ""
}],
"volunteerExperience": [],
"profile": {
"name": "",
"headline": "",
"location": "",
"connections": 0
}
}
这是cqlsh中的创建表查询
CREATE TABLE profile (
id uuid PRIMARY KEY,
profilelegacy frozen<profilelegacy>,
profilealternative frozen<profilelaternative>,
aboutlegacy text,
positions list<frozen<positions>>,
educations set<frozen<educations>>,
skills list<text>,
recommendations frozen<recommendations>,
accomplishments list<text>,
peoplealsoviewed list<frozen<peoplealsoviewed>>,
volunteerExperience list<text>,
profile frozen<profilelaternative>
) WITH bloom_filter_fp_chance = 0.01
AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
AND comment = ''
AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
AND crc_check_chance = 1.0
AND dclocal_read_repair_chance = 0.1
AND default_time_to_live = 0
AND gc_grace_seconds = 864000
AND max_index_interval = 2048
AND memtable_flush_period_in_ms = 0
AND min_index_interval = 128
AND read_repair_chance = 0.0
AND speculative_retry = '99PERCENTILE';
and these are the declared types
CREATE TYPE profiles.peoplealsoviewed (
url text,
id text
);
CREATE TYPE profiles.profilelegacy (
name text,
headline text,
location text,
connections int,
summary text
);
CREATE TYPE profiles.positions (
org text,
title text,
end text,
start text,
desce text
);
CREATE TYPE profiles.recommendations (
givencount text,
receivedcount text,
given frozen<list<text>>,
received frozen<list<text>>
);
CREATE TYPE profiles.skills (
title text,
count text
);
CREATE TYPE profiles.educations (
major text,
end text,
name text,
degree text,
start text,
desce text
);
CREATE TYPE profiles.profilelaternative (
name text,
headline text,
location text,
connections int
);
答案 0 :(得分:1)
当Cassandra尝试根据用户定义的类型进行填充时,它期望tuple
值的类型与该类型字段的顺序相同,例如对于profilelegacy
,它需要(name, headline, location, connections, summary)
,或一个对象,该对象的属性与所讨论的名称匹配(例如,某些对象x
可以检索到{{1} },然后依次是x.name
等。
您正在加载一个JSON文件,该文件将JSON对象解码为Python x.headline
(使用基于键的查找),而不是Python对象(使用基于属性的查找)。您需要从dict
转换为适当的dict
或具有适当属性的对象。
可能最简单的方法是use types.SimpleNamespace
,使您可以从任意tuple
中制造对象。导入dict
并将types
更改为以下内容:
json.load
将查找结果从data = json.load(json_file, object_hook=lambda d: types.SimpleNamespace(**d))
样式更改为对象样式:
dict
应该使您更接近解决方案。