我对 bytes(字节串)和 pandas 都不太熟悉。
我有如下所示的数据,
但不确定如何将其转换为 DataFrame。
# Sample input: each element is a bytes object holding the repr of a Python
# dict (single-quoted keys/values, so it is not valid JSON).
data = [
    b"{'metricValue': 5.0, 'appMetadata':{'index': 'cfs_planum_metrics_debug_86188', 'host': 'iaasn00041949', 'job': 'splunk_scraper'}, 'timestampEpochSecond': 1544651948897, 'metricName': 'splunk_logs_tstats_count_per_min', 'metricType': 'count', 'metricTags': {'source': '/opt/splunk/etc/apps/PlanumComputeMetrics/bin/logs/DECOInstance2.log', 'query_timestamp': '2018-12-12T16:43:40.000-05:00'}}",
    b"{'metricValue': 4.0, 'appMetadata': {'index': 'cfs_digital_88082', 'host': 'dgt01p01tx5l046', 'job': 'splunk_scraper'}, 'timestampEpochSecond': 1544651948462, 'metricName': 'splunk_logs_tstats_count_per_min', 'metricType': 'count', 'metricTags': {'source': '/logs/apache24inst0/httpds0_access.log', 'query_timestamp': '2018-12-12T16:43:50.000-05:00'}}",
]
感谢您的帮助
答案 0 :(得分:1)
鉴于数据的结构如下:
{
'metricValue': 5.0,
'appMetadata': {
'index': 'cfs_planum_metrics_debug_86188',
'host': 'iaasn00041949',
'job': 'splunk_scraper'
},
'timestampEpochSecond': 1544651948897,
'metricName': 'splunk_logs_tstats_count_per_min',
'metricType': 'count',
'metricTags': {
'source': '/opt/splunk/etc/apps/PlanumComputeMetrics/bin/logs/DECOInstance2.log',
'query_timestamp': '2018-12-12T16:43:40.000-05:00'
}
}, {
'metricValue': 4.0,
'appMetadata': {
'index': 'cfs_digital_88082',
'host': 'dgt01p01tx5l046',
'job': 'splunk_scraper'
},
'timestampEpochSecond': 1544651948462,
'metricName': 'splunk_logs_tstats_count_per_min',
'metricType': 'count',
'metricTags': {
'source': '/logs/apache24inst0/httpds0_access.log',
'query_timestamp': '2018-12-12T16:43:50.000-05:00'
}
}
可以这样做:
# Turn `data` (a list of bytes objects, each holding the repr of a dict with
# nested 'appMetadata' and 'metricTags' dicts) into a flat DataFrame `df`.
# `data` must already be defined when this script runs.
from ast import literal_eval

import pandas as pd

# Convert your data from list of bytes into a list of strings
list_of_string = [d.decode('utf-8') for d in data]
# Parse the list of strings into a list of dictionaries.
# The payloads are Python literals (single-quoted), not JSON, so use
# literal_eval: it only accepts literals and never executes code.
list_of_dicts = [literal_eval(s) for s in list_of_string]
# Convert the list to a DataFrame; the nested dicts are kept as object cells
df = pd.DataFrame(list_of_dicts)
# Expand each nested-dict column into its own DataFrame (one column per key),
# reusing df's index so the join below lines rows up one-to-one.
app_metadata = pd.DataFrame(df['appMetadata'].tolist(), index=df.index)
metric_tags = pd.DataFrame(df['metricTags'].tolist(), index=df.index)
# Join everything back to the original DataFrame and drop the dict columns
df = df.join(app_metadata).drop('appMetadata', axis=1)
df = df.join(metric_tags).drop('metricTags', axis=1)
或者,也可以这样做:
# Flatten the dictionaries
def dict_flatten(d: dict):
    """Yield ``(key, value)`` pairs of *d* with one level of nesting removed.

    A value that is itself a dict is replaced by its own ``(sub_key, value)``
    pairs; every other value is yielded under its original key. Only the
    first level is expanded: a dict nested inside a nested dict is yielded
    unchanged as a value.

    Sub-keys are yielded without the parent key as a prefix, so equal key
    names coming from different nested dicts (or from the top level) are not
    distinguished from one another.
    """
    for key, val in d.items():
        if isinstance(val, dict):
            # Hoist the nested entries to the top level; the parent key is dropped.
            yield from val.items()
        else:
            yield key, val
# Collapse every parsed record into a single-level dictionary
flat_dicts = [dict(dict_flatten(record)) for record in list_of_dicts]
# Build the DataFrame from the flattened records (one row per record)
df = pd.DataFrame(flat_dicts)
两种方法都会得到如下结果(列的顺序可能不同):
metricName metricType metricValue timestampEpochSecond ... query_timestamp index host job
0 splunk_logs_tstats_count_per_min count 5.0 1544651948897 ... 2018-12-12T16:43:40.000-05:00 cfs_planum_metrics_debug_86188 iaasn00041949 splunk_scraper
1 splunk_logs_tstats_count_per_min count 4.0 1544651948462 ... 2018-12-12T16:43:50.000-05:00 cfs_digital_88082 dgt01p01tx5l046 splunk_scraper