Pandas将Dataframe转换为嵌套Json

时间:2014-05-10 03:43:05

标签: python json dictionary pandas

我的问题基本上与这个问题相反:

Create a Pandas DataFrame from deeply nested JSON

我想知道是否可以做相反的事情。给出如下表格:

     Library  Level           School Major  2013 Total
200  MS_AVERY  UGRAD  GENERAL STUDIES  GEST        5079
201  MS_AVERY  UGRAD  GENERAL STUDIES  HIST           5
202  MS_AVERY  UGRAD  GENERAL STUDIES  MELC           2
203  MS_AVERY  UGRAD  GENERAL STUDIES  PHIL          10
204  MS_AVERY  UGRAD  GENERAL STUDIES  PHYS           1
205  MS_AVERY  UGRAD  GENERAL STUDIES  POLS          53

是否可以生成嵌套的dict(或JSON),如:

字典:

{'MS_AVERY': 
    { 'UGRAD' :
        {'GENERAL STUDIES' : {'GEST' : 5}
                             {'MELC' : 2}

 ...

4 个答案:

答案 0 :(得分:6)

编写一个函数,根据给定的DataFrame对象构建递归(嵌套)字典,似乎并不难:

def fdrec(df):
    """Build a nested dict from the rows of ``df.values``.

    Every column except the last two contributes one level of nesting, in
    column order. The second-to-last column is the innermost key and the
    last column is the value stored under it. When the same full key path
    appears in more than one row, the value from the first such row is
    kept. An empty dict is returned when there are fewer than two columns.
    """
    nested = {}
    rows = df.values
    if rows.shape[1] < 2:
        # Fewer than two columns cannot form a key/value pair.
        return nested
    for row in rows:
        node = nested
        # Walk (creating as needed) one dict level per leading column.
        for key in row[:-2]:
            node = node.setdefault(key, {})
        # setdefault keeps an already-stored leaf, so the first row wins.
        node.setdefault(row[-2], row[-1])
    return nested

将产生:

{'MS_AVERY':
    {'UGRAD':
        {'GENERAL STUDIES': {'PHYS': 1L, 
                             'POLS': 53L,
                             'PHIL': 10L,
                             'HIST': 5L,
                             'MELC': 2L,
                             'GEST': 5079L}}}}

答案 1 :(得分:1)

这是我在处理这个问题(this question)时想出的解决方案:

def rollup_to_dict_core(x, values, columns, d_columns=None):
    """Recursively roll the frame ``x`` up into nested dicts.

    ``columns`` lists the key columns, outermost first; ``values`` lists
    the value columns.

    * With one key column left, ``x`` is indexed by it. A single value
      column gives ``{key: value}``; several give ``{key: {column: value}}``.
    * Otherwise ``x`` is grouped by the first key column plus ``d_columns``
      and each group is rolled up with the remaining key columns
      (``d_columns`` is not passed down to the recursive call).
    * When ``d_columns`` is non-empty, the grouped result is named after
      ``columns[1]``, the ``d_columns`` index levels are moved back into
      columns, and the frame is returned via ``to_dict(orient='index')``.
    """
    extra_keys = [] if d_columns is None else d_columns

    # Base case: a single key column remains.
    if len(columns) == 1:
        indexed = x.set_index(columns)
        if len(values) == 1:
            return indexed[values[0]].to_dict()
        return indexed[values].to_dict(orient='index')

    def roll_up_group(group):
        return rollup_to_dict_core(group, values, columns[1:])

    res = x.groupby([columns[0]] + extra_keys).apply(roll_up_group)
    if len(extra_keys) == 0:
        return res.to_dict()

    res.name = columns[1]
    res = res.reset_index(level=range(1, len(extra_keys) + 1))
    return res.to_dict(orient='index')

def rollup_to_dict(x, values, d_columns=None):
    """Roll the frame ``x`` up into nested dicts keyed by its other columns.

    Every column of ``x`` that is listed in neither ``values`` nor
    ``d_columns`` becomes a key level, in the frame's column order. The
    work itself is done by ``rollup_to_dict_core``.
    """
    extra_keys = [] if d_columns is None else d_columns
    columns = [c for c in x.columns if not (c in values or c in extra_keys)]
    return rollup_to_dict_core(x, values, columns, extra_keys)

>>> pprint(rollup_to_dict(df, ['2013 Total']))
{'MS_AVERY': {'UGRAD': {'GENERAL STUDIES': {'GEST': 5079,
                                            'HIST': 5,
                                            'MELC': 2,
                                            'PHIL': 10,
                                            'PHYS': 1,
                                            'POLS': 53}}}}

答案 2 :(得分:0)

  // Reveal the content: animate `height` and `opacity` to 1 at the same
  // time, each over 1000 ms with linear easing.
  const showContent = () => {
    Animated.parallel([
      Animated.timing(height, {
        toValue: 1,
        duration: 1000,
        easing: Easing.linear,
        useNativeDriver: false, // <-- necessary (presumably because `height` drives a layout style — confirm)
      }),
      Animated.timing(opacity, {
        toValue: 1,
        duration: 1000,
        easing: Easing.linear,
        useNativeDriver: false, // <-- necessary
      }),
    ]).start();
  };

  // Hide the content in two sequential steps: first animate `opacity` to 0,
  // then, from that animation's start() callback, animate `height` to 0.
  // Each step takes 1000 ms with linear easing.
  const dismissContent = () => {
    Animated.timing(opacity, {
      toValue: 0,
      duration: 1000,
      easing: Easing.linear,
      useNativeDriver: false,
    }).start(() => {
      // Runs once the opacity animation's start() callback fires.
      Animated.timing(height, {
        toValue: 0,
        duration: 1000,
        easing: Easing.linear,
        useNativeDriver: false,
      }).start();
    });
  };

  // Effect keyed on props.text: run showContent unless the text is the
  // empty string.
  useEffect(() => {
    if (props.text !== '' ) showContent();
  }, [props.text]);

  // Map the animated `height` value (0..1) onto a maxHeight of 0..1200.
  const maxHeight = height.interpolate({
    inputRange: [0, 1],
    outputRange: [0, 1200], // <-- must be larger than your content's height
  });

  // Render nothing when no text was supplied.
  if (props.text === null || props.text === undefined) return null;

  return (
    <ScrollView style={styles.box}>
      <Text style={{ marginTop: 40 }}>Contribution</Text>
      <Animated.View style={{ opacity: opacity, maxHeight: maxHeight }}>
        <Text style={styles.content}>{props.text}</Text>
      </Animated.View>
      <View style={styles.spacing}>
        <Button title="Show content" onPress={showContent} />
        <Button title="Hide content" onPress={dismissContent} />
      </View>
    </ScrollView>
  );
};

它将产生:

# Group on the three outer levels; each group collapses to a list of
# {'Major': ..., '2013 Total': ...} record dicts.
key = ['Library', 'Level', 'School']
grouped = df.groupby(key, sort=False)[df.columns.difference(key)]
series = grouped.apply(
    lambda frame: frame[['Major', '2013 Total']].to_dict('records')
)

# build: {Major: Total} -- only the records of the first group are used
values = series.values[0]
major = {record['Major']: record['2013 Total'] for record in values}

# build the recursive dictionary: wrap `major` in the first group's index
# labels, innermost level first
index = series.index[0]
d = {}
for label in reversed(index):
    # `d` is empty only on the first pass, where `major` becomes the leaf.
    d = {label: d or major}
print(json.dumps(d, indent=2))

答案 3 :(得分:0)

这是生成此格式的通用方法,可能是其他人正在寻找的方法。所需格式:

{ "data": 
   [
        {
            "NAME": [1, 2, 3]
        },
        {
            "NAME": [1, 2, 3]
        },
    ]
}

要做到这一点:

import json
# Build {"data": [{<column name>: [<column values>]}, ...]} with one
# single-key object per DataFrame column, in column order.
#
# The structure is assembled from Python objects and serialized once instead
# of being concatenated by hand, so that:
#   * column names are escaped correctly (quotes, backslashes, ...);
#   * a frame with no columns yields {"data": []} rather than invalid JSON;
#   * non-string column names no longer raise on string concatenation.
jsonstr = json.dumps({
    "data": [
        # str(): JSON object keys are strings.
        # tolist(): converts NumPy scalars to plain Python values, which
        # json.dumps can serialize (it rejects e.g. numpy.int64).
        {str(columnName): columnData.tolist()}
        # DataFrame.items() replaces iteritems(), removed in pandas 2.0.
        for (columnName, columnData) in df.items()
    ]
})
jsonobject = json.loads(jsonstr)
jsonobject