如果这个问题已经有人提出并解决过,我很抱歉。我花了很多时间试图解决这个看似简单的问题。首先是错误信息:
raise ValueError("Input array must be 1 dimensional")
ValueError: Input array must be 1 dimensional
导致错误的代码:
bins=pd.IntervalIndex.from_tuples([(29,35),(35,55),(55,80)])
pd.cut(np.array(heart_data.Age),bins=bins,labels=["Youth","Mid-Age","Old"])
我的数据样本(感谢克里斯·阿的提示):
{('Age',): {204: 62, 159: 56, 219: 48, 174: 60, 184: 50, 295: 63, 269: 56, 119: 46, 193: 60, 154: 39, 51: 66, 249: 69, 278: 58, 229: 64, 208: 49, 302: 57, 58: 34, 220: 63, 18: 43, 228: 59, 11: 48, 300: 68, 70: 54, 146: 44, 122: 41}, ('Sex',): {204: 'F', 159: 'M', 219: 'M', 174: 'M', 184: 'M', 295: 'M', 269: 'M', 119: 'F', 193: 'M', 154: 'F', 51: 'M', 249: 'M', 278: 'F', 229: 'M', 208: 'M', 302: 'F', 58: 'M', 220: 'F', 18: 'M', 228: 'M', 11: 'F', 300: 'M', 70: 'M', 146: 'F', 122: 'F'}, ('ChestPain',): {204: 0, 159: 1, 219: 0, 174: 0, 184: 0, 295: 0, 269: 0, 119: 0, 193: 0, 154: 2, 51: 0, 249: 2, 278: 1, 229: 2, 208: 2, 302: 1, 58: 3, 220: 0, 18: 0, 228: 3, 11: 2, 300: 0, 70: 2, 146: 2, 122: 2}, ('RestingBP',): {204: 160, 159: 130, 219: 130, 174: 130, 184: 150, 295: 140, 269: 130, 119: 138, 193: 145, 154: 138, 51: 120, 249: 140, 278: 136, 229: 125, 208: 120, 302: 130, 58: 118, 220: 150, 18: 150, 228: 170, 11: 130, 300: 144, 70: 120, 146: 118, 122: 112}, ('Chol',): {204: 164, 159: 221, 219: 256, 174: 206, 184: 243, 295: 187, 269: 283, 119: 243, 193: 282, 154: 220, 51: 302, 249: 254, 278: 319, 229: 309, 208: 188, 302: 236, 58: 182, 220: 407, 18: 247, 228: 288, 11: 275, 300: 193, 70: 258, 146: 242, 122: 268}, ('FastingBS',): {204: 0, 159: 0, 219: 1, 174: 0, 184: 0, 295: 0, 269: 1, 119: 0, 193: 0, 154: 0, 51: 0, 249: 0, 278: 1, 229: 0, 208: 0, 302: 0, 58: 0, 220: 0, 18: 0, 228: 0, 11: 0, 300: 1, 70: 0, 146: 0, 122: 0}, ('RestECG',): {204: 0, 159: 0, 219: 0, 174: 0, 184: 0, 295: 0, 269: 0, 119: 0, 193: 0, 154: 1, 51: 0, 249: 0, 278: 0, 229: 1, 208: 1, 302: 0, 58: 0, 220: 0, 18: 1, 228: 0, 11: 1, 300: 1, 70: 0, 146: 1, 122: 0}, ('maxHR',): {204: 145, 159: 163, 219: 150, 174: 132, 184: 128, 295: 144, 269: 103, 119: 152, 193: 142, 154: 152, 51: 151, 249: 146, 278: 152, 229: 131, 208: 139, 302: 174, 58: 174, 220: 154, 18: 171, 228: 159, 11: 139, 300: 141, 70: 147, 146: 149, 122: 172}, ('ExerIndAng',): {204: 0, 159: 0, 219: 1, 174: 1, 184: 0, 295: 1, 269: 1, 119: 1, 193: 1, 
154: 0, 51: 0, 249: 0, 278: 0, 229: 1, 208: 0, 302: 0, 58: 0, 220: 0, 18: 0, 228: 0, 11: 0, 300: 0, 70: 0, 146: 0, 122: 1}, ('STDepre',): {204: 6.2, 159: 0.0, 219: 0.0, 174: 2.4, 184: 2.6, 295: 4.0, 269: 1.6, 119: 0.0, 193: 2.8, 154: 0.0, 51: 0.4, 249: 2.0, 278: 0.0, 229: 1.8, 208: 2.0, 302: 0.0, 58: 0.0, 220: 4.0, 18: 1.5, 228: 0.2, 11: 0.2, 300: 3.4, 70: 0.4, 146: 0.3, 122: 0.0}, ('Slope',): {204: 0, 159: 2, 219: 2, 174: 1, 184: 1, 295: 2, 269: 0, 119: 1, 193: 1, 154: 1, 51: 1, 249: 1, 278: 2, 229: 1, 208: 1, 302: 1, 58: 2, 220: 1, 18: 2, 228: 1, 11: 2, 300: 1, 70: 1, 146: 1, 122: 2}, ('Majorvessels',): {204: 3, 159: 0, 219: 2, 174: 2, 184: 0, 295: 2, 269: 0, 119: 0, 193: 2, 154: 0, 51: 0, 249: 3, 278: 2, 229: 0, 208: 3, 302: 1, 58: 0, 220: 3, 18: 0, 228: 0, 11: 0, 300: 2, 70: 0, 146: 1, 122: 0}, ('Thal',): {204: 3, 159: 3, 219: 3, 174: 3, 184: 3, 295: 3, 269: 3, 119: 2, 193: 3, 154: 2, 51: 2, 249: 3, 278: 2, 229: 3, 208: 3, 302: 2, 58: 2, 220: 3, 18: 2, 228: 3, 11: 2, 300: 3, 70: 3, 146: 2, 122: 2}, ('Target',): {204: 0, 159: 1, 219: 0, 174: 0, 184: 0, 295: 0, 269: 0, 119: 1, 193: 0, 154: 1, 51: 1, 249: 0, 278: 0, 229: 0, 208: 0, 302: 0, 58: 1, 220: 0, 18: 1, 228: 0, 11: 1, 300: 0, 70: 1, 146: 1, 122: 1}}
样本数据:
Age Sex ChestPain RestingBP Chol FastingBS RestECG maxHR ExerIndAng STDepre \
0 63 M 3 145 233 1 0 150 0 2.3
1 37 M 2 130 250 0 1 187 0 3.5
2 41 F 1 130 204 0 0 172 0 1.4
3 56 M 1 120 236 0 1 178 0 0.8
Slope Majorvessels Thal Target
0 0 0 1 1
1 0 0 2 1
2 2 0 2 1
3 2 0 2 1
问题可能出在哪里?我尝试了很多方法,在最终决定提问之前还查看了源代码。先谢谢大家。
答案 0 :(得分:2)
这很可能是因为您的列标题是 MultiIndex。如果我先把列索引展平,这段代码就可以正常运行:
heart_data = pd.DataFrame({('Age',): {204: 62, 159: 56, 219: 48, 174: 60, 184: 50, 295: 63, 269: 56, 119: 46, 193: 60, 154: 39, 51: 66, 249: 69, 278: 58, 229: 64, 208: 49, 302: 57, 58: 34, 220: 63, 18: 43, 228: 59, 11: 48, 300: 68, 70: 54, 146: 44, 122: 41}, ('Sex',): {204: 'F', 159: 'M', 219: 'M', 174: 'M', 184: 'M', 295: 'M', 269: 'M', 119: 'F', 193: 'M', 154: 'F', 51: 'M', 249: 'M', 278: 'F', 229: 'M', 208: 'M', 302: 'F', 58: 'M', 220: 'F', 18: 'M', 228: 'M', 11: 'F', 300: 'M', 70: 'M', 146: 'F', 122: 'F'}, ('ChestPain',): {204: 0, 159: 1, 219: 0, 174: 0, 184: 0, 295: 0, 269: 0, 119: 0, 193: 0, 154: 2, 51: 0, 249: 2, 278: 1, 229: 2, 208: 2, 302: 1, 58: 3, 220: 0, 18: 0, 228: 3, 11: 2, 300: 0, 70: 2, 146: 2, 122: 2}, ('RestingBP',): {204: 160, 159: 130, 219: 130, 174: 130, 184: 150, 295: 140, 269: 130, 119: 138, 193: 145, 154: 138, 51: 120, 249: 140, 278: 136, 229: 125, 208: 120, 302: 130, 58: 118, 220: 150, 18: 150, 228: 170, 11: 130, 300: 144, 70: 120, 146: 118, 122: 112}, ('Chol',): {204: 164, 159: 221, 219: 256, 174: 206, 184: 243, 295: 187, 269: 283, 119: 243, 193: 282, 154: 220, 51: 302, 249: 254, 278: 319, 229: 309, 208: 188, 302: 236, 58: 182, 220: 407, 18: 247, 228: 288, 11: 275, 300: 193, 70: 258, 146: 242, 122: 268}, ('FastingBS',): {204: 0, 159: 0, 219: 1, 174: 0, 184: 0, 295: 0, 269: 1, 119: 0, 193: 0, 154: 0, 51: 0, 249: 0, 278: 1, 229: 0, 208: 0, 302: 0, 58: 0, 220: 0, 18: 0, 228: 0, 11: 0, 300: 1, 70: 0, 146: 0, 122: 0}, ('RestECG',): {204: 0, 159: 0, 219: 0, 174: 0, 184: 0, 295: 0, 269: 0, 119: 0, 193: 0, 154: 1, 51: 0, 249: 0, 278: 0, 229: 1, 208: 1, 302: 0, 58: 0, 220: 0, 18: 1, 228: 0, 11: 1, 300: 1, 70: 0, 146: 1, 122: 0}, ('maxHR',): {204: 145, 159: 163, 219: 150, 174: 132, 184: 128, 295: 144, 269: 103, 119: 152, 193: 142, 154: 152, 51: 151, 249: 146, 278: 152, 229: 131, 208: 139, 302: 174, 58: 174, 220: 154, 18: 171, 228: 159, 11: 139, 300: 141, 70: 147, 146: 149, 122: 172}, ('ExerIndAng',): {204: 0, 159: 0, 219: 1, 174: 1, 184: 0, 295: 1, 
269: 1, 119: 1, 193: 1, 154: 0, 51: 0, 249: 0, 278: 0, 229: 1, 208: 0, 302: 0, 58: 0, 220: 0, 18: 0, 228: 0, 11: 0, 300: 0, 70: 0, 146: 0, 122: 1}, ('STDepre',): {204: 6.2, 159: 0.0, 219: 0.0, 174: 2.4, 184: 2.6, 295: 4.0, 269: 1.6, 119: 0.0, 193: 2.8, 154: 0.0, 51: 0.4, 249: 2.0, 278: 0.0, 229: 1.8, 208: 2.0, 302: 0.0, 58: 0.0, 220: 4.0, 18: 1.5, 228: 0.2, 11: 0.2, 300: 3.4, 70: 0.4, 146: 0.3, 122: 0.0}, ('Slope',): {204: 0, 159: 2, 219: 2, 174: 1, 184: 1, 295: 2, 269: 0, 119: 1, 193: 1, 154: 1, 51: 1, 249: 1, 278: 2, 229: 1, 208: 1, 302: 1, 58: 2, 220: 1, 18: 2, 228: 1, 11: 2, 300: 1, 70: 1, 146: 1, 122: 2}, ('Majorvessels',): {204: 3, 159: 0, 219: 2, 174: 2, 184: 0, 295: 2, 269: 0, 119: 0, 193: 2, 154: 0, 51: 0, 249: 3, 278: 2, 229: 0, 208: 3, 302: 1, 58: 0, 220: 3, 18: 0, 228: 0, 11: 0, 300: 2, 70: 0, 146: 1, 122: 0}, ('Thal',): {204: 3, 159: 3, 219: 3, 174: 3, 184: 3, 295: 3, 269: 3, 119: 2, 193: 3, 154: 2, 51: 2, 249: 3, 278: 2, 229: 3, 208: 3, 302: 2, 58: 2, 220: 3, 18: 2, 228: 3, 11: 2, 300: 3, 70: 3, 146: 2, 122: 2}, ('Target',): {204: 0, 159: 1, 219: 0, 174: 0, 184: 0, 295: 0, 269: 0, 119: 1, 193: 0, 154: 1, 51: 1, 249: 0, 278: 0, 229: 0, 208: 0, 302: 0, 58: 1, 220: 0, 18: 1, 228: 0, 11: 1, 300: 0, 70: 1, 146: 1, 122: 1}})
print(heart_data.columns)
MultiIndex(levels=[['Age', 'ChestPain', 'Chol', 'ExerIndAng', 'FastingBS', 'Majorvessels', 'RestECG', 'RestingBP', 'STDepre', 'Sex', 'Slope', 'Target', 'Thal', 'maxHR']],
codes=[[0, 9, 1, 7, 2, 4, 6, 13, 3, 8, 10, 5, 12, 11]])
# Flatten column MultiIndex
heart_data.columns = [x[0] for x in heart_data.columns]
bins=pd.IntervalIndex.from_tuples([(29,35),(35,55),(55,80)])
pd.cut(np.array(heart_data.Age),bins=bins,labels=["Youth","Mid-Age","Old"])
[Out](注意:当 bins 是 IntervalIndex 时,pd.cut 会忽略 labels 参数,因此下面的结果显示的是区间而不是 "Youth" 等标签)
[(35, 55], (35, 55], (55, 80], (29, 35], (35, 55], ..., (55, 80], (55, 80], (55, 80], (55, 80], (55, 80]]
Length: 25
Categories (3, interval[int64]): [(29, 35] < (35, 55] < (55, 80]]