我想对 Galton 身高数据(Galton Height Data)进行可视化。我已将数据复制并粘贴到 txt 文件中,并使用以下代码将其转换为 .xlsx
文件:
import pandas as pd

# Parse the tab-separated text export into a list of rows (each row is a list
# of string fields); the first row holds the column names.
with open('Galton_height_Data.txt', 'r') as f:
    data = [line.strip('\n').split('\t') for line in f]

# Build the frame from the data rows, using the header row as column labels,
# then write it out as an Excel workbook.
df = pd.DataFrame(data[1:], columns=data[0])
df.to_excel('Galton_Height.xlsx')
现在,我想使用散点图来显示数据。我尝试通过以下方式绘制母亲身高与孩子身高的关系:
# Load the spreadsheet written earlier and try to scatter-plot two of its columns.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn  # not referenced anywhere else in this snippet
df = pd.read_excel("Galton_Height.xlsx")
# NOTE: x and y are given the column data (df['Mother'], df['Height']) rather than
# the column labels; this is the call that produces the KeyError reported below.
ax = df.plot( kind = 'scatter' , x = df['Mother'], y = df['Height'])
返回以下错误:
`KeyError: '[ 67. 67. 67. 67. 66.5 66.5 66.5 66.5 64. 64. 64. 64.\n 64. 64. 64. 58.5 58.5 58.5 58.5 58.5 58.5 68. 68. 68.\n 68. 68. 68. 68. 66.5 66.5 66.5 66. 65.5 62. 62. 62.\n 62. 62. 62. 62. 62. 61. 67. 67. 66.5 66.5 66.5 65.\n 65. 65. 65. 65. 65. 65. 65. 65. 64.5 64.5 64.5 64.5\n 64.5 64.5 64. 64. 64. 63. 69. 69. 69. 69. 69. 69.\n 69. 69. 68. 68. 68. 67. 67. 67. 65. 65. 65. 65.\n 65. 65. 65. 65.5 64. 64. 63. 63. 63. 63. 63. 63.\n 63. 63. 63. 63. 63. 63. 63. 63. 63.5 63.5 63.5 62.\n 62. 62. 62. 62. 62. 62. 62. 62. 62. 62. 62. 62.\n 62. 62. 62. 62. 61. 69. 69. 69. 69. 69. 67. 67.\n 67. 67. 66. 66. 66. 66. 66. 66. 66. 66. 66. 66.\n 66. 66. 66. 66. 66. 66. 66. 65.5 65.5 65.5 65.5 65.5\n 65.5 65.5 65.5 65.5 65. 65. 65. 65. 65. 64. 64. 64.\n 64. 64. 64. 64. 64. 64.5 64.5 64.5 64.5 64. 64. 64.\n 64.5 64.5 64.5 64.5 64.5 64.5 64.5 63. 63. 63.5 63.5 63.5\n 63.5 63.5 63. 63. 63. 63. 63. 63. 63. 63. 63. 63.\n 63. 63. 63. 62. 62. 62. 62. 62. 62. 62. 62. 62.\n 62. 62.5 62.5 62.5 62.5 62.5 62. 62. 62. 62. 62. 62.\n 62. 61. 58. 58. 69. 69. 69. 69. 69. 69. 69. 69.\n 69. 69. 68. 67. 67. 67. 67. 67. 67. 66.5 66.5 66.5\n 66.5 66.5 66.5 66.5 66.5 66.5 66.5 66.5 65. 65. 65. 65.\n 65. 65. 65. 65. 65. 65. 65. 65. 65. 65. 65. 65.\n 65. 65. 65. 65. 65. 65. 65. 65. 65. 65. 65. 65.\n 65. 65. 65. 65. 65. 65. 65. 65. 65. 65. 65. 65.\n 64.7 64.7 64.7 64.7 64.7 64.7 64.7 64. 64. 64. 64. 64.\n 64. 64. 64. 64. 64. 64. 64.2 64.2 64.2 64.2 64.2 64.\n 64. 64. 64. 64. 64. 64. 64. 64.5 64. 64. 64. 64.\n 64. 64. 64. 64. 64. 64. 64. 64. 64. 63.7 63.7 63.7\n 63.7 63.7 63.7 63.7 63.7 63. 63. 63. 63. 63. 63.5 63.5\n 63.5 63.5 63. 63. 63. 63. 63. 63. 63. 63. 62. 62.\n 62. 62. 62. 62. 62. 62. 62.7 62.7 62.7 62.7 62.7 62.7\n 62.7 62. 62. 62. 61. 61. 60. 60. 60. 60. 60. 60.\n 58.5 58.5 58.5 58. 58. 58. 58. 58. 68.5 68.5 68.5 68.5\n 68.5 68.5 68.5 68.5 68.5 68.5 67. 66. 66. 66. 66. 66.\n 66. 66. 66. 66. 66. 66. 66.7 66.7 66.7 66.7 66.7 66.7\n 66. 66. 66. 66. 66. 66. 
66.5 66.5 66.5 66.5 66.5 66.5\n 66.5 66.5 66.5 66.5 66.5 66.5 66.5 66.5 66.5 66. 66. 66.\n 66. 66. 66. 66. 66. 66. 66. 66. 66. 66. 66. 66.\n 66. 65. 65. 65. 65. 65. 65. 65. 64.5 64.5 64.5 64.5\n 64.5 64.5 64.5 64. 64. 64. 64. 63. 63. 63. 63. 63.\n 63. 63. 63. 63. 63. 63.5 63.5 63.5 63.5 63.5 63.5 63.5\n 63.5 63.5 63.5 62. 62. 62. 62. 62. 62. 62. 62. 62.\n 62.5 62.5 62.5 62.5 62.5 62.5 62.5 62.5 62. 62. 62. 62.\n 61. 61. 61. 61. 61. 61. 61. 61. 61. 61. 61. 61.\n 61. 61. 60. 60. 60. 60. 60. 60. 60. 60.5 70.5 70.5\n 67. 67. 67. 66.5 66.5 66.5 66.5 66.5 66.5 66.5 66.5 66.5\n 66.5 65. 65. 65.5 65.5 65.5 65.5 65.5 65.5 65.5 65.5 65.5\n 65. 65. 65. 65. 65. 65. 65. 65. 65. 65. 65. 65.\n 64. 64. 64. 64. 64. 64. 64. 64. 64. 64. 64. 64.\n 64. 64. 64. 64. 64. 64. 64. 64.5 64. 64. 64. 64.\n 64. 64. 64. 64. 64. 64. 63. 63. 63. 63. 63. 63.\n 63. 63. 63.5 63.5 63.5 63.5 63. 63. 63. 63. 63. 63.\n 63. 63. 63. 63. 63. 63. 63. 63. 63. 63. 63. 63.\n 63. 63.5 63. 63.5 63.5 63.5 63.5 63.5 62.5 62. 62. 62.5\n 61. 61. 61. 61. 61. 60.2 60. 60. 60. 60. 60. 60.\n 60. 60. 60. 60. 60. 59. 59. 59. 59. 59. 59. 59.\n 59. 59. 59. 59. 66.2 66.2 66.2 66.2 66.2 66.5 65. 65.\n 65. 65. 65. 65. 65.5 65.5 65.5 65.5 65.5 65.5 65.5 65.5\n 65.5 65. 65. 65. 65. 65. 65. 65. 65. 65. 65. 65.\n 65. 65. 65. 64. 64. 64. 64. 63.5 63.5 63.5 63.5 63.5\n 63.5 63.5 63.5 63. 63. 63. 62. 62. 62. 62. 62. 61.\n 67. 67. 67. 67. 67. 67. 67. 67. 67. 67. 67. 67.\n 67. 67. 67. 67. 67. 66. 66. 66. 66. 66. 66. 66.\n 66. 66. 66. 66. 65. 65. 65. 65. 65. 65. 65. 65.\n 65.5 65.5 65.5 65.5 65.5 63. 63.5 63.5 63. 63. 63. 63.\n 63. 63. 62.5 62.5 62.5 62.5 62.5 62.5 62.5 61.5 60. 60.\n 60. 60. 60. 59. 59. 59. 59. 59. 59. 59. 59. 59.\n 59. 59. 59. 59. 59. 59. 67. 67. 67. 67. 67. 66.\n 66. 66. 66. 65. 65. 65. 65. 65. 65. 65. 65. 65.\n 65.5 65.5 65. 65. 65. 65. 65. 65. 64. 64. 64. 64.\n 64. 64. 63. 63. 63. 63. 63. 63. 63. 63. 63. 60.\n 60. 60. 60. 60. 64. 64. 64. 64. 64. 64. 64. 64.\n 64. 64. 64. 64. 64. 64. 63. 60. 60. 66. 66. 66.\n 63. 63. 
65. 65. 65. 65. 65. 65. 65. 65. ] not in index'
`
错误信息中列出的是母亲的身高数据。看起来有些值带有 '\n',但我认为在转换为 xlsx 文件时已经处理掉了。
这可能是什么原因造成的?
答案 0 :(得分:2)
顺便一提,您可以直接使用 pandas 的
解析器来读取该文件。
# sep=r'\s+' splits on any run of whitespace (tabs or spaces); it replaces the
# deprecated delim_whitespace=True option.
df = pd.read_csv('Galton_height_Data.txt', sep=r'\s+')
至于绘图,请将列名(而不是列数据本身)传递给 plot
方法。
# Scatter plot of the 'Mother' column against the 'Height' column, selected by label.
df.plot.scatter(x='Mother', y='Height')