import pandas as pd
import numpy as np
import sys
# Fetch the auto-mpg data file from the UCI repository and label its nine
# columns explicitly through `names`.
# NOTE(review): no `sep` argument is passed here, so read_csv uses its default
# comma delimiter. This is presumably why the rows do not split into the nine
# named columns -- confirm against the raw file's actual delimiter.
auto = pd.read_csv(
"https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data",
names=['MPG', 'Cylinders', 'Displacement', 'Horse power',
'Weight', 'Acceleration', 'Model Year', 'Origin', 'Car Name']
)
# Preview the first rows of the parsed frame (displayed in a notebook/REPL).
auto.head()
我需要清理这些数据,但读取时总是遇到这个问题,需要一些帮助。我是初学者,自己没能弄清楚原因。
答案 0 :(得分:2)
如果查看该文件,会发现分隔符并不固定,而是数量不等的空白字符。使用 sep='\s+' 即可得到所需的输出。
# Read the auto-mpg file from the UCI repository, splitting fields on runs of
# whitespace and labelling the nine columns explicitly through `names`.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
df = pd.read_csv(
    url,
    # Raw string: in a plain literal '\s' is an invalid escape sequence, which
    # raises a DeprecationWarning (SyntaxWarning on Python 3.12+).
    sep=r"\s+",
    names=[
        "MPG",
        "Cylinders",
        "Displacement",
        "Horse power",
        "Weight",
        "Acceleration",
        "Model Year",
        "Origin",
        "Car Name",
    ],
)
# Preview the first rows of the parsed frame (displayed in a notebook/REPL).
df.head()
MPG Cylinders Displacement Horse power Weight Acceleration Model Year Origin Car Name
0 18 8 307 130.0 3504 12.0 70 1 chevrolet chevelle malibu
1 15 8 350 165.0 3693 11.5 70 1 buick skylark 320
2 18 8 318 150.0 3436 11.0 70 1 plymouth satellite
3 16 8 304 150.0 3433 12.0 70 1 amc rebel sst
4 17 8 302 140.0 3449 10.5 70 1 ford torino
答案 1 :(得分:1)
使用 delim_whitespace 参数(注意:该参数自 pandas 2.2 起已被弃用,等价的写法是 sep=r'\s+'):
# Read the auto-mpg file from the UCI repository as whitespace-delimited text,
# labelling the nine columns explicitly through `names`.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
cols = ['MPG', 'Cylinders', 'Displacement', 'Horse power', 'Weight',
        'Acceleration', 'Model Year', 'Origin', 'Car Name']
# sep=r"\s+" is the documented equivalent of delim_whitespace=True; the latter
# is deprecated since pandas 2.2, so the regex separator is used here instead.
auto = pd.read_csv(url, names=cols, sep=r"\s+")
# Preview the first rows of the parsed frame (displayed in a notebook/REPL).
auto.head()
Out:
MPG Cylinders Displacement Horse power Weight Acceleration \
0 18.0 8 307.0 130.0 3504.0 12.0
1 15.0 8 350.0 165.0 3693.0 11.5
2 18.0 8 318.0 150.0 3436.0 11.0
3 16.0 8 304.0 150.0 3433.0 12.0
4 17.0 8 302.0 140.0 3449.0 10.5
Model Year Origin Car Name
0 70 1 chevrolet chevelle malibu
1 70 1 buick skylark 320
2 70 1 plymouth satellite
3 70 1 amc rebel sst
4 70 1 ford torino