python pandas ValueError:值的长度与索引的长度不匹配

时间:2018-10-03 07:18:26

标签: python pandas

此代码的目的是刮起一堆长度不同(每张表的行数不同)的数据表,将它们变成pandas数据帧,删除一些不必要的列并确定日期。

以上所有方法都可以正常工作,除了确定日期。

我试图通过使用重建的日期创建一个新列来完全重建日期。

但是当我尝试这段代码时:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
import pandas as pd
from datetime import datetime
from tzlocal import get_localzone

class DataEngine:
    def __init__(self):
        self.urls = open(r"C:\Users\Sayed\Desktop\script\sample.txt").readlines()
        self.driver = webdriver.Chrome(r"D:\Projects\Tutorial\Driver\chromedriver.exe")
        self.wait = WebDriverWait(self.driver, 10)

    def title(self):
        names = []
        for url in self.urls:
            self.driver.get(url)
            title = self.driver.find_element_by_xpath('//*[@id="leftColumn"]/h1').text
            names.append(title)
        return names

    def table(self):
        DataFrames = []
        for url in self.urls:
            self.driver.get(url)
            while True:
                try:
                    item = self.wait.until(
                        ec.visibility_of_element_located((By.XPATH, '//*[contains(@id,"showMoreHistory")]/a')))
                    self.driver.execute_script("arguments[0].click();", item)
                except Exception:
                    break

            df = pd.DataFrame(columns=['Release Date', 'Time', 'Actual', 'Forecast', 'Previous'])
            pos = 0
            for table in self.wait.until(
                    ec.visibility_of_all_elements_located((By.XPATH, '//*[contains(@id,"eventHistoryTable")]//tr'))):
                data = [item.text for item in table.find_elements_by_xpath(".//*[self::td]")]
                if data:
                    df.loc[pos] = data[0:5]
                    pos += 1
            df = df.head(50)
            DataFrames.append(df)
        return DataFrames

    def date(self):
        Dates = []
        dfs = []
        tables = self.table()
        for df in tables:
            df["Date"] = df["Release Date"].apply(lambda date: date[:12]) + " " + df["Time"]
            for date in df["Date"]:
                date = datetime.strptime(date.strip(), '%b %d, %Y %H:%M')
                Dates.append(date)
            df["Date"] = Dates
            df['Date'] = df['Date'].dt.tz_localize('EST').dt.tz_convert(get_localzone())


            df = df[['Date', 'Actual', 'Forecast', 'Previous', 'Release Date', 'Time']]
            df = df.drop(df.columns[-4:], axis=1).reset_index(drop=True)
            print(df.head())

            dfs.append(df)
        return dfs 

我收到此错误:

  

回溯(最近通话最近一次):

     

文件“ D:/Projects/Tutorial/database.py”,第94行,在       DataEngine()。date()

     

文件“ D:/Projects/Tutorial/database.py”,第57行,日期       df [“ Date”] =日期

     

文件   “ C:\ Users \ Sayed \ Anaconda3 \ lib \ site-packages \ pandas \ core \ frame.py”,   第3119行,在 setitem

self._set_item(key, value)
     

文件   “ C:\ Users \ Sayed \ Anaconda3 \ lib \ site-packages \ pandas \ core \ frame.py”,   第3194行,位于_set_item

value = self._sanitize_column(key, value)
     

文件   “ C:\ Users \ Sayed \ Anaconda3 \ lib \ site-packages \ pandas \ core \ frame.py”,   _sanitize_column中的第3391行

value = _sanitize_index(value, self.index, copy=False)
     

文件   “ C:\ Users \ Sayed \ Anaconda3 \ lib \ site-packages \ pandas \ core \ series.py”,   第4001行,位于_sanitize_index

raise ValueError('Length of values does not match length of ' 'index')
     

ValueError:值的长度与索引的长度不匹配

0 个答案:

没有答案