错误:列的类型为 timestamp without time zone,但表达式的类型为 double precision

时间:2019-10-06 00:31:21

标签: python pandas postgresql

以下代码...

import time
import numpy as np
import pyodbc
import datetime
import win32com.client
import pythoncom
import re
from sqlalchemy import create_engine, event
import pandas as pd
import runpy
import codecs
import collections
from multiprocessing import Process, Queue
from datetime import datetime, timezone, timedelta
import threading
import os
import csv
import warnings
import xlsxwriter
from threading import Timer
import psycopg2

# Install an "ignore" filter with no category/module restriction, so every
# warning raised after this point is silenced.
warnings.filterwarnings("ignore")
import time
import win32com.client
import threading
from datetime import datetime, timezone, timedelta
import numpy as np
import pyodbc
import datetime
import pandas as pd
import runpy
import codecs
import collections
from multiprocessing import Process, Queue
import threading
from datetime import timedelta
import warnings
from win32com import client
from datetime import datetime
import os
#from docx import Document
#from docx.shared import Inches, Pt, Mm
import time
import numpy as np
import pyodbc
import datetime
import pandas as pd
import runpy
import codecs
import collections
from multiprocessing import Process, Queue
import threading
from datetime import timedelta
from datetime import datetime
import os
import time
import glob
import time
import time
from datetime import datetime, timezone, timedelta
import time
import win32com.client
import threading
from datetime import datetime, timezone, timedelta
import numpy as np
import pyodbc
import datetime
import pandas as pd
import runpy
import codecs
import collections
from multiprocessing import Process, Queue
import threading
from datetime import timedelta
import warnings
from win32com import client
from datetime import datetime
import os
#from docx import Document
#from docx.shared import Inches, Pt, Mm
import time
import numpy as np
import pyodbc
import datetime
import pandas as pd
import runpy
import codecs
import collections
from multiprocessing import Process, Queue
import threading
from datetime import timedelta
import warnings
from win32com import client
from datetime import datetime
import os
import time
import glob
import time
import time
from datetime import datetime, timezone, timedelta

# Load the hourly Kronos clock-in/out export from Excel and insert each row
# into the PostgreSQL table "daily_performance_metrics"."employee_kronos_data_2".
# This is the version that fails with the DatatypeMismatch quoted afterwards.
warnings.filterwarnings("ignore")
kronos_df = pd.read_excel(r"\\teslamotors.com\us\Public\stalamakki\ExcelFiles\KronosDataHourlyRefresh.xls")

# NOTE(review): the result of fillna('') is discarded here. DataFrame.fillna
# returns a new object unless inplace=True is passed, so kronos_df still
# holds NaN for empty cells after this line.
kronos_df.fillna('')
# One dict per spreadsheet row, keyed by column name.
clockRecords = kronos_df.to_dict('records')

# Positional INSERT: the five placeholders are filled from record.values(),
# so the spreadsheet column order must match the table's column order.
sqlUpsert = """
                        INSERT INTO "daily_performance_metrics"."employee_kronos_data_2"
                        VALUES (%s,%s,%s,%s,%s)
"""
#                        VALUES (%s,to_timestamp(%s, 'YY-MM-DD HH24:MI'),COALESCE(to_timestamp(NULLIF(%s, '01/01/01 00:00'),'MM/DD/YY hh24:mi')),%s,%s)

#sqlDelete = """
#        DELETE FROM "daily_performance_metrics"."employee_kronos_data" WHERE CustomerName='Alfreds Futterkiste';

# NOTE(review): the connection string embeds a plaintext password; it should
# come from the environment or a secret store rather than source code.
postgres_conn = psycopg2.connect("host=sjc04p1scadb02.teslamotors.com dbname=service_warehouse_metrics user=service_warehouse_rw port=5432 password=gvjY96LcnWn2B3+obVjFsLG5erMy/4JNxgN00Lnq2n0=")
postgres_cursor = postgres_conn.cursor()
for record in clockRecords:
    # NOTE(review): because the fillna result above was dropped, missing
    # values are float NaN rather than '', so these comparisons never match
    # and NaN is sent to the timestamp column (the 'NaN'::float in the error).
    # pd.isna(...) is the reliable way to test for a missing value.
    if record['ShiftEnd'] == '':
       record['ShiftEnd'] = None
    if record['ShiftStart'] == '':
       record['ShiftStart'] = None
    # Parameters are bound by position from the row's values.
    postgres_cursor.execute(sqlUpsert,list(record.values()))
# All inserts are committed in one transaction after the loop finishes.
postgres_conn.commit()
postgres_cursor.close()
postgres_conn.close()

...在尝试写入(我认为是)第一条包含空值的记录时,会产生如下错误消息...

---------------------------------------------------------------------------
DatatypeMismatch                          Traceback (most recent call last)
<ipython-input-73-2ef7c8c3820c> in <module>()
     15     if record['ShiftStart'] == 'NaN':
     16        record['ShiftStart'] = None
---> 17 postgres_cursor.execute(sqlUpsert,list(record.values()))
     18 postgres_conn.commit()
     19 postgres_cursor.close()

DatatypeMismatch: column "shift_end" is of type timestamp without time zone but expression is of type double precision
LINE 3: ...              VALUES ('zvolkert','10/02/19 13:13','NaN'::flo...
                                                             ^
HINT:  You will need to rewrite or cast the expression.

为了处理Null,我已经尝试了INSERT语句的这种语法...

-- Attempted workaround: parse both shift columns with to_timestamp and map a
-- sentinel string to NULL via NULLIF.
-- NOTE(review): the bound value is still a float NaN, so NULLIF compares a
-- double precision value with the text literal '01/01/01 00:00'; that is the
-- InvalidTextRepresentation error quoted below.
-- NOTE(review): the shift_start format 'YY-MM-DD HH24:MI' does not appear to
-- match sample data such as '10/02/19 13:13'; confirm the intended format.
INSERT INTO "daily_performance_metrics"."employee_kronos_data_2"
VALUES (%s,to_timestamp(%s, 'YY-MM-DD HH24:MI'),COALESCE(to_timestamp(NULLIF(%s, '01/01/01 00:00'),'MM/DD/YY hh24:mi')),%s,%s)

...生成此错误消息...

InvalidTextRepresentation: invalid input syntax for type double precision: "01/01/01 00:00"
LINE 3: ...4:MI'),COALESCE(to_timestamp(NULLIF('NaN'::float, '01/01/01 ...

我猜这只是一个非常简单的语法错误。如果有人能告诉我把这些字符串和空值写入时间戳字段的正确语法,我将不胜感激。

我要写入的是这张表...

-- Target table for the Kronos clock records: one row per shift with five
-- columns, in the same order as the positional INSERT parameters.
-- NOTE(review): this creates employee_kronos_data_5, while the INSERT
-- statements shown target employee_kronos_data_2; confirm which is intended.
CREATE TABLE daily_performance_metrics.employee_kronos_data_5 (
    file_number TEXT
    ,shift_start TIMESTAMP  -- reported as "timestamp without time zone" in the error
    ,shift_end TIMESTAMP    -- the column that rejects the float NaN value
    ,job_category TEXT
    ,job_name TEXT
    )

这是我要复制到数据库中的文件:

ClockInOutRecords.xlsx

1 个答案:

答案 0 :(得分:0)

解决方案就是把 NaN 值替换为 None。我以为自己已经这样做了,但 == 'NaN' 这种写法并不正确;检查 NaN 的正确方法是 pd.isna。修正后的代码如下...

# Parameterised INSERT for one Kronos clock record. Both shift columns arrive
# as 'MM/DD/YY hh24:mi' strings and are parsed server-side by to_timestamp;
# a NULL parameter yields a NULL timestamp. The single-argument COALESCE
# simply returns its argument and is kept only to leave the statement as-is.
sqlUpsert = """
                        INSERT INTO "daily_performance_metrics"."employee_kronos_data_2"                        
                        VALUES (%s,COALESCE(to_timestamp(%s, 'MM/DD/YY hh24:mi')),COALESCE(to_timestamp(%s, 'MM/DD/YY hh24:mi')),%s,%s)
"""

# NOTE(review): the connection string embeds a plaintext password; it should
# come from the environment or a secret store rather than source code.
postgres_conn = psycopg2.connect("host=sjc04p1scadb02.teslamotors.com dbname=service_warehouse_metrics user=service_warehouse_rw port=5432 password=gvjY96LcnWn2B3+obVjFsLG5erMy/4JNxgN00Lnq2n0=")
postgres_cursor = postgres_conn.cursor()

# NOTE(review): sqlDelete must be defined before this point; the only
# definition visible in this file is commented out.
postgres_cursor.execute(sqlDelete)

for record in clockRecords:
    # pandas represents empty Excel cells as NaN/NaT, which psycopg2 would
    # send as a float and PostgreSQL would reject for timestamp/text columns.
    # Normalise every field (not just a hand-picked subset, which previously
    # left ShiftStart unprotected) to None so it is bound as SQL NULL.
    # Only values are replaced, so mutating during iteration is safe.
    for column, value in record.items():
        if pd.isna(value):
            record[column] = None
    # Parameters are bound by position, in the spreadsheet's column order.
    postgres_cursor.execute(sqlUpsert, list(record.values()))
# NOTE(review): the inserts are only persisted once postgres_conn.commit() is
# called; make sure the commit and the cursor/connection close follow the loop.