Pandas to_sql function does not put the correct dtypes into a SQLite DB

Date: 2020-04-25 18:16:55

Tags: python pandas sqlite sqlalchemy

I have a problem: the pandas to_sql function is not putting the correct dtypes into a SQLite 3 database. It detects the types automatically and ignores the ones specified in the dictionary I provide. I have tried many variants of the types, such as int, integer, float, and real, passing them either directly as strings or via the sqlalchemy.types classes.

Issue

import math
import os

import click
import pandas as pd

# load_csv and LoadCsvError come from the asker's own helper module

def generate_dtypes_for_sql(filename, separator, decimal, skip_errors, quoting, engine, shape):
    df2 = pd.DataFrame()
    if os.path.isfile(filename):
        try:
            df = load_csv(filename, separator, decimal, skip_errors, quoting, engine, shape)
            params_to_import = {}
            cols = df.columns
            i_arc = 7; i_name = 6; i_type = 3; i_factor = 5
            params_types = ['boolean', 'integer', 'float', 'text']
            if (i_arc == cols.get_loc('Архивация') and
                    i_name == cols.get_loc('Символьный тэг') and
                    i_type == cols.get_loc('Тип')):
                for index, row in df.iterrows():
                    if row[i_arc] == 1:
                        if math.isnan(row[i_type]):
                            params_to_import[row[i_name]] = params_types[3]
                        elif row[i_type] in range(6):
                            if row[i_factor] == 1:
                                params_to_import[row[i_name]] = params_types[1]
                            else:
                                params_to_import[row[i_name]] = params_types[2]
                        elif row[i_type] == 6:
                            params_to_import[row[i_name]] = params_types[2]
                        else:
                            params_to_import[row[i_name]] = params_types[3]
            df2 = pd.DataFrame([params_to_import])
            df2.T.to_csv("params_to_import.csv", sep=";", index_label="Name", header=['Type'])
        except LoadCsvError as e:
            click.echo("Could not load {}: {}".format(filename, e), err=True)
    return df2

def sqlcol(dfparam):
    dtypedict = {}
    for index, values in dfparam.items():
        for value in values:
            if value == "boolean":
                dtypedict.update({index: sqlalchemy.types.Boolean()})
            elif value == "integer":
                dtypedict.update({index: sqlalchemy.types.Integer()})
            elif value == "float":
                dtypedict.update({index: sqlalchemy.types.Float()})
            elif value == "text":
                dtypedict.update({index: sqlalchemy.types.Text()})
    return dtypedict
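As a side note, the if/elif chain in sqlcol can be collapsed into a dictionary lookup. This is a sketch assuming the same four type-name strings as above; it accepts anything with a dict-like .items() (the one-row DataFrame produced by generate_dtypes_for_sql, or a plain dict of lists):

```python
import sqlalchemy

# Map the type-name strings to SQLAlchemy type classes
TYPE_MAP = {
    "boolean": sqlalchemy.types.Boolean,
    "integer": sqlalchemy.types.Integer,
    "float": sqlalchemy.types.Float,
    "text": sqlalchemy.types.Text,
}

def sqlcol(dfparam):
    # dfparam maps each column name to an iterable holding its type-name string
    return {col: TYPE_MAP[val]() for col, values in dfparam.items()
            for val in values if val in TYPE_MAP}
```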

df_for_sql = generate_dtypes_for_sql(types_file, separator, decimal, skip_errors, quoting, engine, shape)
df_dtypes = sqlcol(df_for_sql)

conn = sqlite3.connect(dbname, detect_types=sqlite3.PARSE_DECLTYPES)

df.to_sql(df.name, conn, if_exists="append", index=False, dtype=df_dtypes)

Solution: I don't know why, but the pandas to_sql function ignores dtype only when I use it with the flag if_exists="append". If I use it with if_exists="replace" instead, it works fine.
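A minimal stdlib sketch (plain sqlite3, no pandas, with a hypothetical table name) of why this happens: in SQLite, a column's declared type is fixed when the table is created, and INSERT statements, which is all that if_exists="append" issues against an existing table, never change it:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
# Suppose the table already exists with a TEXT column:
conn.execute('CREATE TABLE demo ("All_HY_SP1" TEXT)')
# Appending rows (what to_sql with if_exists="append" does) cannot
# redeclare the column type, whatever dtype= asks for:
conn.execute('INSERT INTO demo VALUES (1.5)')
declared = conn.execute('PRAGMA table_info(demo)').fetchone()[2]
print(declared)  # TEXT
conn.close()
```

With if_exists="replace", pandas drops and recreates the table, so the dtype= mapping is used in the new CREATE TABLE statement.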

1 Answer:

Answer 0 (score: 2):

The issue here is not that pandas is ignoring the dtype= argument; it's that calling to_sql with if_exists="append" tells pandas that the table already exists, so the column types (actually "affinities" in SQLite) are already defined in the database. This test code shows that if the table does not already exist then the dtype= argument does indeed produce the desired result:

import pandas as pd
import sqlalchemy as sa

connection_uri = "sqlite:///C:/__tmp/SQLite/walmart.sqlite"
engine = sa.create_engine(connection_uri)

def drop_table(table_name, engine):
    with engine.connect() as conn:
        conn.execute(sa.text(f'DROP TABLE IF EXISTS "{table_name}"'))

df = pd.read_csv(r"C:\Users\Gord\Desktop\test.csv")
print(df)
"""
   All_HY_SP1  All_HY_SP2
0           1         1.1
1           2         2.2
"""
# default behaviour
drop_table("from_csv", engine)
df.to_sql("from_csv", engine, if_exists="append", index=False)
tbl = sa.Table("from_csv", sa.MetaData(), autoload_with=engine)
print(", ".join([f'"{col.name}": {col.type}' for col in tbl.columns]))
# "All_HY_SP1": BIGINT, "All_HY_SP2": FLOAT
#               ^^^^^^
# fix with dtype:
dtype_dict = {"All_HY_SP1": sa.Float, "All_HY_SP2": sa.Float}
drop_table("from_csv", engine)
df.to_sql("from_csv", engine, if_exists="append", index=False, dtype=dtype_dict)
tbl = sa.Table("from_csv", sa.MetaData(), autoload_with=engine)
print(", ".join([f'"{col.name}": {col.type}' for col in tbl.columns]))
# "All_HY_SP1": FLOAT, "All_HY_SP2": FLOAT
#               ^^^^^
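If SQLAlchemy reflection isn't handy, the declared column types can also be checked with plain sqlite3 via PRAGMA table_info. A sketch, reusing the table and column names from the example above but creating the table by hand:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute('CREATE TABLE "from_csv" ("All_HY_SP1" FLOAT, "All_HY_SP2" FLOAT)')
# PRAGMA table_info yields (cid, name, type, notnull, dflt_value, pk) per column
cols = {name: decl for _, name, decl, *_ in conn.execute('PRAGMA table_info("from_csv")')}
print(cols)  # {'All_HY_SP1': 'FLOAT', 'All_HY_SP2': 'FLOAT'}
conn.close()
```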