我有一个问题:pandas 的 to_sql 函数没有把正确的 dtypes 写入 SQLite 3 数据库。它会自动检测类型,并忽略所提供字典中指定的类型。我尝试了很多种类型写法,例如 "int"、"integer"、"float"、"real",既试过直接传字符串,也试过用 sqlalchemy.types 的方式指定,都不起作用。
def generate_dtypes_for_sql(filename, separator, decimal, skip_errors, quoting, engine, shape):
    """Derive per-tag SQL type names from a tag-description CSV.

    Loads *filename* via load_csv(...), keeps only rows flagged for
    archiving (column 'Архивация' == 1) and classifies each tag name
    (column 'Символьный тэг') by its numeric 'Тип' code into one of
    'boolean'/'integer'/'float'/'text'.  The resulting mapping is written
    to params_to_import.csv (Name;Type) and returned as a one-row
    DataFrame; an empty DataFrame is returned when the file is missing
    or cannot be loaded.

    NOTE(review): the column-position check below assumes a fixed CSV
    layout — confirm against the producer of the file.
    """
    result = pd.DataFrame()
    if os.path.isfile(filename):
        try:
            tags = load_csv(filename, separator, decimal, skip_errors, quoting, engine, shape)
            # Expected fixed positions of the relevant columns.
            col_archive, col_name, col_type, col_factor = 7, 6, 3, 5
            type_names = ['boolean', 'integer', 'float', 'text']
            columns = tags.columns
            layout_ok = (columns.get_loc('Архивация') == col_archive
                         and columns.get_loc('Символьный тэг') == col_name
                         and columns.get_loc('Тип') == col_type)
            if layout_ok:
                mapping = {}
                for _, row in tags.iterrows():
                    # Only rows explicitly marked for archiving are kept.
                    if row[col_archive] != 1:
                        continue
                    type_code = row[col_type]
                    if math.isnan(type_code):
                        sql_type = type_names[3]          # unknown -> text
                    elif type_code in range(6):
                        # Codes 0..5: integer when factor == 1, else float.
                        sql_type = type_names[1] if row[col_factor] == 1 else type_names[2]
                    elif type_code == 6:
                        sql_type = type_names[2]          # code 6 -> float
                    else:
                        sql_type = type_names[3]          # anything else -> text
                    mapping[row[col_name]] = sql_type
                result = pd.DataFrame([mapping])
                result.T.to_csv("params_to_import.csv", sep=";", index_label="Name", header=['Type'])
        except LoadCsvError as e:
            click.echo("Could not load {}: {}".format(filename, e), err=True)
    return result
def sqlcol(dfparam):
    """Translate a one-row DataFrame of type names into SQLAlchemy types.

    For every column of *dfparam*, each cell value that is one of
    'boolean', 'integer', 'float' or 'text' selects the matching
    sqlalchemy.types instance (the last matching cell wins);
    unrecognized values leave the column out of the result.

    Returns a dict {column_name: sqlalchemy type instance} suitable for
    the dtype= argument of DataFrame.to_sql().
    """
    # Map type-name strings to attribute names on sqlalchemy.types;
    # sqlalchemy is only touched when a cell actually matches.
    attr_names = {
        "boolean": "Boolean",
        "integer": "Integer",
        "float": "Float",
        "text": "Text",
    }
    dtypedict = {}
    for column, cells in dfparam.items():
        for cell in cells:
            attr = attr_names.get(cell)
            if attr is not None:
                dtypedict[column] = getattr(sqlalchemy.types, attr)()
    return dtypedict
# Build the {column: sqlalchemy type} mapping from the tag-description file.
df_for_sql = generate_dtypes_for_sql(types_file, separator, decimal, skip_errors, quoting, engine, shape)
df_dtypes = sqlcol(df_for_sql)
conn = sqlite3.connect(dbname, detect_types=sqlite3.PARSE_DECLTYPES)
# BUG FIX: was dtype=df_dtypes_str — an undefined name (NameError); the
# mapping built above is bound to df_dtypes.
# NOTE(review): dtype= only takes effect when to_sql CREATEs the table;
# with if_exists="append" onto an existing table the stored column
# affinities win (see the answer below) — drop the table first or use
# if_exists="replace" to have these types applied.
df.to_sql(df.name, conn, if_exists="append", index=False, dtype=df_dtypes)
解决方案:我不知道为什么,但 pandas 的 to_sql 函数只有在与标志 if_exists="append" 一起使用时才会忽略 dtype;如果改用标志 if_exists="replace",一切就能正常工作。
答案 0(得分:2)
这里的问题并不是 pandas 忽略了 dtype= 参数,而是 if_exists="append" 向 to_sql 表明目标表已经存在,因此列类型(在 SQLite 中实际上是"类型亲和性 affinity")早已在数据库中定义好了。下面的测试代码表明,只要表尚不存在,传入 dtype= 参数确实会得到期望的结果:
import pandas as pd
import sqlalchemy as sa
# SQLAlchemy URI for a file-backed SQLite database (Windows path).
connection_uri = "sqlite:///C:/__tmp/SQLite/walmart.sqlite"
engine = sa.create_engine(connection_uri)
def drop_table(table_name, engine):
    """Drop *table_name* from the database if it exists."""
    statement = sa.text(f'DROP TABLE IF EXISTS "{table_name}"')
    with engine.connect() as conn:
        conn.execute(statement)
df = pd.read_csv(r"C:\Users\Gord\Desktop\test.csv")
print(df)
"""
All_HY_SP1 All_HY_SP2
0 1 1.1
1 2 2.2
"""
# Case 1 — default behaviour: to_sql infers column affinities from the data.
drop_table("from_csv", engine)
df.to_sql("from_csv", engine, if_exists="append", index=False)
reflected = sa.Table("from_csv", sa.MetaData(), autoload_with=engine)
col_descriptions = [f'"{col.name}": {col.type}' for col in reflected.columns]
print(", ".join(col_descriptions))
# "All_HY_SP1": BIGINT, "All_HY_SP2": FLOAT
#   ^^^^^^ integer affinity was inferred — not what we wanted
# Case 2 — fix: pass dtype= while the table is being (re)created.
dtype_dict = {"All_HY_SP1": sa.Float, "All_HY_SP2": sa.Float}
drop_table("from_csv", engine)
df.to_sql("from_csv", engine, if_exists="append", index=False, dtype=dtype_dict)
reflected = sa.Table("from_csv", sa.MetaData(), autoload_with=engine)
col_descriptions = [f'"{col.name}": {col.type}' for col in reflected.columns]
print(", ".join(col_descriptions))
# "All_HY_SP1": FLOAT, "All_HY_SP2": FLOAT
#   ^^^^^ dtype= was honoured because the table did not exist yet