The script I am writing requests news article metadata from an API. In response, it receives a page of results containing several news articles. It is meant to process one record at a time, pulling the data fields out of the JSON dictionary and inserting them into Postgres.
However, when I run the insert, the function returns:
CompileError: Unconsumed column names: urlToImage, publishedAt
How do I get this insert to work?
Any help would be much appreciated!
Here is my code:
from datetime import datetime # for parsing the API timestamps and stamping uploads
from sqlalchemy import MetaData # for getting table metadata
from sqlalchemy import Table # for interacting with tables
from sqlalchemy import create_engine # for creating db engine
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import insert # for getting the alternate query method to work
# Create DB engine
engine = create_engine('postgresql+psycopg2://{user}:{password}@{hostip}/{db}'.format(**dbkeys))
# Get metadata objects for tables in database
metadata = MetaData(engine, reflect=True)
nstream = metadata.tables['nstream']
for item in response_page['articles']:
    # Convert datetime strings from api into Python datetime format
    dtp = datetime.strptime(item['publishedAt'], "%Y-%m-%dT%H:%M:%SZ")
    inserttw = nstream.insert().values(source_id = item['source']['id'],
                                       source_name = item['source']['name'],
                                       author = item['author'],
                                       title = item['title'],
                                       description = item['description'],
                                       url = item['url'],
                                       urlToImage = item['urlToImage'],
                                       publishedAt = dtp,
                                       uploaded2db = datetime.now(),
                                       content = item['content'])
    engine.execute(inserttw)
Full traceback:
CompileError Traceback (most recent call last)
<ipython-input-10-c5f4a6bff45e> in <module>
63 # 2. If query has more than one page, get additional pages
64
---> 65 get_results(tfrom, engine = engine, max_retries = 5)
<ipython-input-10-c5f4a6bff45e> in get_results(tfrom, engine, max_retries)
39
40 # Append the results to the database using the helper
---> 41 process_page(results)
42
43 # If there is an exception, add to the retry counter and then sleep.
<ipython-input-10-c5f4a6bff45e> in process_page(response_page)
26 uploaded2db = datetime.now(),
27 content = item['content'])
---> 28 engine.execute(inserttw)
29
30 def get_results(tfrom, engine = engine, max_retries = 5):
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in execute(self, statement, *multiparams, **params)
2073
2074 connection = self.contextual_connect(close_with_result=True)
-> 2075 return connection.execute(statement, *multiparams, **params)
2076
2077 def scalar(self, statement, *multiparams, **params):
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in execute(self, object, *multiparams, **params)
946 raise exc.ObjectNotExecutableError(object)
947 else:
--> 948 return meth(self, multiparams, params)
949
950 def _execute_function(self, func, multiparams, params):
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/elements.py in _execute_on_connection(self, connection, multiparams, params)
267 def _execute_on_connection(self, connection, multiparams, params):
268 if self.supports_execution:
--> 269 return connection._execute_clauseelement(self, multiparams, params)
270 else:
271 raise exc.ObjectNotExecutableError(self)
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_clauseelement(self, elem, multiparams, params)
1051 inline=len(distilled_params) > 1,
1052 schema_translate_map=self.schema_for_object
-> 1053 if not self.schema_for_object.is_default else None)
1054
1055 ret = self._execute_context(
<string> in <lambda>(self, bind, dialect, **kw)
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/elements.py in compile(self, default, bind, dialect, **kw)
440 else:
441 dialect = default.StrCompileDialect()
--> 442 return self._compiler(dialect, bind=bind, **kw)
443
444 def _compiler(self, dialect, **kw):
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/elements.py in _compiler(self, dialect, **kw)
446 Dialect."""
447
--> 448 return dialect.statement_compiler(dialect, self, **kw)
449
450 def __str__(self):
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/compiler.py in __init__(self, dialect, statement, column_keys, inline, **kwargs)
451 # dialect.label_length or dialect.max_identifier_length
452 self.truncated_names = {}
--> 453 Compiled.__init__(self, dialect, statement, **kwargs)
454
455 if (
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/compiler.py in __init__(self, dialect, statement, bind, schema_translate_map, compile_kwargs)
217 if self.can_execute:
218 self.execution_options = statement._execution_options
--> 219 self.string = self.process(self.statement, **compile_kwargs)
220
221 @util.deprecated("0.7", ":class:`.Compiled` objects now compile "
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/compiler.py in process(self, obj, **kwargs)
243
244 def process(self, obj, **kwargs):
--> 245 return obj._compiler_dispatch(self, **kwargs)
246
247 def __str__(self):
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/visitors.py in _compiler_dispatch(self, visitor, **kw)
79 raise exc.UnsupportedCompilationError(visitor, cls)
80 else:
---> 81 return meth(self, **kw)
82 else:
83 # The optimization opportunity is lost for this case because the
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/compiler.py in visit_insert(self, insert_stmt, asfrom, **kw)
2057
2058 crud_params = crud._setup_crud_params(
-> 2059 self, insert_stmt, crud.ISINSERT, **kw)
2060
2061 if not crud_params and \
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/crud.py in _setup_crud_params(compiler, stmt, local_stmt_type, **kw)
55 try:
56 if local_stmt_type in (ISINSERT, ISUPDATE):
---> 57 return _get_crud_params(compiler, stmt, **kw)
58 finally:
59 if should_restore:
~/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/crud.py in _get_crud_params(compiler, stmt, **kw)
144 raise exc.CompileError(
145 "Unconsumed column names: %s" %
--> 146 (", ".join("%s" % c for c in check))
147 )
148
CompileError: Unconsumed column names: urlToImage, publishedAt
Answer (score: 1):
It turns out the problem was that I had the capitalization of the column names wrong.
I tracked this down by using SQLAlchemy's inspector. The columns were all there, their names were just lowercase.
from sqlalchemy import create_engine
from sqlalchemy.engine import reflection

# Build an Inspector from the existing engine and list the columns that
# actually exist on the nstream table (the table name is passed as a string)
insp = reflection.Inspector.from_engine(engine)
print(insp.get_columns('nstream'))
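In this version of SQLAlchemy, get_columns() returns a list of dictionaries with keys such as 'name' and 'type', so the lowercase names urltoimage and publishedat show up directly under 'name'.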
The source of the confusion is that when you create a table in Postgres, it will automatically lower case your column names unless you put the names in quotes when you create them.
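As a quick illustration of that folding rule (a minimal sketch, using a made-up table name example_quotes that is not part of the original script): SQLAlchemy only quotes identifiers that are not already all lowercase, so a mixed-case Column name is created quoted and keeps its capitalization, while hand-written DDL without quotes is folded to lowercase by Postgres.
from sqlalchemy import Column, MetaData, Table, Text

demo_meta = MetaData()
# SQLAlchemy emits this mixed-case name quoted ("urlToImage"), so Postgres keeps
# the capitalization -- and every later reference has to match it exactly.
example_quotes = Table('example_quotes', demo_meta, Column('urlToImage', Text))
# By contrast, unquoted hand-written DDL such as
#   CREATE TABLE example_plain (urlToImage text);
# ends up with the lowercase column name urltoimage.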
Here is the working version of the insert command:
inserttw = nstream.insert().values(source_id = item['source']['id'],
                                   source_name = item['source']['name'],
                                   author = item['author'],
                                   title = item['title'],
                                   description = item['description'],
                                   url = item['url'],
                                   urltoimage = item['urlToImage'],
                                   publishedat = dtp,
                                   uploaded2db = datetime.now(),
                                   content = item['content'])
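A small variation on the same fix (a sketch only, assuming the same reflected nstream table and the item and dtp variables from the loop above) is to build the row as a plain dict keyed by the lowercase column names and pass it to execute() together with the insert construct:
row = {'source_id': item['source']['id'],
       'source_name': item['source']['name'],
       'author': item['author'],
       'title': item['title'],
       'description': item['description'],
       'url': item['url'],
       'urltoimage': item['urlToImage'],  # keys must match the reflected lowercase names
       'publishedat': dtp,
       'uploaded2db': datetime.now(),
       'content': item['content']}
engine.execute(nstream.insert(), row)
Passing a list of such dicts instead of a single one lets the same call insert a whole results page at once.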