我编写了一个API,它将目录作为输入,并将该文件夹(及其子文件夹)中的每个文本文件(.txt)加载到Postgres数据库中。
API在处理少量文件(大约3个)时工作正常,但是当它开始“读取”第四个文件时,无论是哪个文件,程序都会崩溃。我甚至尝试把文件分成每批三个来处理,但API还是中途停止了。这是我得到的错误:
ERROR:tornado.application:Uncaught exception
Traceback (most recent call last):
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/tornado/http1connection.py", line 237, in _read_message
delegate.finish()
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/tornado/httpserver.py", line 230, in finish
self.request_callback(self.request)
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/tornado/wsgi.py", line 276, in __call__
WSGIContainer.environ(request), start_response)
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/bottle.py", line 979, in __call__
return self.wsgi(environ, start_response)
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/bottle.py", line 954, in wsgi
out = self._cast(self._handle(environ))
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/bottle.py", line 862, in _handle
return route.call(**args)
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/bottle.py", line 1740, in wrapper
rv = callback(*a, **ka)
File "my_api_clientes_cuentas.py", line 63, in read_txt
index=False)
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/pandas/core/generic.py", line 1534, in to_sql
chunksize=chunksize, dtype=dtype)
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/pandas/io/sql.py", line 473, in to_sql
chunksize=chunksize, dtype=dtype)
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/pandas/io/sql.py", line 1156, in to_sql
table.insert(chunksize)
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/pandas/io/sql.py", line 670, in insert
self._execute_insert(conn, keys, chunk_iter)
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/pandas/io/sql.py", line 645, in _execute_insert
conn.execute(self.insert_statement(), data)
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 948, in execute
return meth(self, multiparams, params)
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/sqlalchemy/sql/elements.py", line 269, in _execute_on_connection
return connection._execute_clauseelement(self, multiparams, params)
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1060, in _execute_clauseelement
compiled_sql, distilled_params
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1200, in _execute_context
context)
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1416, in _handle_dbapi_exception
util.reraise(*exc_info)
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/sqlalchemy/util/compat.py", line 187, in reraise
raise value
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1170, in _execute_context
context)
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/site-packages/sqlalchemy/dialects/postgresql/psycopg2.py", line 683, in do_executemany
cursor.executemany(statement, parameters)
File "/home/rvelez/.pyenv/versions/3.6.0/lib/python3.6/encodings/utf_8.py", line 15, in decode
def decode(input, errors='strict'):
KeyboardInterrupt
我也得到curl: (52) Empty reply from server
这是代码:
@apiR2A.route('/api/lectura', method=['POST'])
def read_txt():
    """Load every pipe-delimited ``.txt`` file under a directory into the database.

    The directory comes from the ``archivos`` query parameter and is walked
    recursively. Each ``.txt`` file is parsed with pandas (``|`` separator,
    no header row). Files with exactly 12 columns are appended to the
    ``FBDClientesCuentas`` table; any other file is reported and skipped.
    A summary of loaded and skipped file names is printed once the walk ends.

    Returns:
        None. Progress and the final summary are written to stdout only.
    """
    # Column labels "1" .. "12", applied to every accepted file.
    column_names = [str(number) for number in range(1, 13)]

    arch = []      # names of files successfully loaded
    arch_err = []  # names of files rejected for having the wrong column count

    # NOTE(review): `con` is never explicitly released in this handler; confirm
    # whether create_connection() returns something that needs closing.
    con = create_connection()
    archivos = request.query.archivos

    for root, dirs, files in os.walk(archivos):
        for file in files:
            if not file.endswith(".txt"):
                continue  # only text files are loaded

            path = os.path.join(root, file)
            df = pd.read_csv(path, encoding="utf-8", sep="|", header=None)

            # Compare by value: `is not 12` tested object identity, which only
            # happens to work for small ints and is flagged by newer Pythons.
            if len(df.columns) != len(column_names):
                print("WARNING: File {} has wrong format\n".format(path))
                arch_err.append(file)
                continue

            # NOTE(review): this fixed pause (and the one after the insert)
            # blocks the request for its whole duration; confirm it is needed.
            sleep(10)
            df.columns = column_names
            print('Reading: {}'.format(path))

            # Append the rows to the target table, without the DataFrame index.
            df.to_sql('FBDClientesCuentas',
                      con,
                      if_exists='append',
                      index=False)
            print('{} succesfully added to db.\n'.format(file))
            sleep(5)
            arch.append(file)

    if arch:
        print('Files loaded to database:')
        for name in arch:
            print(name)
    if arch_err:
        print('\nUnread files:')
        for name in arch_err:
            print(name)
编辑:问题的根源似乎在于API的运行方式,如下所示:
if __name__ == '__main__':
    # Start the app on every interface at port 3000, using the 'tornado'
    # server adapter with the reloader switched on.
    run_options = {
        'server': 'tornado',
        'host': '0.0.0.0',
        'port': 3000,
        'reloader': True,
    }
    apiR2A.run(**run_options)
问题在于我选择运行API的服务器
答案 0 :(得分:0)
将服务器从Tornado更改为Paste,API能够将所有文件加载到数据库中而不会出现任何问题。
if __name__ == '__main__':
    # Start the app on every interface at port 3000, using the 'paste'
    # server adapter with the reloader switched off.
    apiR2A.run(
        server='paste',
        host='0.0.0.0',
        port=3000,
        reloader=False,
    )