PyMySQL INSERT INTO查询忽略/切断查询未运行

时间:2019-08-28 10:02:10

标签: python mysql amazon-web-services lambda pymysql

我有两个具有两个不同conn和游标的INSERT INTO查询,sql_into_scores查询运行并将值(分数和统计输出)添加到DB,但是sql_into_metrics没有。两者都应向数据库添加值列表。它告诉我sql_into_metrics查询中存在一个问题(也单独检查了另一个查询)。所有这些都在lambda函数中运行并将结果存储在RDS数据库中。 MySQL版本是5.7。

connect(用户等。)全部正常,因为我还完全删除了sql_into_metrics查询,并且sql_into_scores查询中的值存储在AWS RDS中。

从s3中提取数据(在.txt文件中),并通过read_files函数将其读入两个numpy数组中。文本文件中数据的示例:

{“ entity_id”:[42、11、12],“得分”:[95、95、30],“ TruePos”:[0],“ FalsePos”:[1],“ FalseNeg”:[0 ],“ TrueNeg”:[0],“ score_overall”:[0.0],“ precision”:[0.0],“ recall”:[0.0],“ fscore”:[0.0],“ support”:[0.0]}

我已经尝试了以下方法来修复为什么不执行指标查询的原因:

1)修改将(metric_list)中添加了值的列表更改为以下格式:

- [ , , , ]
- ( , , , )
- [ [ , , , ] ]

2)将列表/元组内的值更改为全部浮动(然后分别更改VALUES(%f,...)。

3)我已经尝试将inscurs设置为inscurs.execute和inscurs.executemany

4)对于这两个查询都具有conn和inscurs,并且仅对于sql_into_metrics查询具有第二个conn_2和inscurs_2。

def lambda_handler(event, context):

    '''
    Take the metric data collected after each run and
    store in RDS for performance tracking internally.
    '''

    # Insert in database
    conn = pymysql.connect(user=DB_USERNAME, password=DB_PASSWORD, host=DB_HOST, database=DB_DATABASE)
    conn_2 = pymysql.connect(user=DB_USERNAME, password=DB_PASSWORD, host=DB_HOST, database=DB_DATABASE)

    sql_into_metrics = "INSERT INTO dealscore_metrics_main (score_overall, precision, recall, fscore, support, true_pos, false_pos, false_neg, true_neg) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    sql_into_scores = "INSERT INTO dealscore_scores (entity_id, score) VALUES (%s,%s)"

    inscurs = pymysql.cursors.Cursor(conn)
    inscurs_2 = pymysql.cursors.Cursor(conn_2)
    try:
        print("Adding the latest metric data into RDS...")
        score_data_s3,metric_data_s3 = read_files(prefix=prefix_to_metrics, bucket=BUCKET)
        metric_list = metric_data_s3.tolist()
        # Commented out last modifications to fix the problem(which didnt work, this was the last try, before reverting back to output now)
        # metric_list = [item for t in metric_list for item in t]
        # metric_list = [float(x) for x in metric_list]
        # print(type(metric_list[0]))

        score_data_s3 = score_data_s3.tolist()

        print("This is metric_list:", metric_list)
        print("This is score_data_s3:", score_data_s3)
        print(conn)
        try:
            inscurs_2.executemany(sql_into_metrics, metric_list)
            #inscurs_1.execute(sql_into_metrics, metric_list)
        except:
            print("did not upload")
            raise 

        inscurs.executemany(sql_into_scores, score_data_s3)
        print(inscurs)
        print(inscurs_2)

        conn.commit()
        conn_2.commit()

        print("Now deleting the file from s3")
        delete_s3_file(prefix=prefix_to_metrics)
        print("Delete finished, and")
    except Exception as e:
        print('Unable to add latest metrics to RDS, Error: ', e)
        print(traceback.format_exc())
        conn.rollback()
        conn_2.rollback()
    inscurs.close()
    inscurs_2.close()

    return print("The upload was successful, shutting down...")

#where it pulls the data from (S3, .txt file, transforms to np array for #scores and metrics and returns the two arrays to then be transformed #into lists before executing the query. (This read_files function does #run normally)

def read_files(prefix, bucket):
    metric_bucket = bucket
    session = boto3.session.Session()
    REGION = session.region_name

    s3 = boto3.client('s3', region_name=REGION)
    txt_file = s3.get_object(Bucket=metric_bucket, Key=prefix)

    metrics = txt_file['Body'].read()
    df = json.loads(metrics)        

    df['entity_id'] = [int(x) for x in df['entity_id']]
    df['score'] = [int(x) for x in df['score']]
    df['TruePos'] = [int(x) for x in df['TruePos']]
    df['FalsePos'] = [int(x) for x in df['FalsePos']]
    df['FalseNeg'] = [int(x) for x in df['FalseNeg']]
    df['TrueNeg'] = [int(x) for x in df['TrueNeg']]
    df['score_overall'] = [float(x) for x in df['score_overall']]
    df['precision'] = [float(x) for x in df['precision']]
    df['recall'] = [float(x) for x in df['recall']]
    df['fscore'] = [float(x) for x in df['fscore']]
    df['support'] = [float(x) for x in df['support']]

    print(df)

    numpy_array_scores = np.column_stack(
        (
        df['entity_id'],
        df['score']
        )
        )
    numpy_array_metrics = np.column_stack(
        (
        df['score_overall'],
        df['precision'],
        df['recall'],
        df['fscore'],
        df['support'],
        df['TruePos'],
        df['FalsePos'],
        df['FalseNeg'],
        df['TrueNeg']


        )
        )
    print("The numpy array_scores:", numpy_array_scores)
    print("The numpy array_metrics:", numpy_array_metrics)
    return numpy_array_scores, numpy_array_metrics

这是来自AWS lambda的日志输出/错误消息,我希望将metric_list和score_data_s3值(列表中的值)添加到RDS中。

START RequestId: .....
Connecting to the Dealscore RDS DB:
Adding the latest metric data into RDS...

{'entity_id': [42, 11, 12], 'score': [95, 95, 30], 'TruePos': [0], 'FalsePos': [1], 'FalseNeg': [0], 'TrueNeg': [0], 'score_overall': [0.0], 'precision': [0.0], 'recall': [0.0], 'fscore': [0.0], 'support': [0.0]}

The numpy array_scores: [[42 95]
 [11 95]
 [12 30]]

The numpy array_metrics: [[0. 0. 0. 0. 0. 0. 1. 0. 0.]]

This is metric_list: [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]]

This is score_data_s3: [[42, 95], [11, 95], [12, 30]]
<pymysql.connections.Connection object at 0x7fa52ff33f28>

did not upload

Unable to add latest metrics to RDS, Error:  (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'precision, recall, fscore, support, true_pos, false_pos, false_neg, true_neg) VA' at line 1")

Traceback (most recent call last):

  File "/var/task/upload_to_rds.py", line 57, in lambda_handler
    inscurs_2.executemany(sql_into_metrics, metric_list)
  File "/var/task/pymysql/cursors.py", line 197, in executemany
    self._get_db().encoding)
  File "/var/task/pymysql/cursors.py", line 234, in _do_execute_many
    rows += self.execute(sql + postfix)
  File "/var/task/pymysql/cursors.py", line 170, in execute
    result = self._query(query)
  File "/var/task/pymysql/cursors.py", line 328, in _query
    conn.query(q)
  File "/var/task/pymysql/connections.py", line 517, in query
    self._affected_rows = self._read_query_result(unbuffered=unbuffered)
  File "/var/task/pymysql/connections.py", line 732, in _read_query_result
    result.read()
  File "/var/task/pymysql/connections.py", line 1075, in read
    first_packet = self.connection._read_packet()
  File "/var/task/pymysql/connections.py", line 684, in _read_packet
    packet.check_error()
  File "/var/task/pymysql/protocol.py", line 220, in check_error
    err.raise_mysql_exception(self._data)
  File "/var/task/pymysql/err.py", line 109, in raise_mysql_exception
    raise errorclass(errno, errval)

pymysql.err.ProgrammingError: 

(1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'precision, recall, fscore, support, true_pos, false_pos, false_neg, true_neg) VA' at line 1")

The upload was successful, shutting down...


END RequestId: ...
REPORT RequestId: ...   
Duration: 813.15 ms Billed Duration: 900 ms     Memory Size: 256 MB Max Memory Used: 103 MB

1 个答案:

答案 0 :(得分:0)

问题在于precision是Mysql的保留字,并且已将其用作列名,因此应使用反斜线将其四舍五入并将查询更改为

sql_into_metrics = "INSERT INTO dealscore_metrics_main 
                    (score_overall, `precision`, recall, fscore, support, 
                     true_pos, false_pos, false_neg, true_neg) 
                    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"