我的脚本返回JIRA
票证的更改日志历史记录,它似乎在我的开发机器(Mac Pro)上正常工作。当我尝试实现异步以更快地提取请求时,它挂了几次,但是使用单线程进程它每次都有效。
当我在Windows生产服务器上部署时,它达到了大约90%的完成点,然后挂起而没有任何有用的消息或指示可能出错的地方。 Windows任务计划程序将其显示为"完成",这意味着它必须返回某种不可见的成功完成代码。我甚至对哪里开始追踪这个问题的原因感到有点困惑。我将包含我的代码供参考:
# jira_changelog_history.py
"""
Records the history for every jira issue ID in a database.
"""
from concurrent.futures import ThreadPoolExecutor
from csv import DictWriter
import datetime
import gzip
import logging
from threading import Lock
from typing import Generator
from jira import JIRA
from inst_config import config3, jira_config as jc
from inst_utils import aws_utils
from inst_utils.inst_oauth import SigMethodRSA
from inst_utils.jira_utils import JiraOauth
from inst_utils.misc_utils import (
add_etl_fields,
clean_data,
get_fieldnames,
initialize_logger
)
# Snapshot of today's date, used for dated log / staging file names.
TODAY = datetime.date.today()

# Module-wide logger.  The dated production Windows path is commented out in
# favor of a relative local file — presumably a dev-machine setting; confirm
# before deploying to the Windows server.
logger = initialize_logger(config3.GET_LOGFILE(
    # r'C:\Runlogs\JiraChangelogHistory\{date}.txt'.format(
    #     date=TODAY
    # )
    'logfile.txt'
)
)
def return_jira_keys(
    jira_instance: JIRA,
    jql: str,
    result_list: list,
    start_at: int,
    max_res: int = 500
) -> None:
    """Fetch one page of issue keys matching *jql* and append them to *result_list*.

    Fix: the original annotation claimed ``-> Generator`` but the function
    contains no ``yield`` and always returns ``None`` — it communicates its
    results by mutating the shared ``result_list`` instead.

    Args:
        jira_instance: An authenticated ``JIRA`` client.
        jql: JQL query string selecting the issues.
        result_list: Shared output list the keys are appended to
            (``list.append`` is atomic under the GIL, so concurrent workers
            may share one list).
        start_at: Zero-based offset of the first result of this page.
        max_res: Page size; must match the stride used to compute the
            page offsets in the caller.
    """
    issues = jira_instance.search_issues(
        jql_str=jql,
        startAt=start_at,
        maxResults=max_res,
        fields='key'  # only the key is needed, keep the payload minimal
    )
    for issue in issues:
        result_list.append(issue.key)
def write_issue_history(
    jira_instance: JIRA,
    issue_id: str,
    writer: DictWriter,
    lock: Lock):
    """Append every changelog entry of one issue to the shared CSV writer.

    Re-fetches the issue with ``expand='changelog'`` and emits one row per
    changed field per history event.  Rows pass through the project's
    ``clean_data`` / ``add_etl_fields`` helpers before being written; the
    lock serializes ``writerow`` calls so the writer can be shared across
    threads.

    Args:
        jira_instance: Authenticated ``JIRA`` client.
        issue_id: Key of the issue whose changelog is exported.
        writer: Shared ``csv.DictWriter`` over the gzip'd staging file.
        lock: Guards the (non-thread-safe) writer.
    """
    logging.debug('Now processing data for issue {}'.format(issue_id))
    changelog = jira_instance.issue(issue_id, expand='changelog').changelog
    for event in changelog.histories:
        event_date = event.created
        for change in event.items:
            row = {
                'issue_id': issue_id,
                'date': event_date,
                'field': change.field,
                'changed_from': change.fromString,
                'changed_to': change.toString,
            }
            clean_data(row)
            add_etl_fields(row)
            with lock:
                writer.writerow(row)
if __name__ == '__main__':
    try:
        # --- OAuth dance: obtain an authorized JIRA session ------------------
        signature_method = SigMethodRSA(jc.JIRA_RSA_KEY_PATH)
        o = JiraOauth(jc.OAUTH_URLS, jc.CONSUMER_INFO, signature_method)
        req_pub = o.oauth_dance_part1()
        o.gain_authorization(jc.AUTHORIZATION_URL, req_pub)
        acc_pub, acc_priv = o.oauth_dance_part2()
        with open(jc.JIRA_RSA_KEY_PATH) as key_f:
            key_data = key_f.read()
        oauth_dict = {
            'access_token': acc_pub,
            'access_token_secret': acc_priv,
            'consumer_key': config3.CONSUMER_KEY,
            'key_cert': key_data
        }
        j = JIRA(
            server=config3.BASE_URL,
            oauth=oauth_dict
        )
        # Full load
        # jql = 'project not in ("IT Service Desk")'
        # 3 day load, need SQL statement to trunc out if key in
        jql = 'project not in ("IT Service Desk") AND updatedDate > -3d'
        # "total" attribute of JIRA.ReturnedList returns the total records
        total_records = j.search_issues(jql, maxResults=1).total
        logging.info('Total records: {total}'.format(total=total_records))
        # One page offset per 500 issues; 500 must match return_jira_keys's
        # default max_res.
        start_at = tuple(range(0, total_records, 500))
        keys = []
        # FIX: keep every Future and retrieve its result.  The original code
        # discarded the Futures returned by submit(), so any exception raised
        # inside a worker thread was silently swallowed — a run could end
        # with an incomplete key list and no error message, which matches the
        # "hangs/finishes at ~90% with no diagnostics" symptom.  (Also
        # renamed `exec`, which shadowed the builtin.)
        with ThreadPoolExecutor(max_workers=5) as key_pool:
            key_futures = [
                key_pool.submit(return_jira_keys, j, jql, keys, start)
                for start in start_at
            ]
        for future in key_futures:
            future.result()  # re-raises any exception from the worker
        table = r'ods_jira.staging_jira_changelog_history'
        fieldnames = get_fieldnames(
            table_name=table,
            db_info=config3.REDSHIFT_POSTGRES_INFO_PROD
        )
        # loadfile = (
        #     r'C:\etl3\file_staging\jira_changelog_history\{date}.csv.gz'.format(
        #         date=TODAY
        #     ))
        loadfile = r'jira_changelogs.csv.gz'
        with gzip.open(loadfile, 'wt') as outf:
            writer = DictWriter(
                f=outf,
                fieldnames=fieldnames,
                delimiter='|',
                extrasaction='ignore'
            )
            writer_lock = Lock()
            # Single-threaded history fetch: the threaded variant was
            # reported to hang intermittently, so issues are processed
            # sequentially here.
            for index, key in enumerate(keys):
                logging.info(
                    'On #{num} of {total}: %{percent_done:.2f} '
                    'completed'.format(
                        num=index,
                        total=total_records,
                        percent_done=(index / total_records) * 100
                    ))
                write_issue_history(
                    jira_instance=j,
                    issue_id=key,
                    writer=writer,
                    lock=writer_lock
                )
        # Stage the gzip'd CSV to S3, then load it into Redshift.
        s3 = aws_utils.S3Loader(
            infile=loadfile,
            s3_filepath='jira_scripts/changelog_history/'
        )
        s3.load()
        rs = aws_utils.RedshiftLoader(
            table_name=table,
            safe_load=True
        )
        # Delete-then-copy upsert.  Guarded: with an empty key list the
        # original built "WHERE issue_id in ()", which is invalid SQL.
        if keys:
            delete_stmt = '''
            DELETE FROM {table_name}
            WHERE issue_id in {id_list}
            '''.format(
                table_name=table,
                # Keys come from JIRA itself (not user input), so plain
                # string interpolation is tolerated here.
                id_list=(
                    '('
                    + ', '.join(['\'{}\''.format(key) for key in keys])
                    + ')')
            )
            rs.execute(
                rs.use_custom_sql,
                sql=delete_stmt
            )
        rs.execute(
            rs.copy_to_db,
            copy_from=s3.get_full_destination()
        )
    except Exception:
        # FIX: log the full traceback before re-raising.  Task Scheduler on
        # the production Windows host only surfaces an exit status, so this
        # logfile entry is the only diagnostic trail for failed runs.
        logging.exception('jira_changelog_history run failed')
        raise
答案 0（得分：0）：
我建议先只用一个工作线程（max_workers=1）运行,看看是否更稳定。