test.csv

Question

import csv
from itertools import groupby
import threading
import requests

# Question snippet: read student.csv, group its rows by ref_num, build one
# payload dict per group, then start three threads per group that each POST
# that payload.
with open("student.csv", "r") as csv_ledger:
    r = csv.DictReader(csv_ledger)
    data = [dict(d) for d in r ]
    # A single dict reused for every group; each group overwrites the same keys.
    groups = {}

    # groupby only merges adjacent rows, so rows sharing a ref_num are
    # presumably contiguous in the CSV -- verify against the input file.
    for k, g in groupby(data, lambda r: (r['ref_num'])):
        items = []
        for i in g:

            # Split each row into its account fields and its line-item fields.
            chart_of_account = {k: v for k, v in i.items() if k in ['name', 'code']}
            item = {k: v for k, v in i.items() if k in ['debit', 'credit', 'desc','payee_id']}
            item.update({'chart_of_account': chart_of_account})
            items.append(item)
        # `i` is the loop variable left over from the inner loop, i.e. the last
        # row of the current group.
        groups.update({
            "date": i['date'],
            "desc": i['desc'],
            "ref_num": k,
            "items": items
        })

        # NOTE(review): takes no arguments and reads the shared `groups` dict,
        # so every thread that runs it sends the same payload.
        # ENDPOINT_URL and headers are not defined anywhere in this snippet.
        def postRequest():
            postapi = requests.post(ENDPOINT_URL, json=groups, headers=headers)

        # Three threads are created for the current group, all with the same
        # target and no arguments.
        threads = []
        for i in range(3):
            t = threading.Thread(target=postRequest, args=())
            threads.append(t)

        # This guard sits inside the groupby loop, so it is evaluated once per group.
        if __name__ == '__main__':
            for i in threads:
                # NOTE(review): `t` is the last thread created above, not the
                # loop variable `i`; setDaemon is therefore called on one
                # thread repeatedly (including after it has been started).
                t.setDaemon(True)
                i.start()
                # keep thread
            # Wait for all three threads before moving on to the next group.
            for i in threads:
                i.join()

到目前为止，我已经可以使用多线程（例如3个线程）来发送POST请求。但是，这3个线程发送的是相同的行数据，而不是各自发送不同的行数据。

CSV

date,ref_num,desc,debit,credit,payee_id,code,name

2019-01-31,L00001,john,30.00,,150,500,johnkino
2019-01-31,L00001,john,,30.00,150,600,johnkino
2019-01-31,L00002,john,30.00,,150,500,johnkino
2019-01-31,L00002,john,,30.00,150,600,johnkino

johnkino

Answer 1

我不确定您对这个项目的目标是什么，但是打开并读取一个csv文件并将信息传递给线程将看起来像这样：

test.csv

Hello
Kino
Jane

workspace.py

import csv # posting my imports because the program doesn't run otherwise
import threading

groups = []   # one list of column values per CSV row
threads = []  # Thread objects are collected here so they can be joined

# newline='' is how the csv module asks for its input files to be opened, so
# that line endings embedded in quoted fields are interpreted by the reader.
with open('test.csv', 'r', newline='') as file:
    groups.extend(csv.reader(file, delimiter=','))

# we no longer need to be nested within "with open()" because we loaded information into memory

def worker_thread(information):
    """Print the first field of one CSV row.

    Args:
        information: A sequence of column values for a single row.

    Rows with no fields are skipped: csv.reader yields an empty list for a
    blank line, and indexing it would raise IndexError inside the thread.
    """
    if not information:
        return
    print(information[0])

# Start one thread per loaded row; the row is handed to worker_thread through
# the ``args`` parameter of Thread.
for row in groups:
    worker = threading.Thread(target=worker_thread, args=(row,))
    worker.start()
    threads.append(worker)

# Block until every worker has finished before the program ends.
for worker in threads:
    worker.join()

输出：

Hello
Kino
Jane

更新

我不确定您如何计划从csv数据生成URL，但是一般结构应如下所示：

import csv
from itertools import groupby
import threading
import requests
from queue import Queue


def postRequest():
    """Worker loop: repeatedly take an item from the shared queue ``q`` and print it.

    The loop has no exit condition, so this function never returns on its
    own. Each item is acknowledged with ``q.task_done()`` after it is printed.
    """
    print('thread starting')
    while True:
        # Queue.get() blocks until an item is available.
        item_to_process = q.get()
        print(item_to_process)
        # Mark the item as handled so that q.join() can account for it.
        q.task_done()
    # Placeholder for the real work (currently disabled):
    # postapi = requests.post(ENDPOINT_URL, json=groups, headers=headers)


q = Queue()

# newline="" is how the csv module asks for its input files to be opened, so
# that line endings embedded in quoted fields are interpreted by the reader.
with open("test.csv", "r", newline="") as csv_ledger:
    data = [dict(d) for d in csv.DictReader(csv_ledger)]

# Every row is in memory now, so the file does not need to stay open while
# the rows are grouped and queued.
groups = {}

# groupby only merges adjacent rows, so rows that share a ref_num are expected
# to be contiguous in the CSV.
for k, g in groupby(data, lambda row: row['ref_num']):
    items = []
    for i in g:
        # Split each row into its account fields and its line-item fields.
        chart_of_account = {
            key: value for key, value in i.items() if key in ('name', 'code')
        }
        item = {
            key: value
            for key, value in i.items()
            if key in ('debit', 'credit', 'desc', 'payee_id')
        }
        item['chart_of_account'] = chart_of_account
        items.append(item)

    # Summary of the current group; `i` is the last row of that group.
    # This dict is overwritten for each group and is not put on the queue --
    # only the individual line items below are.
    groups.update({
        "date": i['date'],
        "desc": i['desc'],
        "ref_num": k,
        "items": items,
    })

    for item in items:
        q.put(item)

# The workers run an endless loop, so they are started as daemon threads:
# non-daemon threads would keep the interpreter alive after 'Done.' is printed
# and the program would never exit.
for i in range(3):
    t = threading.Thread(target=postRequest, daemon=True)
    t.start()

print('Main thread waiting')
# Returns once task_done() has been called for every item that was queued.
q.join()
print('Done.')

其输出为：

thread starting
{'credit': '', 'payee_id': '150', 'chart_of_account': {'name': 'johnkino', 'code': '500'}, 'desc': 'john', 'debit': '30.00'}
thread starting
{'credit': '30.00', 'payee_id': '150', 'chart_of_account': {'name': 'johnkino', 'code': '600'}, 'desc': 'john', 'debit': ''}
{'credit': '', 'payee_id': '150', 'chart_of_account': {'name': 'johnkino', 'code': '500'}, 'desc': 'john', 'debit': '30.00'}
{'credit': '30.00', 'payee_id': '150', 'chart_of_account': {'name': 'johnkino', 'code': '600'}, 'desc': 'john', 'debit': ''}
thread starting
Main thread waiting
Done.

在这种情况下，您可以看到3个线程太多了：到第三个线程启动时，所有数据都已处理完毕。但是，随着您在线程中实现更复杂的处理，情况可能会有所不同。基本上，您只需将'test.csv'改为您的csv文件名，然后将print(item_to_process)改为实际使用数据的代码。我认为，这说明了如何创建Queue()并将所有信息放入其中，然后让各线程不断从队列中取出数据，直到所有数据处理完毕为止。

如何使多线程过程成为不同的任务

1 个答案:

test.csv

workspace.py

更新