传统 C++ 中与 NVIDIA CUDA 的 `__syncthreads()` 等价的机制是什么?如何规范地同步线程?

时间:2013-10-26 15:35:33

标签: c++ multithreading winapi synchronization

我的应用程序中有 4 个线程:一个主线程和三个工作线程。我希望其中两个工作线程负责生成数据,第三个工作线程在数据生成后把它写出。两个数据生成线程应当并行且同步地运行(同时开始 for 循环的每一次迭代)。只要 CPU 足够快,写入线程就应该一直处于写入状态。我不知道如何在 C++ 中规范地同步这 3 个线程,所以下面的代码是假设存在 `__syncthreads()` 函数来写的,以便最清楚地表达我的意图。传统 C++ 中是否有与 CUDA C 的 `__syncthreads()` 等价的东西?如果没有,怎样才能以最佳方式实现我想要的同步?(我不喜欢代码中的那些 while 循环,它们只会白白提高 CPU 占用率。)

// Handshake flag shared by the worker threads: dataGen() sets it to true when
// a chunk is ready, writer() clears it once both buffers have been written.
volatile bool write_flag ;

// Entry point of the question's program (excerpt: "..." marks code the author
// left out). Starts two generator threads and one writer thread and terminates
// the process if any of them cannot be created.
int main(int argc, char **argv)
{
    ...
    write_flag = false ; // nothing to write at the beginning
    ...
    // WORKING_THREADS is defined outside this excerpt.
    // NOTE(review): no matching delete[] is visible in the excerpt - confirm the array is released.
    HANDLE *trdHandles = new HANDLE[WORKING_THREADS] ;
    int IDs[] = {0, 1} ; // IDs for generator threads

    // Each generator receives the address of its own ID; generator() dereferences it.
    trdHandles[0] = CreateThread(NULL, 0, generator, &IDs[0], 0, NULL) ;  // 1st data generator thread
    if(trdHandles[0] == NULL)
    ExitProcess(0) ;
    trdHandles[1] = CreateThread(NULL, 0, generator, &IDs[1], 0, NULL) ;  // 2nd data generator thread
    if(trdHandles[1] == NULL)
    ExitProcess(0) ;

    // f_out is declared outside this excerpt; writer() casts the parameter to ofstream*.
    trdHandles[2] = CreateThread(NULL, 0, writer, f_out, 0, NULL) ;  // writer thread
    if(trdHandles[2] == NULL)
    ExitProcess(0) ;
    ...
}

// Thread entry point for a data-generator thread.
// lpParam points to the int ID of this generator; the ID is forwarded to
// dataGen(). Always returns 0.
WINAPI DWORD generator(LPVOID lpParam)
{
    const int generatorId = *static_cast<int*>(lpParam) ;
    dataGen(generatorId) ;
    return 0 ;
}

// Work loop of one generator thread (excerpt: "..." marks omitted code).
// id is the generator's ID (0 or 1 in the visible main()); 'cycles' is declared
// outside this excerpt.
void dataGen(int id)
{
    ...
    for(int aa = 0; aa < cycles; aa++)
    {
        // Placeholder: __syncthreads() is the CUDA primitive the question asks
        // about; it marks the point where both generators should start the
        // iteration together.
        __syncthreads() ;

        ... // both threads generate data here in parallel

        // Busy-wait: spins until the writer has cleared the flag.
        while(write_flag) // don't generate data too fast. Wait for writes to complete (this flag is initially set to 'false')
        ;
        setBuffers(id, aa) ; // for swapping in/out buffers
        if(id == 0) // only one thread needs to set the flag
        write_flag = true ;
     }
}

// Writer thread entry point from the question.
// lpParam is the output stream, passed as an ofstream*. Returns 0 once a
// write has completed while 'finish' is set.
WINAPI DWORD writer(LPVOID lpParam)
{
    ofstream *f_out = static_cast<ofstream*>(lpParam) ;
    // Polls write_flag without blocking, so the loop spins while there is
    // nothing to write.
    while(1)
    {
        if(write_flag)
        {
            // out_buffer0, out_buffer1 and chunk_len are declared outside this excerpt.
            f_out->write(out_buffer0, chunk_len) ;
            f_out->write(out_buffer1, chunk_len) ;
            // Clearing the flag lets the generators leave their wait loop.
            write_flag = false ;
            // 'finish' is declared outside this excerpt.
            if(!finish)
            continue ;
            else
            return 0 ;
        }
    }
}

3 个答案:

答案 0 :(得分:3)

请参阅《The Little Book of Semaphores》第 3.5 节中介绍的屏障(barrier)模式的实现。

屏障模式用于让多个线程在同一个汇合点上相互等待,从而实现同步。

答案 1 :(得分:0)

在 C++11 之前,C++ 并不直接支持多线程,您必须使用操作系统提供的服务来实现多线程和同步。Windows 提供了一组丰富的同步函数(Synchronization Functions)。对于您的场景,请查看等待函数(Wait Functions)和事件函数(Event Functions);SetEvent 与 WaitForMultipleObjects 的组合将是一个可行的解决方案。

答案 2 :(得分:0)

《The Little Book of Semaphores》这本书还不错,但它讨论的是一般性的编程,而不是像我期望的那样专门针对 C++。不过这本书确实帮到了我:有了它,我更快地找到了针对 C++ 的屏障模式的详细讲解。在读完这篇:

http://adilevin.wordpress.com/2009/06/04/locking-mechanisms/

和此:

http://adilevin.wordpress.com/category/multithreading/(屏障主要功能部分)

之后,我又花了一点时间才解决了自己的问题。我的做法是把一个 bool 标志、若干 Semaphore 对象以及(主要是)WaitForSingleObject() 调用组合起来使用,如下面的代码所示。我确信它是有效的,因为运行期间没有出现断言失败。这是一份完整的代码,结构与我的应用程序类似,但它只用来演示我是如何解决这个问题的。如果您对这段代码有任何建议——例如可以用更好的方式实现——请回复。

#include <iostream>
#include <conio.h>
#include <stdio.h>
#include <windows.h>
#include <sstream>
#include <cassert>

// Total number of worker threads: main() starts THREADS_NUM - 1 generators
// plus one writer.
#define THREADS_NUM 3

// Thread entry points and helpers, defined further down in this file.
WINAPI DWORD generator(LPVOID lpParam) ;
WINAPI DWORD writer(LPVOID lpParam) ;
void dataGen(int id) ;
void locker() ;
void sync_msg_display(std::string msg) ;

// write_flag: true while writer() is inside its simulated write.
// finish:     set by generator 0 after its last iteration; tells writer() to exit.
volatile bool write_flag = false, finish = false ;
// Number of threads that have currently entered locker().
volatile long entered_num ;
// Durations passed to Sleep(): time0/time1 simulate data generation,
// time2 simulates one write.
int time0 = 950, time1 = 1050, time2 = 550 ;

// sem0: used by locker() to block/release the generators at the rendezvous.
// sem1: released by generator 0 after each iteration, waited on by writer().
// sem2: released by writer() after each write, waited on by the generators.
HANDLE sem0, sem1, sem2 ;

using namespace std ;

int main(void)
{
    HANDLE trdHandles[THREADS_NUM] ;
    int IDs[THREADS_NUM] ;

    for(int aa = 0; aa < THREADS_NUM; aa++)
    IDs[aa] = aa ;

    entered_num = 0 ;
    sem0 = CreateSemaphore(NULL, 0, 4096, NULL) ;

    for(int aa = 0; aa < THREADS_NUM - 1; aa++)
    trdHandles[aa] = CreateThread(NULL, 0, generator, &IDs[aa], 0, NULL) ;
    trdHandles[THREADS_NUM - 1] = CreateThread(NULL, 0, writer, &IDs[THREADS_NUM - 1], 0, NULL) ;

    for(int aa = 0; aa < THREADS_NUM; aa++)
    if(trdHandles[aa] == NULL)
    ExitProcess(0) ;

    WaitForMultipleObjects(THREADS_NUM, trdHandles, true, INFINITE) ;
    for(int aa = 0; aa < THREADS_NUM; aa++)
    CloseHandle(trdHandles[aa]) ;

    CloseHandle(sem0) ;
    CloseHandle(sem1) ;
    CloseHandle(sem2) ;

    cout << "finished !" << endl ; 

    getch() ;
    return 0 ;
}

// Thread entry point for a generator thread.
// lpParam points to the int ID assigned by main(); the ID is handed to
// dataGen(), which does the actual work. Always returns 0.
WINAPI DWORD generator(LPVOID lpParam)
{
    const int* const idPtr = static_cast<int*>(lpParam) ;
    dataGen(*idPtr) ;
    return 0 ;
}

// Writer thread entry point. lpParam is not used.
//
// Each round: block on sem1 until generator 0 signals a finished iteration,
// simulate a write of time2 milliseconds with write_flag raised, then release
// two sem2 permits (one per generator). Returns 0 after the first write that
// ends with 'finish' set.
//
// NOTE(review): sem1 and sem2 are created here, i.e. after main() has already
// started the generator threads; a generator that reached its sem1/sem2 call
// before these two lines ran would use a NULL handle - confirm the start-up
// ordering is acceptable.
// NOTE(review): 'finish' is only examined right after a write; if it were set
// later than that check, this thread would block on sem1 indefinitely - verify.
WINAPI DWORD writer(LPVOID lpParam)
{
    LONG previousCount ;

    sem1 = CreateSemaphore(NULL, 0, 4096, NULL) ;
    sem2 = CreateSemaphore(NULL, 0, 4096, NULL) ;

    for(;;)
    {
        WaitForSingleObject(sem1, INFINITE) ;

        // Simulated write, bracketed by the flag the generators look at.
        write_flag = true ;
        sync_msg_display("Write started.") ;
        Sleep(time2) ;
        sync_msg_display("Write finished.") ;
        write_flag = false ;

        ReleaseSemaphore(sem2, 2, &previousCount) ;

        if(finish)
            break ;
    }
    return 0 ;
}

// Work loop of one generator thread.
//
// id selects the generator: 0 is "thread no. 0", any other value is reported as
// "thread no. 1". The two generators use swapped sleep times so that a
// different one finishes first on alternating iterations.
//
// Each of the 20 iterations:
//   1. simulates data generation with Sleep(),
//   2. waits until no write is in progress,
//   3. meets the other generator in locker(),
//   4. (generator 0 only) releases sem1 so the writer starts the next write.
// After the loop both generators meet once more and generator 0 sets 'finish'.
void dataGen(int id)
{
    const bool isFirst = (id == 0) ;
    const std::string prefix = isFirst ? "Generator thread no. 0 iteration no. "
                                       : "Generator thread no. 1 iteration no. " ;

    for(int aa = 0; aa < 20; aa++)
    {
        std::stringstream ss ;
        ss << aa ;
        const std::string iteration = ss.str() ;

        sync_msg_display(prefix + iteration + " start.") ;
        // Generator 0 sleeps time0 on odd and time1 on even iterations;
        // the other generator does the opposite.
        const bool oddIteration = (aa % 2) != 0 ;
        Sleep((oddIteration == isFirst) ? time0 : time1) ;
        sync_msg_display(prefix + iteration + " complete.") ;

        // Don't generate data too fast: wait for the current write to complete.
        // The writer releases two sem2 permits after every write whether or not
        // anybody was waiting, so a permit taken here may be a leftover from an
        // earlier write. Re-check the flag after each wake-up instead of
        // trusting a single wait.
        while(write_flag)
            WaitForSingleObject(sem2, INFINITE) ;

        assert(!write_flag) ;
        Sleep(10) ; ////
        assert(!write_flag) ;

        locker() ;

        if(isFirst) // only one thread needs to wake the writer
            ReleaseSemaphore(sem1, 1, NULL) ;
    }

    // Final rendezvous so that 'finish' is raised only after both generators are done.
    locker() ;
    if(isFirst)
        finish = true ;
}

void locker()
{
    LONG prev ;
    if(InterlockedIncrement(&entered_num) < 2)
    WaitForSingleObject(sem0, INFINITE) ;
    else
    {
        entered_num = 0 ;
        ReleaseSemaphore(sem0, 1, &prev) ;
    }
}

// Prints msg followed by a newline on std::cout, serialised against other
// callers through the named mutex "mutex" so that lines from different threads
// do not interleave.
//
// The mutex is opened (or created) on every call and closed again afterwards.
// If it cannot be obtained the message is still printed, just without the
// lock, rather than being dropped.
void sync_msg_display(string msg)
{
    // Use the ANSI variant explicitly: the name is a narrow string literal, so
    // the TCHAR macro would not compile in a UNICODE build.
    HANDLE lock = CreateMutexA(NULL, FALSE, "mutex") ;
    if(lock == NULL)
    {
        cout << msg << endl ;
        return ;
    }

    const DWORD waitResult = WaitForSingleObject(lock, INFINITE) ;
    cout << msg << endl ;

    // Only release the mutex if this thread actually acquired ownership.
    if(waitResult == WAIT_OBJECT_0 || waitResult == WAIT_ABANDONED)
        ReleaseMutex(lock) ;
    CloseHandle(lock) ;
}