cpp中的并行memcpy

时间:2018-11-15 03:53:24

标签: c++ multithreading pointers c++14 memcpy

我正在尝试并行复制矩阵。下面是我正在使用的代码。目前,它可以与char一起正常工作,但是当我使用短裤时它会出现段错误。我认为错误在于复制向量之外的内存。我试图调试我的假设,但没有成功。

CMakeLists.txt

cmake_minimum_required(VERSION 3.0)
project(memcpy CXX)
find_package (Threads)
add_executable(memcpy main.cpp)
set_property(TARGET memcpy PROPERTY CXX_STANDARD 17)
target_link_libraries (memcpy ${CMAKE_THREAD_LIBS_INIT})

main.cpp

#include <cassert>
#include <condition_variable>
#include <cstring>
#include <iostream>
#include <mutex>
#include <string>
#include <thread>
#include <vector>


class Barrier {
  public:
    explicit Barrier(std::size_t const count) : m_threshold(count), m_remaining(count), m_generation(0) {}

    void wait() {
        auto local = std::unique_lock<std::mutex>{m_mutex};
        auto current_generation = m_generation;

        m_remaining--;
        if (!m_remaining) {
            m_generation++;
            m_remaining = m_threshold;
            m_condition.notify_all();
        } else {
            m_condition.wait(local, [this, current_generation] { return current_generation != m_generation; });
        }
    }

  private:
    std::mutex m_mutex;
    std::condition_variable m_condition;
    std::size_t m_threshold;
    std::size_t m_remaining;
    std::size_t m_generation;
};


template <typename T>
class Matrix {
    using reference = typename std::vector<T>::reference;
    using const_reference = typename std::vector<T>::const_reference;

  public:
    Matrix(std::size_t rows, std::size_t cols) : m_rows(rows), m_cols(cols), m_data(m_cols * m_rows) {}
    Matrix(std::size_t rows, std::size_t cols, T const& default_val) : m_rows(rows), m_cols(cols), m_data(m_cols * m_rows, default_val) {}

    constexpr std::size_t get_columns() const { return m_cols; }
    constexpr std::size_t get_rows() const { return m_rows; }
    constexpr std::size_t get_element_count() const {
        assert(m_cols * m_rows == m_data.size());
        return m_cols * m_rows;
    }

    T* data() { return m_data.data(); }
    T const* data() const { return m_data.data(); }

    reference operator()(std::size_t const column_x, std::size_t const row_y) {
        assert(0 <= column_x);
        assert(column_x < get_columns());
        assert(0 <= row_y);
        assert(row_y < get_rows());

        return m_data[row_y * m_cols + column_x];
    }

    const_reference operator()(std::size_t const column_x, std::size_t const row_y) const {
        assert(0 <= column_x);
        assert(column_x < get_columns());
        assert(0 <= row_y);
        assert(row_y < get_rows());

        return m_data[row_y * m_cols + column_x];
    }

  private:
    std::size_t const m_rows;
    std::size_t const m_cols;
    std::vector<T> m_data;
};


static_assert(false, "FIX ME");
using T = char;
// using T = short;
// using T = int;
// using T = double;


void run(std::size_t const my_rank, std::size_t const num_threads, Barrier& barrier, Matrix<T> const& from_data, Matrix<T>& to_data) {
    auto n = from_data.get_element_count();
    std::string str;

    if (my_rank == 0) {
        std::cerr << "bytes to copy: " << (n * sizeof(T)) << '\n';
    }

    // initialization
    std::size_t segment_size = n / num_threads;
    std::size_t start = (my_rank * segment_size) * sizeof(T);
    std::size_t end = ((my_rank + 1) * segment_size) * sizeof(T);
    std::size_t distance = end - start;


    str += "  my_rank: " + std::to_string(my_rank);
    str += "  segment_size: " + std::to_string(segment_size);
    str += "  start: " + std::to_string(start);
    str += "  end: " + std::to_string(end);
    str += "  distance: " + std::to_string(distance);
    str += "  rank: " + std::to_string(my_rank);
    str += "  start: " + std::to_string(start);
    str += "  end: " + std::to_string(end);
    str += "  distance: " + std::to_string(distance);
    str += "  e: " + std::to_string(start + distance);
    str += "\n";
    std::cerr << str;

    barrier.wait();
    std::memcpy(to_data.data() + start, from_data.data() + start, distance);
    barrier.wait();


    if (my_rank == 0)
        for (auto y = 0; y < from_data.get_rows(); y++) {
            for (auto x = 0; x < from_data.get_columns(); x++) {
                if (to_data(x, y) != from_data(x, y)) {
                    std::cerr << "x: " << x << '\t' << "y: " << y << "\t\t";
                    std::cerr << "to: " << to_data(x, y) << '\t' << "from: " << from_data(x, y) << '\n';
                }
            }
        }

    barrier.wait();
}


int main() {
    auto const num_threads = 1;
    // auto const num_threads = 4;

    // auto const width = 64;
    // auto const height = 64;
    auto const width = 97;
    auto const height = 101;

    auto from_data = Matrix<T>(width, height, 70);
    auto to_data = Matrix<T>(width, height, 84);

    std::vector<std::thread> threads;
    auto barrier = Barrier{num_threads};
    for (auto i = 0; i < num_threads; i++) {
        threads.emplace_back(run, i, num_threads, std::ref(barrier), std::ref(from_data), std::ref(to_data));
    }

    for (auto& thread : threads) {
        thread.join();
    }
}

1 个答案:

答案 0 :(得分:5)

  

std::memcpy(to_data.data() + start, from_data.data() + start, distance)

std::vector<T>::data()返回一个T*,因此,如果您向其添加一个整数值foo,则可以有效地添加foo * sizeof T个字节...但是您已经乘以{{1 }}在计算sizeof(T)start时。另外,end不适用于不是POD的std::memcpy()

最好使用T