我正在尝试并行复制矩阵。下面是我正在使用的代码。目前,它可以与char一起正常工作,但是当我使用短裤时它会出现段错误。我认为错误在于复制向量之外的内存。我试图调试我的假设,但没有成功。
CMakeLists.txt
cmake_minimum_required(VERSION 3.0)
project(memcpy CXX)
find_package (Threads)
add_executable(memcpy main.cpp)
set_property(TARGET memcpy PROPERTY CXX_STANDARD 17)
target_link_libraries (memcpy ${CMAKE_THREAD_LIBS_INIT})
main.cpp
#include <cassert>
#include <condition_variable>
#include <cstring>
#include <iostream>
#include <mutex>
#include <string>
#include <thread>
#include <vector>
class Barrier {
public:
explicit Barrier(std::size_t const count) : m_threshold(count), m_remaining(count), m_generation(0) {}
void wait() {
auto local = std::unique_lock<std::mutex>{m_mutex};
auto current_generation = m_generation;
m_remaining--;
if (!m_remaining) {
m_generation++;
m_remaining = m_threshold;
m_condition.notify_all();
} else {
m_condition.wait(local, [this, current_generation] { return current_generation != m_generation; });
}
}
private:
std::mutex m_mutex;
std::condition_variable m_condition;
std::size_t m_threshold;
std::size_t m_remaining;
std::size_t m_generation;
};
template <typename T>
class Matrix {
using reference = typename std::vector<T>::reference;
using const_reference = typename std::vector<T>::const_reference;
public:
Matrix(std::size_t rows, std::size_t cols) : m_rows(rows), m_cols(cols), m_data(m_cols * m_rows) {}
Matrix(std::size_t rows, std::size_t cols, T const& default_val) : m_rows(rows), m_cols(cols), m_data(m_cols * m_rows, default_val) {}
constexpr std::size_t get_columns() const { return m_cols; }
constexpr std::size_t get_rows() const { return m_rows; }
constexpr std::size_t get_element_count() const {
assert(m_cols * m_rows == m_data.size());
return m_cols * m_rows;
}
T* data() { return m_data.data(); }
T const* data() const { return m_data.data(); }
reference operator()(std::size_t const column_x, std::size_t const row_y) {
assert(0 <= column_x);
assert(column_x < get_columns());
assert(0 <= row_y);
assert(row_y < get_rows());
return m_data[row_y * m_cols + column_x];
}
const_reference operator()(std::size_t const column_x, std::size_t const row_y) const {
assert(0 <= column_x);
assert(column_x < get_columns());
assert(0 <= row_y);
assert(row_y < get_rows());
return m_data[row_y * m_cols + column_x];
}
private:
std::size_t const m_rows;
std::size_t const m_cols;
std::vector<T> m_data;
};
static_assert(false, "FIX ME");
using T = char;
// using T = short;
// using T = int;
// using T = double;
void run(std::size_t const my_rank, std::size_t const num_threads, Barrier& barrier, Matrix<T> const& from_data, Matrix<T>& to_data) {
auto n = from_data.get_element_count();
std::string str;
if (my_rank == 0) {
std::cerr << "bytes to copy: " << (n * sizeof(T)) << '\n';
}
// initialization
std::size_t segment_size = n / num_threads;
std::size_t start = (my_rank * segment_size) * sizeof(T);
std::size_t end = ((my_rank + 1) * segment_size) * sizeof(T);
std::size_t distance = end - start;
str += " my_rank: " + std::to_string(my_rank);
str += " segment_size: " + std::to_string(segment_size);
str += " start: " + std::to_string(start);
str += " end: " + std::to_string(end);
str += " distance: " + std::to_string(distance);
str += " rank: " + std::to_string(my_rank);
str += " start: " + std::to_string(start);
str += " end: " + std::to_string(end);
str += " distance: " + std::to_string(distance);
str += " e: " + std::to_string(start + distance);
str += "\n";
std::cerr << str;
barrier.wait();
std::memcpy(to_data.data() + start, from_data.data() + start, distance);
barrier.wait();
if (my_rank == 0)
for (auto y = 0; y < from_data.get_rows(); y++) {
for (auto x = 0; x < from_data.get_columns(); x++) {
if (to_data(x, y) != from_data(x, y)) {
std::cerr << "x: " << x << '\t' << "y: " << y << "\t\t";
std::cerr << "to: " << to_data(x, y) << '\t' << "from: " << from_data(x, y) << '\n';
}
}
}
barrier.wait();
}
int main() {
auto const num_threads = 1;
// auto const num_threads = 4;
// auto const width = 64;
// auto const height = 64;
auto const width = 97;
auto const height = 101;
auto from_data = Matrix<T>(width, height, 70);
auto to_data = Matrix<T>(width, height, 84);
std::vector<std::thread> threads;
auto barrier = Barrier{num_threads};
for (auto i = 0; i < num_threads; i++) {
threads.emplace_back(run, i, num_threads, std::ref(barrier), std::ref(from_data), std::ref(to_data));
}
for (auto& thread : threads) {
thread.join();
}
}
答案 0 :(得分:5)
std::memcpy(to_data.data() + start, from_data.data() + start, distance)
std::vector<T>::data()
返回一个T*
,因此,如果您向其添加一个整数值foo
,则可以有效地添加foo * sizeof T
个字节...但是您已经乘以{{1 }}在计算sizeof(T)
和start
时。另外,end
不适用于不是POD的std::memcpy()
。
最好使用T
。