I'm trying to write a multithreaded logger, and when I test whether the queue of lines to be written out is empty I get a std::system_error with "Invalid argument". It happens while constructing the unique_lock. If I pass std::try_to_lock it doesn't seem to be a problem, even though the conditions for undefined behavior appear to be the same for both constructors.
general.hh - nothing particularly interesting here, I think.
#pragma once
#include <stdint.h>
#define likely(x) __builtin_expect (!!(x), 1)
#define unlikely(x) __builtin_expect (!!(x), 0)
namespace matan {
typedef int8_t s8;
typedef int16_t s16;
typedef int32_t s32;
typedef int64_t s64;
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
template <typename T, typename... Args>
inline void place(T* loc, Args&&... args) {
::new (loc) T(args...);
}
template <typename T, typename... Args>
inline void replace(T* p, Args&&... args) {
p->~T();
::new (p) T(args...);
}
} // matan
BunchQueue.hh - the underlying data structure that seems to be causing the problem.
#pragma once
#include <stdlib.h>
#include <string.h>
#include <mutex>
#include <vector>
#include <utility>
#include <type_traits>
#include <condition_variable>
#include <iostream> //PUSH_ASSERT
#include "general.hh"
namespace matan {
template <typename T>
class BaseQueue {
public:
typedef T value_type;
BaseQueue(size_t initCapacity) : m_capacity(initCapacity) {}
void reset() { m_size=0; }
size_t size() const { return m_size; }
T* begin() { return m_vec; }
const T* begin() const { return m_vec; }
T* end() { return m_vec+m_size; }
const T* end() const { return m_vec+m_size; }
protected:
T* m_vec = nullptr;
size_t m_capacity = 0;
size_t m_size = 0;
};
template <typename T>
class TakerQueue : public BaseQueue<T> {
/*
* A vector, but you have to use std::move
*/
public:
TakerQueue(size_t initCapacity = 1) : BaseQueue<T>(initCapacity) {
this->m_vec = (T*) malloc(sizeof(T)*(this->m_capacity));
}
void push_back(T& t) {
if (unlikely(this->m_size >= this->m_capacity)) {
this->m_capacity = this->m_capacity << 1;
T* oldVec = this->m_vec;
this->m_vec = (T*) malloc(sizeof(T)*(this->m_capacity));
for (size_t i = 0; i < this->m_size; i++) {
new (this->m_vec+i) T(std::move(oldVec[i]));
}
delete[] oldVec;
}
new (this->m_vec+this->m_size) T(std::move(t));
++(this->m_size);
}
};
template <typename T>
class ShallowQueue : public BaseQueue<T>{
//TODO: figure out the correct concept to use to guarantee T is trivially movable at compile time
/*
* A queue that instead of freeing and allocating memory constantly
* simply reuses the same memory overwriting the appropriate values.
*
* Its use case is to be filled, then iterated through, and then reset.
*
* Meant for usage with trivial classes, specifically structs as
* messages. The use of memcpy means I am not actually constructing
* an object in place, but just taking a shallow copy,
* and the use of realloc in vectors is only valid for a trivially movable
* object.
*
*/
public:
ShallowQueue(size_t initCapacity = 1) : BaseQueue<T>(initCapacity) {
this->m_vec = (T*) malloc(sizeof(T)*(this->m_capacity));
}
void push_back(const T& msg) {
if (unlikely(this->m_size >= this->m_capacity)) {
this->m_capacity = this->m_capacity << 1;
this->m_vec = (T*) realloc(this->m_vec, sizeof(T)*this->m_capacity);
}
memcpy(this->m_vec+this->m_size, &msg, sizeof(T));
++(this->m_size);
}
};
template <typename Queue>
class BunchQueue {
/*
* Multi writer single reader.
*
* Instead of popping off individual messages the reader takes all of them
* at once. This works well if the architecture is focused on bunches.
* Also good memory wise because it means fewer allocations and frees and
* allows for adjacent memory access when reading through the messages.
* Drawback is that you have a relatively large memory footprint with 1
* vector just sitting around. Works best if you are not RAM bound and can
* expect fairly consistent bunch sizes.
*/
public:
BunchQueue(size_t initCapacity = 1) :
m_queueA(initCapacity), m_queueB(initCapacity) {
}
void push_back(const typename Queue::value_type& msg) {
std::unique_lock<std::mutex> lock(m_mtx);
auto& q = getQueue();
q.push_back(msg);
}
void push_back(typename Queue::value_type& msg) {
std::unique_lock<std::mutex> lock(m_mtx);
auto& q = getQueue();
q.push_back(msg);
}
const Queue& takeQueue() {
std::unique_lock<std::mutex> lock(m_mtx);
auto q = &(getQueue());
m_whichQueue = !m_whichQueue;
getQueue().reset();
return *q;
}
bool empty() {
try {
std::unique_lock<std::mutex> lock(m_mtx);
} catch (const std::system_error& e) {
std::cout << "error locking BunchQueue mutex: " << e.what() << std::endl;
throw e;
}
std::unique_lock<std::mutex> lock(m_mtx);
return m_queueA.size() == 0 && m_queueB.size() == 0;
}
private:
bool m_whichQueue = true;
std::mutex m_mtx;
Queue m_queueA;
Queue m_queueB;
Queue& getQueue() {
//Only for use in takeQueue, haven't considered general use for thread safety
return m_whichQueue ? m_queueA : m_queueB;
}
};
template <typename Msg>
using MessageQueue = BunchQueue<ShallowQueue<Msg>>;
} //namespace matan
AsyncWorker.hh - an abstract class responsible for organizing the condition variable and locking for the worker thread.
#pragma once
#include "BunchQueue.hh"
#include <thread>
#include <atomic>
namespace matan {
class AsyncWorker {
/*
* A class to allow for a process to contain a worker thread that follows
* a simple loop. The expected usage is for there to be some sort of queue
* like data structure that the owners write to, and the worker thread
* will run over each element performing some operation defined by doit.
*
* This class is capable of Multi writer, single reader (the internal thread).
* but the details of implementation will determine the reality of if you
* can take multiple writers.
*/
public:
AsyncWorker() : m_worker(new std::thread([this]() { this->workerLoop();})) {}
virtual ~AsyncWorker() = 0;
AsyncWorker(const AsyncWorker&) = delete;
protected:
void workerLoop() {
while (true) {
waitTillNeeded();
doit();
if (unlikely(m_bDone)) {
break;
}
}
doit();
}
/*
* doit is the function that we actually want the worker thread to perform.
* I assume that each doit call is enough to completely utilize all the
* contents on the worker threads "queue."
*/
virtual void doit() = 0;
/*
* Checks if there is any work for the worker thread to do, and if not puts
* the thread to sleep.
*/
virtual bool shouldSleep() = 0;
/*
* Locked so that waitTillNeeded can't be in an indeterminate state. Either
* set beforehand so never wait, or set after already waiting so that the
* notify that follows won't be wasted in between the boolean and the
* actual call to wait.
*/
void done() {
std::unique_lock<std::mutex> lock(m_mtx);
m_bDone = true;
notifyWorker();
lock.unlock();
m_worker->join();
}
void notifyWorker() {
m_bRealWakeup = true;
m_shouldDoit.notify_one();
}
std::atomic_bool m_bDone{false};
std::atomic_bool m_bRealWakeup{false};
std::unique_ptr<std::thread> m_worker;
std::condition_variable m_shouldDoit;
private:
void waitTillNeeded() {
try {
shouldSleep();
} catch (const std::system_error& e) {
std::cout << "error shouldSleep" << std::endl;
}
std::unique_lock<std::mutex> lock(m_mtx);
if (!m_bDone && shouldSleep()) {
m_bRealWakeup = false;
m_shouldDoit.wait(lock, [this] { return this->realWakeup(); });
}
}
//Prevent spurious system wake up calls
bool realWakeup() { return m_bRealWakeup; }
std::mutex m_mtx;
};
inline AsyncWorker::~AsyncWorker() {}
} // matan
Logger.hh
#pragma once
#include "AsyncWorker.hh"
#include <fstream>
#include <string>
/*
* Has LogQueue, file to write to.
*/
namespace matan {
class Logger : public AsyncWorker {
/*
* Logger intended to prevent IO from blocking. Pushes the actual writing
* to disk onto a separate thread.
*
* Single writer. To make the interface similar to std::cout we need to allow
* separate calls to operator<<. For this to be multi writer we would need
* each operator<< call to contain a complete element, as opposed to
* building it within m_buf and only later flushing it. (Please note this
* issue would exist even if we actually flushed on every call to flush()).
*/
public:
Logger(const std::string& ofname);
virtual ~Logger() = default;
Logger(const Logger&) = delete;
Logger& operator<<(const std::string& str) {m_buf += str; return *this;}
Logger& operator<<(const char* c) { m_buf += c; return *this; }
Logger& operator<<(char c) { m_buf += c; return *this; }
Logger& operator<<(Logger& (*pf)(Logger&)) {return pf(*this);}
void flush();
void close();
private:
void doFlush();
virtual void doit();
virtual bool shouldSleep() {
try {
return m_contents.empty();
} catch (const std::system_error& e) {
std::cout << "error checking m_contents.empty()" << std::endl;
throw e;
}
return m_contents.empty();
}
std::string m_buf;
std::ofstream m_ofstream;
BunchQueue<TakerQueue<std::string>> m_contents;
/*
* I'm making a guess here that one page in memory is 4KB and that it will
* be fastest if I can stay on one page (I need to pick a threshold
* somehow) and that most logs will be less than 1024 characters.
*/
static constexpr std::size_t MAX_LEN = 3 * 1024;
};
} // matan
namespace std {
inline matan::Logger& endl(matan::Logger& logger) {
logger << '\n';
logger.flush();
return logger;
}
}
Logger.cc
#include "Logger.hh"
namespace matan {
/********************** BunchLogger *******************************/
Logger::Logger(const std::string& ofname) :
AsyncWorker(),
m_ofstream(ofname, std::ios::out) {}
void Logger::close() {
doFlush();
done();
m_ofstream.close();
}
void Logger::flush() {
if (m_buf.size() > MAX_LEN) {
doFlush();
}
}
void Logger::doFlush() {
m_contents.push_back(m_buf);
notifyWorker();
m_buf.clear();
}
void Logger::doit() {
for (const auto& line : m_contents.takeQueue()) {
m_ofstream << line;
m_ofstream.flush();
}
}
} // matan
logger.cc
#include <string>
#include <sstream>
#include <algorithm>
#include <iterator>
#include <iostream>
#include <chrono>
#include "Logger.hh"
using namespace std::chrono;
int main() {
std::string lyrics = "Row, row, row your boat Gently down the stream, Merrily merrily, merrily, merrily Life is but a dream";
std::istringstream iss(lyrics);
std::vector<std::string> lyric_vec(std::istream_iterator<std::string>{iss},
std::istream_iterator<std::string>{});
std::ofstream mystream("/tmp/logger1.log", std::ios::out);
auto start1 = high_resolution_clock::now();
for (auto& lyric : lyric_vec) {
mystream << lyric << std::endl;
mystream.flush();
}
mystream.close();
std::cout
<< duration_cast<nanoseconds>(high_resolution_clock::now()-start1).count()
<< std::endl;
matan::Logger bunchLogger("/tmp/logger2.log");
auto start2 = high_resolution_clock::now();
for (auto& lyric : lyric_vec) {
bunchLogger << lyric << std::endl;
}
std::cout
<< duration_cast<nanoseconds>(high_resolution_clock::now()-start2).count()
<< std::endl;
bunchLogger.close();
std::cout << "finish logger" << std::endl;
return 0;
}
Makefile
# to use Makefile variables later in the Makefile: $(<var>)
#
# -g adds debugging information to the executable file
# -Wall turns on most, but not all, compiler warnings
CC = clang++
CFLAGS = -g -Wall -std=c++1z -pthread
BINDIR = bin
bigmap: timsort.hh BigMap.hh bigmap.cc
$(CC) $(CFLAGS) bigmap.cc -o $(BINDIR)/bigmap
threadpool: ThreadPool.hh threadpool.cc
$(CC) $(CFLAGS) threadpool.cc -o $(BINDIR)/threadpool
msgq: BunchQueue.hh message_queue.cc
$(CC) $(CFLAGS) message_queue.cc -o $(BINDIR)/msgq
logger: Logger.o logger.cc
$(CC) $(CFLAGS) Logger.o logger.cc -o $(BINDIR)/logger
Logger.o: BunchQueue.hh AsyncWorker.hh Logger.hh Logger.cc
$(CC) $(CFLAGS) -c Logger.cc
# .PHONY is so that make doesn't confuse clean with a file
.PHONY: clean
clean:
rm -r $(BINDIR) && mkdir $(BINDIR)
rm -f *.o *~
Answer (score: 0)
std::mutex is not "reentrant". The exception is thrown because you are trying to lock AsyncWorker::m_mtx while that mutex is already owned by the calling thread.
The path where the double locking occurs is:
AsyncWorker::waitTillNeeded() -> Logger::shouldSleep()
Changing the type of AsyncWorker::m_mtx to std::recursive_mutex should fix the problem. I couldn't find a recursive locking path on BunchQueue::m_mtx, but it wouldn't hurt to use a recursive mutex there as well.
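For illustration only, here is a minimal, self-contained sketch of that kind of nested acquisition (the Worker class and its members are made-up names, not the code above): with std::recursive_mutex the same thread may re-acquire the lock in a nested call, whereas doing the same with a plain std::mutex is undefined behavior and on many implementations surfaces as a std::system_error or a deadlock.
#include <iostream>
#include <mutex>
struct Worker {
  std::recursive_mutex m_mtx;  // was std::mutex in the problematic version
  bool shouldSleep() {
    // second acquisition by the same thread: fine for a recursive_mutex
    std::unique_lock<std::recursive_mutex> lock(m_mtx);
    return true;
  }
  void waitTillNeeded() {
    // first acquisition
    std::unique_lock<std::recursive_mutex> lock(m_mtx);
    if (shouldSleep())
      std::cout << "nested lock acquired without error\n";
  }
};
int main() {
  Worker w;
  w.waitTillNeeded();
  return 0;
}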
Other issues:
You have a memory leak on BaseQueue::m_vec: free() is never called on that pointer. I understand you are trying to avoid copies by using realloc (another problem there, see below), but note that realloc will copy the array contents anyway if the current block cannot be resized in place. I suggest you seriously consider using a plain old std::vector, and use std::vector::reserve() to pre-allocate space for the queue as needed.
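As a rough sketch of that suggestion (illustrative only, VecQueue is a made-up name and this is not a drop-in replacement for the classes above), a vector-backed queue with reserve() gets the same amortized growth without any manual malloc/realloc/free, and the memory is released automatically in the destructor:
#include <cstddef>
#include <utility>
#include <vector>
template <typename T>
class VecQueue {
public:
  explicit VecQueue(std::size_t initCapacity = 1) { m_vec.reserve(initCapacity); }
  void push_back(T t) { m_vec.push_back(std::move(t)); }  // vector handles growth
  void reset() { m_vec.clear(); }  // keeps the allocated capacity for reuse
  std::size_t size() const { return m_vec.size(); }
  const T* begin() const { return m_vec.data(); }
  const T* end() const { return m_vec.data() + m_vec.size(); }
private:
  std::vector<T> m_vec;
};
int main() {
  VecQueue<int> q(8);
  q.push_back(42);
  return q.size() == 1 ? 0 : 1;
}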
Your call to realloc in ShallowQueue::push_back() is also problematic:
this->m_vec = (T*) realloc(this->m_vec, sizeof(T)*this->m_capacity);
What happens if realloc() returns NULL? You lose the previous value of m_vec, so you can no longer access the data already stored there, and you also lose the ability to free that memory in an orderly fashion.
The correct way to call realloc is:
T* p = (T*) realloc(this->m_vec, sizeof(T)*this->m_capacity);
if (!p)
{
// do some error handling..
// return
}
this->m_vec = p; // p is now known to be non-null, it is safe to assign its value to m_vec