Question

我正在尝试使用C中的pthreads计算两个向量a和b的总和。我给出了一个函数，它以顺序形式计算总和，另一个以并行形式计算。我的程序工作正常但在有多个线程时计算不同的总和。我在关键区域使用了正确的线程同步，但仍然无法看到我出错的地方。我在第一个线程上得到了正确的答案，因为只有一个线程正在完成这项工作，然后我在多个线程上得到了错误的答案。这是我的代码：

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>


// type for value of vector element
typedef short value_t;
// type for vector dimension / indices
typedef long index_t;
// function type to combine two values
typedef value_t (*function_t)(const value_t x, const value_t y);
// struct to store the respective values of the vectors a,b and c
typedef struct{
    index_t start;
    index_t end;
    value_t *arr;
    value_t *brr;
    value_t *crr;
    value_t *part_sum;
    pthread_mutex_t *mutex;
}arg_struct;

// function to combine two values
value_t add(const value_t x, const value_t y) {
  return ((x+y)*(x-y)) % ((int)x+1) +27;
}


// function to initialize the vectors a,b and c
void vectorInit(index_t n, value_t a[n], value_t b[n], value_t c[n]) {

  for(index_t i=0; i<n; i++) {
    a[i] = (value_t)(2*i);
    b[i] = (value_t)(n-i);
    c[i] = 0;
  }
}


// function to count the sum of two variables sequentially
value_t vectorOperation(index_t n, value_t a[n], value_t b[n], value_t c[n], function_t f) {

  value_t sum = 0;

  for(index_t i=0; i<n; i++) {
    sum += (c[i] = f(a[i], b[i]));
  }

  return sum;
}
/* Thread function */
void* vector_sum(void* arg)
{   
    arg_struct *param = (arg_struct*)arg;
   /*
    for(index_t i= param->start; i<param->end; i++)
    {
        pthread_mutex_lock(&param->mutex);
        *param->part_sum += vectorOperation(i,param->arr,param->brr,param->crr,add);
        pthread_mutex_unlock(&param->mutex);
    }
    */
    index_t n = param->end - param->start;
    pthread_mutex_lock(&(*param->mutex));
    // Each thread uses the vectorOperation function to calculate the sum sequentially(Also the critical area)
    *param->part_sum = *param->part_sum + vectorOperation(n,param->arr,param->brr,param->crr,add);
    //*param->part_sum += vectorOperation(param->end-param->start,param->arr,param->brr,param->crr,add);
    pthread_mutex_unlock(&(*param->mutex));

    pthread_exit(NULL);
}



// Sum of two vectors in parallel. 
value_t vectorOperationParallel(index_t n, value_t a[n], value_t b[n], value_t c[n], function_t f, int p) {

  value_t sum = 0;

    pthread_t threads[p];
    arg_struct thread_args[p];
    pthread_mutex_t mutex;
    pthread_mutex_init(&mutex,NULL);
    index_t div = (n+p-1)/p;

      for(int i=0; i<p; i++)
    {
        thread_args[i].start = i*div;
        thread_args[i].end = (i+1)*div;
        thread_args[i].arr = a;
        thread_args[i].brr = b;
        thread_args[i].crr = c;
          for(int j =0; j<div; j++)
          {
          thread_args[i].arr[j] = a[thread_args[i].start+j];
          thread_args[i].brr[j] = b[thread_args[i].start+j];
          thread_args[i].crr[j] = c[thread_args[i].start+j];

          }
        thread_args[i].part_sum = &sum;
        thread_args[i].mutex = &mutex;
        pthread_create(&threads[i],NULL,vector_sum, (void*)&thread_args[i]);
    }


    for(int i=0; i<p; i++)
    {
        pthread_join(threads[i],NULL);
    }
  return sum;
}


int main(int argc, char **argv)
{
  // check for correct argument count
  if (argc != 3)
    {
      printf ("usage: %s vector_size n_threads\n", argv[0]);
      exit (EXIT_FAILURE);
    }

  // get arguments
  // vector size
  index_t n = (index_t)atol (argv[1]);
  // number of threads
  int p = atoi (argv[2]);
  // check for plausible values
  if((p < 1) || (p > 1000)) {
      printf("illegal number of threads\n");
      exit (EXIT_FAILURE);
  }

  // allocate memory
  value_t *a = malloc(n * sizeof(*a));
  value_t *b = malloc(n * sizeof(*b));
  value_t *c = malloc(n * sizeof(*c));
  if((a == NULL) || (b == NULL) || (c == NULL)) {
    printf("no more memory\n");
    exit(EXIT_FAILURE);
  }

  // initialize vectors a,b,c
  vectorInit(n, a, b, c);

  // work on vectors sequentially
  value_t c1sum = vectorOperation(n, a, b, c, add);

  // work on vectors parallel for all thread counts from 1 to p
  for(int thr=1; thr<= p; thr++) {
    // do operation
    value_t c2sum = vectorOperationParallel(n, a, b, c, add, thr);

    // check result
    if(c1sum != c2sum) {
      printf("!!! error: vector results are not identical !!!\nsum1=%ld, sum2=%ld\n", (long)c1sum, (long)c2sum);
      return EXIT_FAILURE;
    } 
    else
        printf("The results are equal: sum1=%ld, sum2=%ld\n",(long)c1sum, (long)c2sum);
  }

  return EXIT_SUCCESS;

}

Answer 1

好吧我不确定，但这似乎是错的。

起初，变量的名称很糟糕。

然后是上午评论：

循环中的
pthread_mutex_init可能是一个坏主意

你计算class CountDownTimer: UIView { public var backgroundStrokeColor: CGColor = UIColor.white.cgColor public var backgroundFillColor: CGColor = UIColor.clear.cgColor public var backgroundLineWidth: CGFloat = 15 public var timeLeftSrtokeColor: CGColor = UIColor.red.cgColor public var timeLeftFillColor: CGColor = UIColor.clear.cgColor public var timeLeftLineWidth: CGFloat = 10 public var textColor: UIColor = UIColor.white public var textFont: UIFont = UIFont.balooRegular(10.0) fileprivate var timeLeft: TimeInterval = 0 fileprivate var endDate: Date? fileprivate var timeLeftShapeLayer: CAShapeLayer? fileprivate var bgShapeLayer: CAShapeLayer? fileprivate var timeLabel: UILabel? fileprivate var timer = Timer() fileprivate let strokeIt = CABasicAnimation(keyPath: "strokeEnd") //MARK: - UIView override func draw(_ rect: CGRect) { drawBgShape() drawTimeLeftShape() addTimeLabel() strokeIt.toValue = 1 //"fromValue" is set in "startTimer(duration, timerProgress) strokeIt.duration = self.timeLeft // add the animation to your timeLeftShapeLayer timeLeftShapeLayer?.add(strokeIt, forKey: nil) // define the future end time by adding the timeLeft to now Date() } //MARK: - Public public func startTimer(duration: TimeInterval, timerProgress: Double) { self.timeLeft = duration endDate = Date().addingTimeInterval(timeLeft) timer = Timer.scheduledTimer(timeInterval: 0.1, target: self, selector: #selector(updateTime), userInfo: nil, repeats: true) strokeIt.fromValue = timerProgress } //MARK: - Private fileprivate func drawBgShape() { //we initialize and add the layer only if there is not initialized if(bgShapeLayer == nil){ bgShapeLayer = CAShapeLayer() self.layer.addSublayer(bgShapeLayer!) } bgShapeLayer?.path = UIBezierPath(arcCenter: CGPoint(x: self.frame.midX , y: self.frame.midY), radius: min((frame.width - self.timeLeftLineWidth)/2, (frame.height - self.timeLeftLineWidth)/2), startAngle: -90.degreesToRadians, endAngle: 270.degreesToRadians, clockwise: true).cgPath bgShapeLayer?.strokeColor = self.backgroundStrokeColor bgShapeLayer?.fillColor = self.backgroundFillColor bgShapeLayer?.lineWidth = self.backgroundLineWidth } fileprivate func drawTimeLeftShape() { //we initialize and add the layer only if there is not initialized if(timeLeftShapeLayer == nil){ timeLeftShapeLayer = CAShapeLayer() self.layer.addSublayer(timeLeftShapeLayer!) } timeLeftShapeLayer?.path = UIBezierPath(arcCenter: CGPoint(x: self.frame.midX , y: self.frame.midY), radius: min((frame.width - self.timeLeftLineWidth)/2, (frame.height - self.timeLeftLineWidth)/2), startAngle: -90.degreesToRadians, endAngle: 270.degreesToRadians, clockwise: true).cgPath timeLeftShapeLayer?.strokeColor = self.timeLeftSrtokeColor timeLeftShapeLayer?.fillColor = self.timeLeftFillColor timeLeftShapeLayer?.lineWidth = self.timeLeftLineWidth } fileprivate func addTimeLabel() { //we initialize and add the UILabel only if there is not initialized if(timeLabel == nil){ timeLabel = UILabel() self.addSubview(timeLabel!) } timeLabel?.frame = CGRect(x: self.frame.midX-50 ,y: self.frame.midY-25, width: 100, height: 50) timeLabel?.adjustsFontSizeToFitWidth = true timeLabel?.textAlignment = .center timeLabel?.text = self.timeLeft.stringTime timeLabel?.textColor = self.textColor timeLabel?.font = self.textFont } //MARK: - Actions @objc fileprivate func updateTime() { if timeLeft > 0 { timeLeft = endDate?.timeIntervalSinceNow ?? 0 timeLabel?.text = self.timeLeft.stringTime } else { timeLabel?.text = self.timeLeft.stringTime timer.invalidate() } } } 之后您使用index_t div = (elements_in_vector + num_of_threads - 1) / num_of_threads;来分解元素。这样，您可以尝试访问比可用元素更多的元素。

示例：

div * num_of_threads

一旦访问index_t div = (elements_in_vector + num_of_threads - 1) / num_of_threads; //(13 * 5 - 1) / 5 = 3 thread_args[i].end = (i + 1) * div; // for the last i ( = 2) //(2 + 1) * 5 = 15，您就会获得垃圾值（未定义的行为）

然后你复制原始数组的部分内容（我认为这比传递原始数据的速度慢一些。）

您似乎根本不使用结果数组i >= 13。

您只需要互斥量作为所有值的总和，因为您在线程中传递的每个数组都有专用内存。如果你不在所有这些中使用sum变量，你甚至可以将原始数组的指针传递给没有互斥锁的线程。因为每个添加都是自包含的并且不访问另一个添加的内存，所以不需要互斥。

要计算所有值的总和，您可以使用线程的返回值，而不是对传递给每个值的值的引用。这样会快得多。

我不确定我是否找到了所有内容，但这可能会帮助您改善这一点。

使用pthreads添加两个向量而不使用全局和变量

1 个答案: