Question

继续我的Chapel冒险......

我有一个矩阵A.

// Declarations from the question: a square integer matrix A and a vector
// rowsums meant to receive the sum of each row of A.
// n is not declared in this snippet; it has to be defined before this point.
var idx = {1..n};        // one-dimensional domain covering 1..n
// NOTE(review): idx is a domain here, while Answer 1 builds the 2-D domain
// from a range (1..n) -- confirm that {idx, idx} is accepted by the compiler.
var adom = {idx, idx};   // two-dimensional index set for A, built from idx twice
var A: [adom] int;       // integer matrix declared over adom
// populate A here (left open by the question)

var rowsums: [idx] int;  // one integer per index in idx

填充 rowsums 的最有效方法是什么？

Answer 1

“最有效”的解决方案很难定义。不过，下面是一种既并行又优雅的计算 rowsums 的方法：

// Row sums of an n-by-n integer matrix, one parallel loop iteration per row.
//
// n has to be declared before it is used; with a "naked" n compilation fails:
//   tio.chpl:1: error: 'n' undeclared (first use this function)
config const n = 8;

const indices = 1..n;
const adom = {indices, indices};

// Populate A so that A[r, c] = r*c
var A: [adom] int;
forall (r, c) in adom do
  A[r, c] = r * c;

// rowsums[r] receives the + reduction over row r of A
var rowsums: [indices] int;
forall r in indices do
  rowsums[r] = + reduce A[r, ..];

writeln(rowsums);

Try it online!

这里对 A 的数组切片（array slice）使用了 + 归约（reduction）。

请注意，forall 和 + reduce 都会为上述程序引入并行性。如果 indices 的规模足够小，那么只使用 for 循环可能更高效，因为这样可以避免创建任务的开销。

Answer 2

让代码在 `SEQ` 和 `PAR` 两种模式下都能真正在线运行的一些提示：

除了一些实现细节之外，上面 @bencray 提到的假设——PAR 设置带来的开销成本可能使 SEQ 设置下的纯串行处理更有利——并未得到实验证实。这里还应当说明，由于显而易见的原因，分布式模式没有在在线 <TiO>-IDE 上进行测试；而且一个小规模的分布式实现与其说是有科学意义的实验，不如说是自相矛盾。

事实重要

rowsums[] 的处理，即使在 2x2 这一最小规模下，以 SEQ 模式运行也比 256x256 规模下以 PAR 模式运行还要慢。

干得好，Chapel 团队！在最优对齐方面确实很出色，能够在 PAR 模式下充分利用紧凑的硅资源！

确切的运行时性能记录请参见下方代码中自带说明的表格；也欢迎访问在线 IDE（见上文的链接）亲自运行并试验。

读者也可能注意到小规模实验中的外在噪声：操作系统和托管 IDE 相关的进程会干扰资源使用，并通过不利的 CPU / Lx-CACHE / 内存 I/O / 进程等冲突影响 <SECTION-UNDER-TEST> 的运行时性能，因此这些测量结果不宜用于更宽泛的解读。

希望大家都能喜欢 Chapel 在不断增长的 `[EXPSPACE]` 规模计算场景中展示出的出色 `[TIME]` 结果。

/*
 * Times the same row-sum computation twice over a max_idx-by-max_idx integer
 * matrix: first with a serial `for` loop ([SEQ]), then with a parallel
 * `forall` loop ([PAR]), and prints both elapsed times in microseconds.
 */
/* ---------------------------------------SETUP-SECTION-UNDER-TEST--*/ use Time;
/* ---------------------------------------SETUP-SECTION-UNDER-TEST--*/ var aStopWATCH_SEQ: Timer;   // wraps the [SEQ] loop only
/* ---------------------------------------SETUP-SECTION-UNDER-TEST--*/ var aStopWATCH_PAR: Timer;   // wraps the [PAR] loop only

// Extent of the matrix in each dimension. Declared `config` so that another
// size can be selected when the program is launched (--max_idx=<value>)
// instead of editing this line; the default stays at 4096.
//
// Sizes that were observed to fail on the <TiO>-IDE:
//   123456 - seems to be too fat  for <TiO>-IDE to allocate                  <TiO>--   /wrappers/chapel: line 6: 24467 Killed
//     8192 - seems to be too long for <TiO>-IDE to let it run [SEQ] part     <TiO>--  The request exceeded the 60 second time limit and was terminated
//    16384 - seems to be too long for <TiO>-IDE to let it run [PAR] part too <TiO>--   /wrappers/chapel: line 6: 12043 Killed
config const max_idx = 4096;
const indices = 1..max_idx;

const   adom  = {indices, indices};
var A: [adom] int;

[(i,j) in adom] A[i, j] = i*j;               // Populate A[,] -- outside of both timed sections

var rowsums: [indices] int;

// [SEQ] pass: rows are visited one after another by a serial for-loop.
/* ---------------------------------------------SECTION-UNDER-TEST--*/ aStopWATCH_SEQ.start();
for       i in indices {                     // SECTION-UNDER-TEST--
  rowsums[i] = + reduce(A[i, ..]);           // SECTION-UNDER-TEST--
}                                            // SECTION-UNDER-TEST--
/* ---------------------------------------------SECTION-UNDER-TEST--*/ aStopWATCH_SEQ.stop();

/* Recorded [SEQ] results, one run per max_idx value:

                                               <SECTION-UNDER-TEST> took     8973 [us] to run in [SEQ] mode for    2 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took    28611 [us] to run in [SEQ] mode for    4 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took    58824 [us] to run in [SEQ] mode for    8 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took   486786 [us] to run in [SEQ] mode for   64 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took  1019990 [us] to run in [SEQ] mode for  128 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took  2010680 [us] to run in [SEQ] mode for  256 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took  4154970 [us] to run in [SEQ] mode for  512 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took  8260960 [us] to run in [SEQ] mode for 1024 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took 15853000 [us] to run in [SEQ] mode for 2048 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took 33126800 [us] to run in [SEQ] mode for 4096 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took      n/a [us] to run in [SEQ] mode for 8192 elements on <TiO>-IDE

   ============================================ */


// [PAR] pass: the same assignment inside a forall-loop. rowsums is simply
// overwritten with the values already produced by the [SEQ] pass.
/* ---------------------------------------------SECTION-UNDER-TEST--*/ aStopWATCH_PAR.start();
forall    i in indices {                     // SECTION-UNDER-TEST--
  rowsums[i] = + reduce(A[i, ..]);           // SECTION-UNDER-TEST--
}                                            // SECTION-UNDER-TEST--
/* ---------------------------------------------SECTION-UNDER-TEST--*/ aStopWATCH_PAR.stop();
/* Recorded [PAR] results, one run per max_idx value:

                                               <SECTION-UNDER-TEST> took  12131 [us] to run in [PAR] mode for    2 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took   8095 [us] to run in [PAR] mode for    4 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took   8023 [us] to run in [PAR] mode for    8 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took   8156 [us] to run in [PAR] mode for   64 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took   7990 [us] to run in [PAR] mode for  128 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took   8692 [us] to run in [PAR] mode for  256 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took  15134 [us] to run in [PAR] mode for  512 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took  16926 [us] to run in [PAR] mode for 1024 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took  30671 [us] to run in [PAR] mode for 2048 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took 105323 [us] to run in [PAR] mode for 4096 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took 292232 [us] to run in [PAR] mode for 8192 elements on <TiO>-IDE

   ============================================ */



// Print the row sums, followed by one timing line per mode.
writeln( rowsums,
        "\n <SECTION-UNDER-TEST> took ", aStopWATCH_SEQ.elapsed( Time.TimeUnits.microseconds ), " [us] to run in [SEQ] mode for ", max_idx, " elements on <TiO>-IDE",
        "\n <SECTION-UNDER-TEST> took ", aStopWATCH_PAR.elapsed( Time.TimeUnits.microseconds ), " [us] to run in [PAR] mode for ", max_idx, " elements on <TiO>-IDE"
         );

这正是 Chapel 如此出色的原因

感谢您为HPC开发和改进这样出色的计算工具。

在Chapel中计算矩阵的rowSums

2 个答案:

让代码在 `SEQ` 和 `PAR` 两种模式下都能真正在线运行的一些提示：

事实重要

希望大家都能喜欢 Chapel 在不断增长的 `[EXPSPACE]` 规模计算场景中展示出的出色 `[TIME]` 结果。

这正是 Chapel 如此出色的原因

在Chapel中计算矩阵的rowSums

2 个答案:

让代码在 SEQ 和 PAR 两种模式下都能真正在线运行的一些提示：

事实重要

希望大家都能喜欢 Chapel 在不断增长的 [EXPSPACE] 规模计算场景中展示出的出色 [TIME] 结果。

这正是 Chapel 如此出色的原因

让代码在 `SEQ` 和 `PAR` 两种模式下都能真正在线运行的一些提示：

希望大家都能喜欢 Chapel 在不断增长的 `[EXPSPACE]` 规模计算场景中展示出的出色 `[TIME]` 结果。