mbind：如何在所有节点上均匀地交错现有段？

时间：2018-11-18 00:12:23

标签： linux memory linux-kernel numa numactl

使用mbind，可以为给定的映射内存段设置内存策略。

问：如何告诉`mbind`在所有节点上交错一个段？

如果在分配之后、首次使用之前调用，则对所有节点使用 MPOL_INTERLEAVE 会达到预期效果——内存将均匀分布在所有节点上。

但是，如果该段已经被写入，并且其页面已分配在某个节点（例如节点 0）上，似乎就无法让内核将其均匀交错到所有 NUMA 节点上。

该操作实际上变成了空操作（no-op），因为内核将其解释为“请将该段放置在这组节点上”。由于我们传入的是全部 NUMA 节点的集合，因此不存在位于该集合之外、需要迁移的内存。

最小，完整和可验证的示例

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sched.h>
#include <sys/syscall.h>
#include <numaif.h>
#include <numa.h>

/* Number of int elements in the test array: 2^29 bytes (512 MiB) worth. */
#define N ((1<<29) / sizeof(int))

/* System page size in bytes; expands to a sysconf() call at every use. */
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
/*
 * Mask that clears the within-page offset bits of a byte count or address.
 * NOTE(review): assumes the page size is a power of two.
 */
#define PAGE_MASK (~(PAGE_SIZE - 1))

/*
 * Run `cmd` through popen() and copy everything the command writes to its
 * standard output onto this process's stdout.
 *
 * Terminates the process with exit(-1) if the command cannot be started, if
 * pclose() itself fails, or if the command ends with a non-zero wait status.
 */
void print_command(char *cmd) {
  FILE *fp = popen(cmd, "r");
  if (fp == NULL) {
    perror("popen");
    exit(-1);
  }

  char buf[1024];
  while (fgets(buf, sizeof(buf), fp) != NULL) {
    fputs(buf, stdout);
  }

  /*
   * pclose() returns -1 (with errno set) only when it fails itself. Any other
   * non-zero value is the command's wait status, for which errno carries no
   * information, so perror() would print a stale or misleading message.
   */
  int status = pclose(fp);
  if (status == -1) {
    perror("pclose");
    exit(-1);
  }
  if (status != 0) {
    fprintf(stderr, "command failed (wait status %d): %s\n", status, cmd);
    exit(-1);
  }
}

/*
 * Print this process's per-node memory usage by running
 * `numastat -c <pid>`, with the output wrapped in ANSI colour escape
 * sequences (32 = green foreground, 0 = reset attributes).
 */
void print_node_allocations() {
  static const char color_on[] = "\x1B[32m";
  static const char color_off[] = "\x1B[0m";
  char command[1024];

  snprintf(command, sizeof command, "numastat -c %d", getpid());

  printf("%s", color_on);
  print_command(command);
  printf("%s", color_off);
}

int main(int argc, char **argv) {
  int *a = numa_alloc_local(N * sizeof(int));
  size_t len = (N * sizeof(int)) & PAGE_MASK;
  unsigned long mymask = *numa_get_mems_allowed()->maskp;
  unsigned long maxnode = numa_get_mems_allowed()->size;

  // pin thread to core zero
  cpu_set_t mask;
  CPU_ZERO(&mask);
  CPU_SET(0, &mask);
  if (sched_setaffinity(syscall(SYS_gettid), sizeof(mask), &mask) < 0) {
    perror("sched_setaffinity");
    exit(-1);
  }

  // initialize array
  printf("\n\n(1) array allocated on local node\n");
  a[0] = 997;
  for(size_t i=1; i < N; i++) {
    a[i] = a[i-1] * a[i-1] % 1000000000;
  }
  print_node_allocations();

  // attempt to get it to be uniformly interleaved on all nodes
  printf("\n\n(2) array interleaved on all nodes\n");
  if (mbind(a, len, MPOL_INTERLEAVE, &mymask, maxnode, MPOL_MF_MOVE_ALL | MPOL_MF_STRICT) == -1) {
    perror("mbind failed");
    exit(-1);
  }
  print_node_allocations();

  // what if we interleave on all but the local node?
  printf("\n\n(3) array interleaved on all nodes (except local node)\n");
  mymask -= 0x01;
  if (mbind(a, len, MPOL_INTERLEAVE, &mymask, maxnode, MPOL_MF_MOVE_ALL | MPOL_MF_STRICT) == -1) {
    perror("mbind failed");
    exit(-1);
  }
  print_node_allocations();

  return 0;
}

使用 `gcc -o interleave_all interleave_all.c -lnuma && sudo ./interleave_all` 编译并运行，会产生如下输出：

(1) array allocated on local node

Per-node process memory usage (in MBs) for PID 20636 (interleave_all)
         Node 0 Node 1 Node 2 Node 3 Total
         ------ ------ ------ ------ -----
Huge          0      0      0      0     0
Heap          0      0      0      0     0
Stack         0      0      0      0     0
Private     514      0      0      0   514
-------  ------ ------ ------ ------ -----
Total       514      0      0      0   514


(2) array interleaved on all nodes

Per-node process memory usage (in MBs) for PID 20636 (interleave_all)
         Node 0 Node 1 Node 2 Node 3 Total
         ------ ------ ------ ------ -----
Huge          0      0      0      0     0
Heap          0      0      0      0     0
Stack         0      0      0      0     0
Private     514      0      0      0   514
-------  ------ ------ ------ ------ -----
Total       514      0      0      0   514


(3) array interleaved on all nodes (except local node)

Per-node process memory usage (in MBs) for PID 20636 (interleave_all)
         Node 0 Node 1 Node 2 Node 3 Total
         ------ ------ ------ ------ -----
Huge          0      0      0      0     0
Heap          0      0      0      0     0
Stack         0      0      0      0     0
Private       2    171    171    171   514
-------  ------ ------ ------ ------ -----
Total         2    171    171    171   514

0 个答案:

没有答案