LLVM OpenMP* Runtime Library
kmp_affinity.h
/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"
#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits.
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
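// Illustrative usage sketch (not part of the original header): within the
// runtime, callers normally manipulate masks through the generic KMP_CPU_*
// macros rather than by constructing a concrete Mask directly, mirroring what
// bind_thread() above does. A hypothetical caller might look like:
//
//   KMPAffinity::Mask *mask;
//   KMP_CPU_ALLOC(mask);                   // allocate via the active API
//   KMP_CPU_ZERO(mask);                    // clear all bits
//   KMP_CPU_SET(which_proc, mask);         // select a single OS proc
//   __kmp_set_system_affinity(mask, TRUE); // bind the calling thread
//   KMP_CPU_FREE(mask);
//
// KMP_CPU_ALLOC/KMP_CPU_FREE are assumed here to be the heap-allocating
// counterparts of the on-stack macros used in bind_thread(); which_proc is a
// placeholder OS processor index.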

#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change; they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_LOONGARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX */
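// Worked example (illustrative, not in the original source): with
// mask_t = unsigned char, BITS_PER_MASK_T is 8, so OS proc 19 lives in byte
// 19 / 8 = 2 of the mask at bit 19 % 8 = 3. set(19) therefore ORs byte 2 with
// (mask_t)1 << 3 == 0x08, and is_set(19) tests that same bit.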

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        // Restore the thread's original mask; the previous call was only
        // needed because SetThreadAffinityMask returns the prior mask, which
        // is what we want to record.
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};
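// Illustrative note (not in the original source): isClose() ignores the last
// `level` labels, so with depth 3 the addresses {0, 1, 3} and {0, 1, 5} are
// "close" at level 1 (they share the two outer labels, e.g. package and core
// in a package/core/thread hierarchy) but not at level 0, where full equality
// is required.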

class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};

static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}
/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
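/* Illustrative example (not part of the original comment): on a hypothetical
   machine with 4 packages, 4 cores per package and 2 hardware threads per
   core, deriveLevels() below would fill numPerLevel (leaves first) as
   {2, 4, 4, 1, ...}, and init() would then compute the cumulative strides
   skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1], i.e.
   {1, 2, 8, 32, ...}: a subtree rooted at level i spans skipPerLevel[i]
   threads. */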
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  // Number of levels currently allocated in numPerLevel/skipPerLevel; grown
  // by resize() when the machine is oversubscribed.
  kmp_uint32 maxLevels;

  // Depth of the hierarchy actually in use (number of non-trivial levels).
  kmp_uint32 depth;
  // Number of threads the hierarchy was last sized for.
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  // Per-level fan-out (level 0 is closest to the leaves) and cumulative
  // stride, i.e. the number of threads spanned by a subtree at each level.
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;

  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Explicitly initialize the data fields here to prevent use of dirty
       values observed when the static library is re-initialized multiple
       times (e.g., when a non-OpenMP thread repeatedly launches/joins a
       thread that uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H