Pinning pthread to certain core on Odroid XU3 running Ubuntu in C++

I am dividing workload between big and LITTLE cores on the ARM-based Odroid XU3 platform by assigning varying workloads to pthreads and then pinning these pthreads to certain cores.

So far I have used lstopo in the terminal to check for available cores. Then I wrote a small test program in C++ based on this blog post by Eli Bendersky https://eli.thegreenplace.net/2016/c11-threads-affinity-and-hyperthreading/ and the answer to "how to set CPU affinity of a particular pthread?", where I tried to pin pthreads to big or LITTLE cores with pthread_setaffinity_np(). I then use sched_getcpu() to verify this by spot checking.

On my homogeneous Intel-based laptop the test program reported that threads were pinned as expected, but on the Odroid XU3 platform the same pthreads disregarded the call to pthread_setaffinity_np(). I suppose the pthreads were assigned to cores as the Linux scheduler saw fit. Right now I am stuck with Ubuntu 16.04.6, but this might change.

The end goal is to find an optimal static workload distribution between big and LITTLE cores. Before I conclude that my only option is to start digging in the Linux kernel source and write new modules (as suggested here for someone who wanted to prevent preemptions when pinning a pthread to a single core), is there some other API or some other method I could try (using pthreads)?

The test program:

#include <pthread.h>
#include <sched.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

#include <algorithm>
#include <cerrno>
#include <chrono>
#include <exception>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <string>
#include <vector>

// Guards the per-thread report that thread_func writes to std::cout so that
// lines from different threads do not interleave. Initialized and destroyed
// in main().
pthread_mutex_t cout_mutex;

// Arguments handed to thread_func through pthread_create's void* parameter.
struct thread_args
{
  // Thread index; thread_func also uses it as the CPU core id to pin to.
  int i;
  // The program's argv; thread_func reads argv[2] as the iteration count.
  const char** argv;
};

/**
 * Restrict the calling thread to a single CPU core.
 *
 * The id is range-checked only against CPU_SETSIZE, the capacity of
 * cpu_set_t. It is deliberately not compared with the number of *online*
 * CPUs: core ids are not guaranteed to be contiguous when some cores are
 * offline, so an existing core can have an id that is >= the online count.
 * Whether the requested core is actually present and permitted is left to
 * pthread_setaffinity_np(), which reports that through its return value.
 *
 * @param core_id zero-based CPU id to pin the calling thread to.
 * @return 0 on success, otherwise an errno-style error number (EINVAL for an
 *         id outside [0, CPU_SETSIZE), or whatever pthread_setaffinity_np()
 *         returns).
 */
int stick_this_thread_to_core(int core_id) {
   if (core_id < 0 || core_id >= CPU_SETSIZE)
      return EINVAL;

   cpu_set_t cpuset;
   CPU_ZERO(&cpuset);
   CPU_SET(core_id, &cpuset);

   return pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
}

/**
 * Write the given CPU ids to std::cout on the current line.
 *
 * Output is "Executed on CPUs: " followed by every id, each one followed by
 * ", " (including the last). No newline is written.
 *
 * @param cpus ids to print; taken by const reference so the vector is not
 *             copied on every call.
 */
void print_vector(const std::vector<int>& cpus)
{
  std::cout << "Executed on CPUs: ";
  for(const int cpu : cpus)
  {
    std::cout << cpu << ", ";
  }
}

/**
 * Record a CPU id in the list of CPUs seen so far, without duplicates.
 *
 * @param current_cpu id to record.
 * @param cpus        list of distinct ids in first-seen order; current_cpu is
 *                    appended only if it is not already present.
 */
void log_cpu(int current_cpu, std::vector<int> &cpus)
{
  // std::find stops at the first match instead of always scanning the whole
  // list; this function is called once per iteration of the timed loop.
  if(std::find(cpus.begin(), cpus.end(), current_cpu) == cpus.end())
    cpus.push_back(current_cpu);
}


void* thread_func(void* args)
{
  thread_args* arguments = (thread_args*) args;
  const char** argv = arguments->argv;
  int i = arguments->i;

  int err = stick_this_thread_to_core(i);
  if(err != 0)
    std::cout << "Set affinity error: " << err << "\n";

  std::vector<int> cpus {};
  
  using hires_clock = std::chrono::high_resolution_clock;
  using duration_ms = std::chrono::duration<double, std::milli>;
  
  auto t1 = hires_clock::now();
  long long int count = 0;
  while(count != std::stoll(argv[2]))
  {
    log_cpu(sched_getcpu(), cpus);
    count++;
  }
  auto t2 = hires_clock::now();
  
  pthread_mutex_lock(&cout_mutex);
  pid_t x = syscall(__NR_gettid);
  std::cout << "Thread #" << x << ": on CPU " << std::setw(2);
  std::cout << sched_getcpu();
  std::cout << std::setw(0) << " ";
  std::cout << "elapsed: " << std::setw(12) << duration_ms(t2 - t1).count();
  std::cout << std::setw(0) << " ms "  << "requested cpu: ";
  std::cout << std::setw(2) << i << std::setw(0) << " "; 
  print_vector(cpus);
  std::cout << "\n";
  pthread_mutex_unlock(&cout_mutex);
  return 0;
}


int main(int argc, const char** argv) {
  if(argc != 3)
  {
    std::cout << "Usage: ./bin num_cpus num_iterations" << std::endl;
    return 0;
  }
  int num_threads = std::stoi(argv[1]);
  pthread_t threads[num_threads];
  thread_args arguments[num_threads];

  pthread_mutex_init(&cout_mutex, NULL);

  for(int i = 0; i < num_threads; i++)
  {
    arguments[i].i = i;
    arguments[i].argv = argv;
  } 

  for(int i = 0; i < num_threads; i++)
  {
    void* arg = &arguments[i];
    if(pthread_create(&threads[i], NULL, thread_func, arg) != 0)
      std::cout << "pthread create error\n"; 
  } 

  for(int i = 0; i < num_threads; i++)
  {
    if(pthread_join(threads[i], NULL) != 0)
      std::cout << "pthread join error\n";
  } 

  pthread_mutex_destroy(&cout_mutex);

  return 0;
}


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source