#!/usr/bin/env python

# Arguments are a list of process ids or executable names.  Threads in
# the processes are mapped round robin to the list of cores.

# If you are really using MPI it may be worth leaving a core free for
#  any additional threads created by MPI/IB etc.  To do this just
#  delete a core from cores[]

import os
import subprocess
import sys
import time

# Wait for the processes to have created all of their threads.
# Sleep in-process instead of spawning a shell just to run `sleep`.
time.sleep(1)

# On dual-socket Sandybridge (8 real cores per socket) both of these
# alternating mappings of threads to cores are a lot slower
# ... basically you want threads collaborating on a socket.
#cores = [1,9,2,10,3,11,4,12,5,13,6,14,7,15,8,0]
#cores = [0,1,2,3,  8,9,10,11,  4,5,6,7,  12,13,14,15]

# Process ids and/or executable names given on the command line.
pids = sys.argv[1:]

# Cores handed out to threads, in this order.  To keep a core free,
# remove its entry from the list.
cores = [
    0, 1, 2, 3, 4, 5, 6, 7,
    8, 9, 10, 11, 12, 13, 14, 15,
]
ncores = len(cores)

# Index into cores[] of the next core to hand out.
core = 0

# Bind every thread of every requested process to a core, handing out the
# entries of cores[] round robin across all processes.
for p in pids:
    # An argument that parses as an integer is taken to be a process id;
    # anything else is an executable name that pgrep resolves to pids.
    # Each argument is classified on its own, so ids and names can be mixed.
    try:
        int(p)
    except ValueError:
        cmd = "pgrep %s" % p
        try:
            # universal_newlines=True yields text (not bytes) so the ids can
            # be formatted into later commands on Python 2 and 3 alike.
            plist = subprocess.check_output(
                cmd, stderr=subprocess.STDOUT, shell=True,
                universal_newlines=True).split()
        except subprocess.CalledProcessError:
            # pgrep exits non-zero when no process matches the name.
            print('no process found matching %s' % p)
            continue
    else:
        plist = [p]

    for pid in plist:
        # List the thread ids of the process; egrep drops every line that
        # contains 'T' or '-' (the "TID" header and the "-" shown on the
        # per-process line of `ps -m`).
        cmd = "ps -mo tid -p %s | egrep -v 'T|-'" % pid
        try:
            res = subprocess.check_output(
                cmd, stderr=subprocess.STDOUT, shell=True,
                universal_newlines=True).split()
        except subprocess.CalledProcessError:
            # The pipeline exits non-zero when egrep has no lines left to
            # print, e.g. because the process does not exist (any more).
            print('failed listing threads of process %s' % pid)
            continue

        for tid in res:
            cmd = 'taskset -p -c %d %s' % (cores[core], tid)
            try:
                subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
            except subprocess.CalledProcessError:
                # Best effort: report the thread and carry on with the rest.
                print('failed binding thread %s' % tid)
            # The core index advances whether or not the binding succeeded.
            core = (core + 1) % ncores
