Evo malo preradjeni test, detektuje broj procesora i dize broj threadova po tome,
ali i alocira paralelno i setuje afinitet po procesoru tako da ako je malloc numa aware
ovo bi trebalo da sljaka ok i kad je numa. Nisam bas hteo da koristim libnuma,
to bi bilo preterivanje za ovo ;)
Code:
#include <pthread.h>
#include <sched.h>
#include <semaphore.h>
#include <stdio.h>
#include <stdlib.h>
/* Base buffer size, counted in int elements (16777216 = 2^24); main() gives
 * each worker BLCK_SIZE*4 divided by the number of worker threads. */
#define BLCK_SIZE 16777216
/* sema:  posted by a worker at each progress checkpoint, consumed by waitT().
 * sema1: posted by postT() to release workers blocked at a checkpoint. */
sem_t sema,sema1;
/* Per-thread parameters: blck_size is the worker's buffer length in ints,
 * cpu is the CPU index the worker pins itself to. */
typedef struct Data{ int blck_size,cpu; } Data_t;
/*
 * Worker thread body: pins itself to one CPU, then repeatedly fills a private
 * int buffer and sums it back, stopping at a checkpoint after every
 * PASSES_PER_REPORT passes to report the sum and synchronise with main().
 *
 * p       points to a Data_t giving the buffer length in ints (blck_size)
 *         and the CPU index to bind to (cpu); it must stay valid while the
 *         thread runs.
 * returns always NULL.
 *
 * Every worker performs exactly PASSES / PASSES_PER_REPORT checkpoint
 * handshakes (sem_post(&sema) followed by sem_wait(&sema1)), even when its
 * allocation fails, so the waitT()/postT() sequence in main() cannot hang.
 */
void* tf(void* p)
{
    enum { PASSES = 80, PASSES_PER_REPORT = 20 };
    const Data_t *dt = p;
    cpu_set_t set;
    int *mem;
    int pass;

    CPU_ZERO(&set);
    CPU_SET(dt->cpu, &set);
    /* pid 0 selects the calling thread. */
    if (sched_setaffinity(0, sizeof(cpu_set_t), &set) < 0) {
        printf("set affinity failed cpu %d\n", dt->cpu);
    } else {
        /* Only claim success when the call actually succeeded. */
        printf("set affinity cpu %d\n", dt->cpu);
    }

    /* Allocate after pinning so the allocation happens on the target CPU. */
    mem = malloc(dt->blck_size * sizeof *mem);
    if (mem == NULL) {
        fprintf(stderr, "thread for cpu %d: cannot allocate %d ints\n",
                dt->cpu, dt->blck_size);
        /* Still take part in every checkpoint, otherwise main() would wait
         * forever for this thread's posts. */
        for (pass = 0; pass < PASSES / PASSES_PER_REPORT; ++pass) {
            sem_post(&sema);
            sem_wait(&sema1);
        }
        return NULL;
    }

    /* Counts down so that checkpoints fall on pass 60, 40, 20 and 0. */
    for (pass = PASSES - 1; pass >= 0; --pass) {
        unsigned long long sum = 0;
        int i;

        /* Write test: touch every element. */
        for (i = 0; i < dt->blck_size; ++i) {
            mem[i] = i;
        }
        /* Read test: read every element back. */
        for (i = 0; i < dt->blck_size; ++i) {
            sum += mem[i];
        }

        if (pass % PASSES_PER_REPORT == 0) {
            printf("thread %lu, sum %llu\n", (unsigned long)pthread_self(), sum);
            sem_post(&sema);   /* tell main() this worker reached the checkpoint */
            sem_wait(&sema1);  /* wait until main() releases the workers */
        }
    }

    free(mem);
    return NULL;
}
/* Consumes `count` posts from sema, blocking until each one is available. */
static void waitT(int count)
{
    for (; count != 0; --count) {
        sem_wait(&sema);
    }
}
/* Posts sema1 `count` times, releasing that many blocked sem_wait(&sema1) calls. */
static void postT(int count)
{
    for (; count != 0; --count) {
        sem_post(&sema1);
    }
}
/*
 * Memory write/read test driver.
 *
 * Determines which CPUs this process may run on, starts one worker thread
 * (tf) per CPU -- or a single worker when any command-line argument is
 * given -- and prints a progress line each time all workers have reached a
 * checkpoint.
 *
 * The CPU list is taken from the process affinity mask instead of probing
 * with sched_setaffinity(): this leaves the main thread's own affinity
 * untouched, copes with CPU ids that do not start at 0 or have gaps, and
 * never yields a zero CPU count silently.
 *
 * Returns 0 on success, EXIT_FAILURE if setup fails or a worker could not be
 * started.
 */
int main (int argc,char* argv[])
{
    /* One entry per worker checkpoint; tf() reports exactly four times. */
    static const char *const progress[] = {
        "25 percent done:\n",
        "50 percent done:\n",
        "75 percent done:\n",
        "100 percent done - Test completed!\n"
    };
    const int stages = (int)(sizeof progress / sizeof progress[0]);
    cpu_set_t allowed;
    int cpus[CPU_SETSIZE];
    int numcpus = 0;
    int nthreads;
    int started;
    int status = 0;
    int cpu, i, stage;
    pthread_t *tid;
    Data_t *dt;

    (void)argv; /* only the presence of an argument matters */

    /* pid 0 selects the calling thread. */
    CPU_ZERO(&allowed);
    if (sched_getaffinity(0, sizeof(cpu_set_t), &allowed) != 0) {
        perror("sched_getaffinity");
        return EXIT_FAILURE;
    }
    for (cpu = 0; cpu < CPU_SETSIZE; ++cpu) {
        if (CPU_ISSET(cpu, &allowed)) {
            cpus[numcpus++] = cpu;
        }
    }
    if (numcpus == 0) {
        fprintf(stderr, "no usable CPU found in the affinity mask\n");
        return EXIT_FAILURE;
    }
    printf("numcpus %d\n", numcpus);

    /* Any argument selects single-threaded mode. */
    nthreads = (argc > 1) ? 1 : numcpus;

    if (sem_init(&sema, 0, 0) != 0) {
        perror("sem_init");
        return EXIT_FAILURE;
    }
    if (sem_init(&sema1, 0, 0) != 0) {
        perror("sem_init");
        sem_destroy(&sema);
        return EXIT_FAILURE;
    }

    tid = calloc((size_t)nthreads, sizeof *tid);
    dt = calloc((size_t)nthreads, sizeof *dt);
    if (tid == NULL || dt == NULL) {
        fprintf(stderr, "out of memory\n");
        free(tid);
        free(dt);
        sem_destroy(&sema);
        sem_destroy(&sema1);
        return EXIT_FAILURE;
    }

    /* Each worker gets its own Data_t; the total amount of memory is split
     * evenly across the workers. */
    for (i = 0; i < nthreads; ++i) {
        int rc;

        dt[i].blck_size = BLCK_SIZE*4/nthreads;
        dt[i].cpu = cpus[i];
        rc = pthread_create(&tid[i], NULL, tf, &dt[i]);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed for cpu %d (error %d)\n",
                    cpus[i], rc);
            status = EXIT_FAILURE;
            break;
        }
    }
    /* Synchronise only with workers that really exist; waiting for a thread
     * that was never created would block forever. */
    started = i;

    for (stage = 0; stage < stages; ++stage) {
        waitT(started);
        fputs(progress[stage], stdout);
        postT(started);
    }

    for (i = 0; i < started; ++i) {
        pthread_join(tid[i], NULL);
    }

    free(dt);
    free(tid);
    sem_destroy(&sema);
    sem_destroy(&sema1);
    return status;
}
Kompajlira se sa:
gcc -Wall -O2 -D_GNU_SOURCE ime.c -o ime -lpthread
Mora _GNU_SOURCE zbog afiniteta (sched_setaffinity, cpu_set_t i CPU_* makroi); ako nekome treba, moze dodati i #include <sched.h>,
mada meni nije bilo potrebno.
bmaxa@maxa:~$ time ./omp 1
numcpus 2
set affinity cpu 0
thread 1092589904, sum 2251799780130816
25 percent done:
thread 1092589904, sum 2251799780130816
50 percent done:
thread 1092589904, sum 2251799780130816
75 percent done:
thread 1092589904, sum 2251799780130816
100 percent done - Test completed!
real 0m9.828s
user 0m9.641s
sys 0m0.144s
bmaxa@maxa:~$ time ./omp
numcpus 2
set affinity cpu 0
set affinity cpu 1
thread 1106848080, sum 562949936644096
thread 1115240784, sum 562949936644096
25 percent done:
thread 1106848080, sum 562949936644096
thread 1115240784, sum 562949936644096
50 percent done:
thread 1115240784, sum 562949936644096
thread 1106848080, sum 562949936644096
75 percent done:
thread 1115240784, sum 562949936644096
thread 1106848080, sum 562949936644096
100 percent done - Test completed!
real 0m8.669s
user 0m15.941s
sys 0m0.160s
edit: Dodao sam i memory read test osim samo write-a a ko hoce moze modifikovati da radi i sa
razlicitim velicinama blokova, stavio sam da se velicina rasporedi po threadovima itd
edit2: potkrala mi se greska, morao sam da bacim razlicite data strukture za svaki thread ;)
dodao sam i mogucnost ako se baci neki parametar onda program racuna u samo jednom threadu.
Pozdrav!
[Ovu poruku je menjao Branimir Maksimovic dana 11.11.2008. u 06:25 GMT+1]
[Ovu poruku je menjao Branimir Maksimovic dana 11.11.2008. u 09:21 GMT+1]