OSTEP第29章的問題解答

問題:

Questions

  1. We’ll start by redoing the measurements within this chapter. Use
    the call gettimeofday() to measure time within your program.
    How accurate is this timer? What is the smallest interval it can measure? Gain confidence in its workings, as we will need it in all subsequent questions. You can also look into other timers, such as the
    cycle counter available on x86 via the rdtsc instruction.
  2. Now, build a simple concurrent counter and measure how long it
    takes to increment the counter many times as the number of threads
    increases. How many CPUs are available on the system you are
    using? Does this number impact your measurements at all?
  3. Next, build a version of the sloppy counter. Once again, measure its
    performance as the number of threads varies, as well as the threshold. Do the numbers match what you see in the chapter?
#----------------------------------
# 1 cpus
[root@localhost test]# ./a.out -C simple -l 1000000 -t 1 -c 0 
time: 0.026906
[root@localhost test]# ./a.out -C simple -l 1000000  -t 2 -c 0,0
time: 0.020573
[root@localhost test]# ./a.out -C simple -l 1000000  -t 3 -c 0,0,0
time: 0.020608
[root@localhost test]# ./a.out -C simple -l 1000000  -t 4 -c 0,0,0,0
time: 0.024155


# 2 cpus 

[root@localhost test]# ./a.out -C simple -l 1000000 -t 1 -c 0x3 
time: 0.024874
[root@localhost test]# ./a.out -C simple -l 1000000  -t 2 -c 0x3,0x3
time: 0.020580
[root@localhost test]# ./a.out -C simple -l 1000000  -t 3 -c 0x3,0x3,0x3
time: 0.020614
[root@localhost test]# ./a.out -C simple -l 1000000  -t 4 -c 0x3,0x3,0x3,0x3
time: 0.023882


# 3 cpus 

[root@localhost test]# ./a.out -C simple -l 1000000 -t 1 -c 0x7 
time: 0.026345
[root@localhost test]# ./a.out -C simple -l 1000000  -t 2 -c 0x7,0x7
time: 0.020492
[root@localhost test]# ./a.out -C simple -l 1000000  -t 3 -c 0x7,0x7,0x7
time: 0.020479
[root@localhost test]# ./a.out -C simple -l 1000000  -t 4 -c 0x7,0x7,0x7,0x7
time: 0.025776


# 4 cpus 

[root@localhost test]# # 4 cpus 
[root@localhost test]# ./a.out -C simple -l 1000000 -t 1 -c 0xf 
time: 0.025357
[root@localhost test]# ./a.out -C simple -l 1000000  -t 2 -c 0xf,0xf
time: 0.020473
[root@localhost test]# ./a.out -C simple -l 1000000  -t 3 -c 0xf,0xf,0xf
time: 0.020487
[root@localhost test]# ./a.out -C simple -l 1000000  -t 4 -c 0xf,0xf,0xf,0xf
time: 0.027208


#----------------------------------
# scalable:1, 1 cpus

[root@localhost test]# ./a.out -C scalable:1 -l 1000000 -t 1  -c 0x1 
time: 0.035701
[root@localhost test]# ./a.out -C scalable:1 -l 1000000 -t 2  -c 0x1,0x1
time: 0.032167
[root@localhost test]# ./a.out -C scalable:1 -l 1000000 -t 3  -c 0x1,0x1,0x1
time: 0.032740
[root@localhost test]# ./a.out -C scalable:1 -l 1000000 -t 4  -c 0x1,0x1,0x1,0x1
time: 0.036457


# scalable:2, 1 cpus

[root@localhost test]# ./a.out -C scalable:2 -l 1000000 -t 1  -c 0x1 
time: 0.032058
[root@localhost test]# ./a.out -C scalable:2 -l 1000000 -t 2  -c 0x1,0x1
time: 0.029418
[root@localhost test]# ./a.out -C scalable:2 -l 1000000 -t 3  -c 0x1,0x1,0x1
time: 0.029760
[root@localhost test]# ./a.out -C scalable:2 -l 1000000 -t 4  -c 0x1,0x1,0x1,0x1
time: 0.033746


# scalable:3, 1 cpus

[root@localhost test]# ./a.out -C scalable:3 -l 1000000 -t 1  -c 0x1 
time: 0.035122
[root@localhost test]# ./a.out -C scalable:3 -l 1000000 -t 2  -c 0x1,0x1
time: 0.028383
[root@localhost test]# ./a.out -C scalable:3 -l 1000000 -t 3  -c 0x1,0x1,0x1
time: 0.028425
[root@localhost test]# ./a.out -C scalable:3 -l 1000000 -t 4  -c 0x1,0x1,0x1,0x1
time: 0.033497


# scalable:4, 1 cpus

[root@localhost test]# ./a.out -C scalable:4 -l 1000000 -t 1  -c 0x1 
time: 0.032937
[root@localhost test]# ./a.out -C scalable:4 -l 1000000 -t 2  -c 0x1,0x1
time: 0.027356
[root@localhost test]# ./a.out -C scalable:4 -l 1000000 -t 3  -c 0x1,0x1,0x1
time: 0.027314
[root@localhost test]# ./a.out -C scalable:4 -l 1000000 -t 4  -c 0x1,0x1,0x1,0x1
time: 0.034496


#----------------------------------
# scalable:1, 4 cpus
                         
[root@localhost test]# ./a.out -C scalable:1 -l 1000000 -t 1  -c 0xf
time: 0.036803
[root@localhost test]# ./a.out -C scalable:1 -l 1000000 -t 2  -c 0xf,0xf
time: 0.032622
[root@localhost test]# ./a.out -C scalable:1 -l 1000000 -t 3  -c 0xf,0xf,0xf
time: 0.032171
[root@localhost test]# ./a.out -C scalable:1 -l 1000000 -t 4  -c 0xf,0xf,0xf,0xf
time: 0.036665

						 
# scalable:2, 4 cpus
  
[root@localhost test]# ./a.out -C scalable:2 -l 1000000 -t 1  -c 0xf
time: 0.032224
[root@localhost test]# ./a.out -C scalable:2 -l 1000000 -t 2  -c 0xf,0xf
time: 0.029285
[root@localhost test]# ./a.out -C scalable:2 -l 1000000 -t 3  -c 0xf,0xf,0xf
time: 0.029446
[root@localhost test]# ./a.out -C scalable:2 -l 1000000 -t 4  -c 0xf,0xf,0xf,0xf
time: 0.030861

  
# scalable:3, 4 cpus

[root@localhost test]# ./a.out -C scalable:3 -l 1000000 -t 1  -c 0xf
time: 0.031900
[root@localhost test]# ./a.out -C scalable:3 -l 1000000 -t 2  -c 0xf,0xf
time: 0.028310
[root@localhost test]# ./a.out -C scalable:3 -l 1000000 -t 3  -c 0xf,0xf,0xf
time: 0.028338
[root@localhost test]# ./a.out -C scalable:3 -l 1000000 -t 4  -c 0xf,0xf,0xf,0xf
time: 0.029111

                         
# scalable:4, 4 cpus
                         
[root@localhost test]# ./a.out -C scalable:4 -l 1000000 -t 1  -c 0xf
time: 0.031149
[root@localhost test]# ./a.out -C scalable:4 -l 1000000 -t 2  -c 0xf,0xf
time: 0.027425
[root@localhost test]# ./a.out -C scalable:4 -l 1000000 -t 3  -c 0xf,0xf,0xf
time: 0.027471
[root@localhost test]# ./a.out -C scalable:4 -l 1000000 -t 4  -c 0xf,0xf,0xf,0xf
time: 0.027432

-------------
# scalable:1, 4 cpus separated

[root@localhost test]# ./a.out -C scalable:1 -l 1000000 -t 1  -c 0 
time: 0.038303
[root@localhost test]# ./a.out -C scalable:1 -l 1000000 -t 2  -c 0,1
time: 0.075887
[root@localhost test]# ./a.out -C scalable:1 -l 1000000 -t 3  -c 0,1,2
time: 0.123658
[root@localhost test]# ./a.out -C scalable:1 -l 1000000 -t 4  -c 0,1,2,3
time: 0.123024

                         
# scalable:2, 4 cpus separated

[root@localhost test]# ./a.out -C scalable:2 -l 1000000 -t 1  -c 0 
time: 0.029121
[root@localhost test]# ./a.out -C scalable:2 -l 1000000 -t 2  -c 0,1
time: 0.059742
[root@localhost test]# ./a.out -C scalable:2 -l 1000000 -t 3  -c 0,1,2
time: 0.112505
[root@localhost test]# ./a.out -C scalable:2 -l 1000000 -t 4  -c 0,1,2,3
time: 0.110705

                      
# scalable:3, 4 cpus separated

[root@localhost test]# ./a.out -C scalable:3 -l 1000000 -t 1  -c 0 
time: 0.028052
You have mail in /var/spool/mail/root
[root@localhost test]# ./a.out -C scalable:3 -l 1000000 -t 2  -c 0,1
time: 0.071130
[root@localhost test]# ./a.out -C scalable:3 -l 1000000 -t 3  -c 0,1,2
time: 0.112936
[root@localhost test]# ./a.out -C scalable:3 -l 1000000 -t 4  -c 0,1,2,3
time: 0.105164

                         
# scalable:4, 4 cpus separated

[root@localhost test]# ./a.out -C scalable:4 -l 1000000 -t 1  -c 0 
time: 0.032684
[root@localhost test]# ./a.out -C scalable:4 -l 1000000 -t 2  -c 0,1
time: 0.077285
[root@localhost test]# ./a.out -C scalable:4 -l 1000000 -t 3  -c 0,1,2
time: 0.096257
[root@localhost test]# ./a.out -C scalable:4 -l 1000000 -t 4  -c 0,1,2,3
time: 0.102119

main.c 源碼內容:

#define _GNU_SOURCE
#include <assert.h>
#include <ctype.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Allocation wrappers used throughout this file.
 * Change the `#if 0` below to `#if 1` to get the tracing variants, which
 * print the calling function, the line, the address and the stringified
 * argument of every allocation and release. The tracing variants are
 * written with GNU statement expressions `({ ... })`.
 */
#if 0	/* for debug only */
#define __malloc(size) ({ \
	void *__ptr__ = malloc(size); \
	printf("[ALLOC] %32s:%4d | addr= %p, size= %lu, expr= `%s`\n" \
		, __FUNCTION__, __LINE__ \
		, __ptr__, size \
		, #size); \
	__ptr__; \
})

#define __free(ptr) ({ \
	printf("[ FREE] %32s:%4d | addr= %p, expr= `%s`\n" \
		, __FUNCTION__, __LINE__ \
		, ptr\
		, #ptr); \
	free(ptr); \
})
#else
/* Default: plain pass-through to malloc()/free(). */
#define __malloc(size)	malloc(size)
#define __free(ptr)		free(ptr)
#endif

/*
 * ASSERT(expr) forwards to assert() when the debug branch is enabled.
 * In the default branch it expands to nothing, so `expr` is not evaluated.
 */
#if 0	/* for debug only */
#define ASSERT(expr)	assert(expr)
#else
#define ASSERT(expr)	
#endif
/* ----------------------------- */
/*
 * Start a new thread with default attributes.
 *
 * thread_id: receives the identifier of the created thread.
 * routine:   thread entry point; it is called with `arg` as its only argument.
 * arg:       opaque pointer handed to `routine`.
 *
 * Returns the pthread_create() error code (0 on success).
 */
int thread_create(pthread_t *thread_id, void *(*routine)(void *), void *arg) {
	int error_code;
	error_code = pthread_create(thread_id, NULL, routine, arg);
	ASSERT(0 == error_code);
	return error_code;
}

/*
 * Wait for `thread_id` to terminate.
 * When `value_returned` is not NULL, pthread_join() stores the thread's
 * exit value through it.
 * Returns the pthread_join() error code (0 on success).
 */
int thread_join(pthread_t thread_id, void **value_returned) {
	const int rc = pthread_join(thread_id, value_returned);

	ASSERT(0 == rc);
	return rc;
}

/*
 * Restrict `thread_id` to the CPUs named in `cpu_set`, after masking the
 * request with CPUs 0 .. N-1, where N is sysconf(_SC_NPROCESSORS_ONLN).
 *
 * If no CPU is left after masking, the affinity is left untouched and 0 is
 * returned. Otherwise the pthread_setaffinity_np() error code is returned.
 */
int thread_set_affinity(pthread_t thread_id, const cpu_set_t *cpu_set) {
	cpu_set_t usable;
	int online = sysconf(_SC_NPROCESSORS_ONLN);
	int cpu = 0;
	int error_code;

	/* Build the mask of CPUs 0 .. online-1, then intersect with the request. */
	CPU_ZERO(&usable);
	while (cpu < online) {
		CPU_SET(cpu, &usable);
		++cpu;
	}
	CPU_AND(&usable, &usable, cpu_set);

	if (CPU_COUNT(&usable) > 0) {
		error_code = pthread_setaffinity_np(thread_id, sizeof(cpu_set_t), &usable);
		ASSERT(0 == error_code);
		return error_code;
	}

	/* Nothing usable was requested: keep the current affinity. */
	return 0;
}


/* ----------------------------- */

/*
 * Initialise `lock` with default attributes.
 * Returns the pthread_mutex_init() error code (0 on success).
 */
int mutex_setup(pthread_mutex_t *lock) {
	const int rc = pthread_mutex_init(lock, NULL);

	ASSERT(0 == rc);
	return rc;
}

/*
 * Destroy `lock`.
 * Returns the pthread_mutex_destroy() error code (0 on success).
 */
int mutex_cleanup(pthread_mutex_t *lock) {
	const int rc = pthread_mutex_destroy(lock);

	ASSERT(0 == rc);
	return rc;
}

/*
 * Acquire `lock`, blocking until it is available.
 * Returns the pthread_mutex_lock() error code (0 on success).
 */
int mutex_lock(pthread_mutex_t *lock) {
	const int rc = pthread_mutex_lock(lock);

	ASSERT(0 == rc);
	return rc;
}

/*
 * Release `lock`.
 * Returns the pthread_mutex_unlock() error code (0 on success).
 */
int mutex_unlock(pthread_mutex_t *lock) {
	const int rc = pthread_mutex_unlock(lock);

	ASSERT(0 == rc);
	return rc;
}


/* ----------------------------- */
/*
 * Initialise `barrier` with default attributes for `count` participants.
 * Returns the pthread_barrier_init() error code (0 on success).
 */
int barrier_setup(pthread_barrier_t *barrier, unsigned count) {
	const int rc = pthread_barrier_init(barrier, NULL, count);

	ASSERT(0 == rc);
	return rc;
}
/*
 * Destroy `barrier`.
 * Returns the pthread_barrier_destroy() error code (0 on success).
 */
int barrier_cleanup(pthread_barrier_t *barrier) {
	const int rc = pthread_barrier_destroy(barrier);

	ASSERT(0 == rc);
	return rc;
}

/*
 * Block at `barrier` until all of its participants have arrived.
 * The pthread_barrier_wait() result is handed back unchanged and is not
 * asserted on.
 */
int barrier_wait(pthread_barrier_t *barrier) {
	return pthread_barrier_wait(barrier);
}

/* ----------------------------- */
/* A counter whose value is protected by a single mutex. */
struct simple_counter {
	int counter; /* current value; read and modified under `lock` by simple_counter_update()/simple_counter_get() */
	pthread_mutex_t lock; /* initialised in simple_counter_setup(), destroyed in simple_counter_cleanup() */
};

/*
 * Put `counter` into its initial state: value 0 and a freshly initialised
 * lock.
 *
 * Returns 0 on success, otherwise the error code reported by mutex_setup(),
 * so that simple_counter_create() can actually detect the failure it checks
 * for.
 */
int simple_counter_setup(struct simple_counter *counter) {
	counter->counter = 0;
	return mutex_setup(&(counter->lock));
}

/*
 * Reset the value of `counter` to 0 and destroy its lock.
 * The memory of `counter` itself is not released here.
 */
void simple_counter_cleanup(struct simple_counter *counter) {
	pthread_mutex_t *lock = &(counter->lock);

	counter->counter = 0;
	mutex_cleanup(lock);
}

/*
 * Add `increment` to the counter while holding its lock.
 * `counter` must point to a struct simple_counter.
 * Always returns 0.
 */
int simple_counter_update(void *counter, int increment) {
	struct simple_counter *self = counter;
	pthread_mutex_t *guard = &(self->lock);

	mutex_lock(guard);
	self->counter = self->counter + increment;
	mutex_unlock(guard);
	return 0;
}

/*
 * Read the current value of the counter while holding its lock.
 * `counter` must point to a struct simple_counter.
 */
int simple_counter_get(void *counter) {
	struct simple_counter *self = counter;
	int snapshot;

	mutex_lock(&(self->lock));
	snapshot = self->counter;
	mutex_unlock(&(self->lock));
	return snapshot;
}

/*
 * Allocate a simple counter and run simple_counter_setup() on it.
 * Returns the new counter, or NULL when the allocation or the setup fails.
 * Release the result with simple_counter_destroy().
 */
struct simple_counter * simple_counter_create() {
	struct simple_counter *fresh = __malloc(sizeof(struct simple_counter));

	if (NULL != fresh && 0 != simple_counter_setup(fresh)) {
		/* setup failed: hand the memory back and report failure */
		__free(fresh);
		fresh = NULL;
	}
	return fresh;
}

/*
 * Clean up and free a counter obtained from simple_counter_create().
 * A NULL `counter` is accepted and ignored.
 */
void simple_counter_destroy(void *counter) {
	struct simple_counter *victim = counter;

	if (NULL != victim) {
		simple_counter_cleanup(victim);
		__free(victim);
	}
}

/* ----------------------------- */
/* Number of per-thread slots; must stay a power of two (it is used as a mask). */
#define SCALABLE_COUNTER_SLOT_COUNT (16U)

/*
 * Reduce an integer key to a slot index in [0, SCALABLE_COUNTER_SLOT_COUNT).
 *
 * The key is a thread identifier. Masking its lowest bits directly is a poor
 * choice: on common implementations (e.g. glibc, where pthread_t is the
 * address of an aligned per-thread descriptor -- implementation detail,
 * verify on your platform) those bits are identical for every thread, so all
 * threads would share slot 0 and contend on one lock. Bits 6..9 and 12..15
 * are mixed instead, which separates identifiers that differ by a page or
 * more.
 */
static inline unsigned int scalable_counter_slot_from_key(unsigned long key) {
	return (unsigned int)((key >> 12) ^ (key >> 6)) & (SCALABLE_COUNTER_SLOT_COUNT - 1U);
}

/* Slot used by the thread identified by `thread_id` (an arithmetic pthread_t). */
#define SCALABLE_COUNTER_GET_SLOT(thread_id) \
	scalable_counter_slot_from_key((unsigned long)(thread_id))

/*
 * Counter that accumulates increments in per-slot counters and only
 * occasionally folds them into a shared global value.
 */
struct scalable_counter {
	int global_counter; /* sum of everything flushed so far; accessed under `global_lock` */
	pthread_mutex_t global_lock; /* protects `global_counter` */
	int thread_counters[SCALABLE_COUNTER_SLOT_COUNT]; /* pending increments per slot; the slot is derived from pthread_self() via SCALABLE_COUNTER_GET_SLOT */
	pthread_mutex_t thread_locks[SCALABLE_COUNTER_SLOT_COUNT]; /* thread_locks[i] protects thread_counters[i] */
	int update_threshold; /* a slot is flushed into `global_counter` once its pending value exceeds this */
};

/*
 * Put `counter` into its initial state: all values 0, all locks initialised,
 * and `update_threshold` recorded.
 *
 * Returns 0 on success. If a lock cannot be initialised, the locks that were
 * already set up are destroyed again and the mutex_setup() error code is
 * returned, so the caller only has to release the memory.
 */
int scalable_counter_setup(struct scalable_counter *counter, int update_threshold) {
	unsigned int i;
	int error_code;

	counter->update_threshold = update_threshold;
	counter->global_counter = 0;
	error_code = mutex_setup(&(counter->global_lock));
	if (0 != error_code) {
		return error_code;
	}

	for (i = 0; i < SCALABLE_COUNTER_SLOT_COUNT; ++i) {
		counter->thread_counters[i] = 0;
		error_code = mutex_setup(&(counter->thread_locks[i]));
		if (0 != error_code) {
			goto fail_slot_lock;
		}
	}
	return 0;

fail_slot_lock:
	/* undo the slot locks [0, i) and the global lock */
	while (i > 0) {
		--i;
		mutex_cleanup(&(counter->thread_locks[i]));
	}
	mutex_cleanup(&(counter->global_lock));
	return error_code;
}

/*
 * Reset every value of `counter` to 0 and destroy the global lock and all
 * slot locks. The memory of `counter` itself is not released here.
 */
void scalable_counter_cleanup(struct scalable_counter *counter) {
	unsigned int slot = 0;

	counter->global_counter = 0;
	mutex_cleanup(&(counter->global_lock));

	while (slot < SCALABLE_COUNTER_SLOT_COUNT) {
		counter->thread_counters[slot] = 0;
		mutex_cleanup(&(counter->thread_locks[slot]));
		++slot;
	}
}

/*
 * Add `increment` to the calling thread's slot and, once the slot's pending
 * value exceeds the configured threshold, move it into the global counter.
 *
 * `counter` must point to a struct scalable_counter.
 * Locks are taken in the order slot lock -> global lock, the same order
 * scalable_counter_get() uses.
 * Always returns 0 (the function is declared int and is called through
 * app.counter_update, so it must return a value).
 */
int scalable_counter_update(void *counter, int increment) {
	struct scalable_counter *scalable_counter = (struct scalable_counter *)counter;
	unsigned int slot = SCALABLE_COUNTER_GET_SLOT(pthread_self());

	mutex_lock(&(scalable_counter->thread_locks[slot]));
	scalable_counter->thread_counters[slot] += increment;
	if (scalable_counter->thread_counters[slot] > scalable_counter->update_threshold) {
		/* flush the pending amount into the shared total */
		mutex_lock(&(scalable_counter->global_lock));
		scalable_counter->global_counter += scalable_counter->thread_counters[slot];
		mutex_unlock(&(scalable_counter->global_lock));
		scalable_counter->thread_counters[slot] = 0;
	}
	mutex_unlock(&(scalable_counter->thread_locks[slot]));
	return 0;
}

/*
 * Return the total count.
 *
 * This is not a read-only operation: every slot's pending value is first
 * moved into the global counter (slot lock taken before the global lock),
 * and the slot is reset to 0. The global value is then read under the
 * global lock.
 * `counter` must point to a struct scalable_counter.
 */
int scalable_counter_get(void *counter) {
	struct scalable_counter *self = counter;
	unsigned int slot = 0;
	int total;

	while (slot < SCALABLE_COUNTER_SLOT_COUNT) {
		pthread_mutex_t *slot_lock = &(self->thread_locks[slot]);
		int *pending = &(self->thread_counters[slot]);

		mutex_lock(slot_lock);
		mutex_lock(&(self->global_lock));
		self->global_counter += *pending;
		mutex_unlock(&(self->global_lock));
		*pending = 0;
		mutex_unlock(slot_lock);
		++slot;
	}

	mutex_lock(&(self->global_lock));
	total = self->global_counter;
	mutex_unlock(&(self->global_lock));

	return total;
}

/*
 * Allocate a scalable counter and run scalable_counter_setup() on it with
 * the given `update_threshold`.
 * Returns the new counter, or NULL when the allocation or the setup fails.
 * Release the result with scalable_counter_destroy().
 */
struct scalable_counter * scalable_counter_create(int update_threshold) {
	struct scalable_counter *fresh = __malloc(sizeof(struct scalable_counter));

	if (NULL != fresh && 0 != scalable_counter_setup(fresh, update_threshold)) {
		/* setup failed: hand the memory back and report failure */
		__free(fresh);
		fresh = NULL;
	}
	return fresh;
}

/*
 * Clean up and free a counter obtained from scalable_counter_create().
 * A NULL `counter` is accepted and ignored.
 */
void scalable_counter_destroy(void *counter) {
	struct scalable_counter *victim = counter;

	if (NULL != victim) {
		scalable_counter_cleanup(victim);
		__free(victim);
	}
}


/* ----------------------------- */
#include <sys/time.h>

/* Per-worker bookkeeping; one element of app.threads per worker thread. */
struct thread_info {
	int index; /* position of this entry in app.threads (set in app_parse_thread_count) */
	pthread_t tid; /* filled in when the thread is created in app_run() */
	cpu_set_t cpu_set; /* CPUs requested with -c; applied by the worker itself through thread_set_affinity(), which leaves the affinity alone when the set is empty */
};

/*
 * Complete state of one benchmark run: the workload size, the worker
 * threads, the timing results and the counter implementation under test.
 */
struct app {
	int loop_count;		/* -l value; app_parse() divides it by thread_count, so workers use it as a per-thread iteration count */
	int thread_count;	/* number of entries in `threads` */
	struct thread_info *threads;	/* allocated by app_parse_thread_count(), freed by app_cleanup() */
	struct timeval time_start;
	struct timeval time_stop;
	pthread_barrier_t barrier_start;	/* start line shared by main and all workers */
	int barrier_ready;	/* non-zero only while barrier_start is initialised */
	void (*counter_destroy)(void *counter);
	int (*counter_update)(void *counter, int increment);
	int (*counter_get)(void *counter);
	void *counter;		/* counter instance passed to the three callbacks above */
};

/* Helpers defined further down in this file but used by the functions below. */
void usage_show(const char *program_name);
int cpu_set_to_string(char *buffer, int len, cpu_set_t *cpu_set);
int app_parse_counter(struct app *app, const char *argument_string);
int app_parse_loop_count(struct app *app, const char *argument_string);
int app_parse_thread_count(struct app *app, const char *argument_string);
int app_parse_thread_cpu(struct app *app, const char *argument_string);

/*
 * Give `app` its defaults: one loop, one thread, no thread array, no
 * counter, and a start barrier that is marked as not yet initialised.
 * Always returns 0.
 */
int app_setup(struct app *app) {
	app->loop_count = 1;
	app->thread_count = 1;
	app->threads = NULL;
	app->barrier_ready = 0;
	/* ----------------- */
	app->counter_update = NULL;
	app->counter_destroy= NULL;
	app->counter_get= NULL;
	app->counter = NULL;
	return 0;
}

/*
 * Release everything `app` owns. Safe to call after a failed or partial
 * app_parse(): the barrier is only destroyed if it was really initialised
 * (destroying an uninitialised barrier is undefined behaviour).
 */
void app_cleanup(struct app *app) {
	__free(app->threads);
	app->threads = NULL;
	app->thread_count = 0;
	app->loop_count = 0;

	if (app->barrier_ready) {
		barrier_cleanup(&(app->barrier_start));
		app->barrier_ready = 0;
	}
	/* ----------------- */
	if (NULL != app->counter
	&&  NULL != app->counter_destroy) {
		(app->counter_destroy)(app->counter);
	}
	app->counter = NULL;

	app->counter_update = NULL;
	app->counter_destroy= NULL;
	app->counter_get= NULL;
}

/*
 * Print the loop count, the thread count and the CPU set of every thread.
 * Always returns 0.
 */
int app_dump(struct app *app) {
	printf("loop count: %d\n", app->loop_count);
	printf("thread count: %d\n", app->thread_count);

	int i;
	for (i = 0; i < app->thread_count; ++i) {
		char buffer[512] = {0};
		cpu_set_to_string(buffer, sizeof(buffer), &(app->threads[i].cpu_set));
		printf("  threads[%d].cpu_set = %s\n", i, buffer);
	}
	return 0;
}

/*
 * Parse the command line into `app` and prepare the start barrier.
 *
 * Returns
 *    0  when everything needed for app_run() is in place,
 *    1  when -h was given: the usage text was printed and there is nothing
 *       to run,
 *   -1  on any error (bad option or value, missing -t or -C, barrier setup
 *       failure).
 * A non-zero result means app_run() must not be called.
 */
int app_parse(struct app *app, int argc, char **argv) {
#define OPTION_STRING "C:l:t:c:h"
	int c;
	extern char *optarg;
	extern int optind, opterr, optopt;
	while(-1 != (c = getopt(argc, argv, OPTION_STRING))) {
		switch(c) {
			case 'C':
				if (0 != app_parse_counter(app, optarg)) {
					goto fail_parse;
				}
				break;
			case 'l':
				if (0 != app_parse_loop_count(app, optarg)) {
					goto fail_parse;
				}
				break;
			case 't':
				if (0 != app_parse_thread_count(app, optarg)) {
					goto fail_parse;
				}
				break;
			case 'c':
				if (0 != app_parse_thread_cpu(app, optarg)) {
					goto fail_parse;
				}
				break;
			case 'h':
				/* help was asked for: not an error, but nothing to run */
				usage_show(argv[0]);
				return 1;
			default:
				/* unknown option or missing option argument */
				usage_show(argv[0]);
				goto fail_parse;
		}
	}

	if (NULL == app->threads) {
		printf("shoud setup the thread with option -t\n");
		usage_show(argv[0]);
		goto fail_parse;
	}


	if (NULL == app->counter) {
		printf("shoud setup the counter with option -C\n");
		usage_show(argv[0]);
		goto fail_parse;
	}

	/* split the requested number of increments across the threads */
	app->loop_count /= app->thread_count;

	/* main thread + children threads */
	if (0 != barrier_setup(&(app->barrier_start), 1 + app->thread_count)) {
		printf("fail to set up the start barrier.\n");
		goto fail_parse;
	}
	app->barrier_ready = 1;

	return 0;
fail_parse:
	return -1;
}

/*
 * Select the mutex-protected simple counter as the counter under test:
 * install its callbacks in `app` and create one instance.
 * `counter_parameter_string` is not used by this counter type.
 * Returns 0 on success, -1 if the counter cannot be created.
 */
int app_setup_simpler_counter(struct app *app, const char *counter_parameter_string) {
	(void)counter_parameter_string;

	app->counter_get = simple_counter_get;
	app->counter_update = simple_counter_update;
	app->counter_destroy = simple_counter_destroy;
	app->counter = simple_counter_create();

	return (NULL != app->counter) ? 0 : -1;
}
/*
 * Select the scalable counter as the counter under test.
 * `counter_parameter_string` is the flush threshold in decimal (converted
 * with atoi()); it is mandatory and must not be negative.
 * Returns 0 on success, -1 when the parameter is missing or negative or
 * the counter cannot be created.
 */
int app_setup_scalable_counter(struct app *app, const char *counter_parameter_string) {
	/* a missing parameter is folded into the "negative threshold" rejection */
	const int threshold = (NULL != counter_parameter_string)
		? atoi(counter_parameter_string)
		: -1;

	if (threshold < 0) {
		return -1;
	}

	app->counter_get = scalable_counter_get;
	app->counter_update = scalable_counter_update;
	app->counter_destroy = scalable_counter_destroy;
	app->counter = scalable_counter_create(threshold);

	return (NULL != app->counter) ? 0 : -1;
}

/*
 * Handle the -C option. `argument_string` has the form
 * "<type>" or "<type>:<parameter>", where <type> is "simple" or "scalable".
 * The argument is split on ':' in a private copy, and the matching
 * app_setup_*_counter() function is invoked.
 * Returns 0 on success, -1 on an empty or unknown type, on a setup failure,
 * or when the copy cannot be allocated.
 */
int app_parse_counter(struct app *app, const char *argument_string) {
	char *scratch = strdup(argument_string);
	char *cursor = NULL;
	char *kind;
	char *parameter;
	int status = -1;

	if (NULL == scratch) {
		printf("fail to duplicate argrment string.\n");
		return -1;
	}

	kind = strtok_r(scratch, ":", &cursor);
	if (NULL != kind) {
		parameter = strtok_r(NULL, ":", &cursor);

		if (0 == strcmp("simple", kind)) {
			status = (0 == app_setup_simpler_counter(app, NULL)) ? 0 : -1;
		}
		else if (0 == strcmp("scalable", kind)) {
			status = (0 == app_setup_scalable_counter(app, parameter)) ? 0 : -1;
		}
		/* any other type leaves status at -1 */
	}

	__free(scratch);
	return status;
}


/*
 * Handle the -l option: convert `argument_string` with atoi() and store it
 * as the loop count.
 * Returns 0 on success, -1 (after printing a message) when the value is not
 * greater than zero.
 */
int app_parse_loop_count(struct app *app, const char *argument_string) {
	const int requested = atoi(argument_string);

	if (requested > 0) {
		app->loop_count = requested;
		return 0;
	}

	printf("loop count should great than zero.\n");
	return -1;
}

#define ANY_CPU -1
int app_parse_thread_count(struct app *app, const char *argument_string) {
	int value = atoi(argument_string);
	if (value <= 0) {
		printf("thread count should great than zero.\n");
		return -1;
	}

	app->threads = __malloc(sizeof(struct thread_info) * value);
	if (NULL == app->threads) {
		printf("fail to alloc memory for thread info.\n");
		goto fail_alloc_thread_id;
	}
	
	int i;
	for (i = 0; i < value; ++i) {
		app->threads[i].index = i;
		app->threads[i].tid = 0;
		CPU_ZERO(&(app->threads[i].cpu_set));
	}


	app->thread_count = value;
	
	return 0;
fail_alloc_thread_id:
	return -1;
	
}

/*
 * Make `cpu_set` contain exactly the CPU `cpu_index`.
 * The set is always cleared first; for a negative index it stays empty and
 * -1 is returned. Otherwise returns 0.
 */
int cpu_set_setup_by_index(cpu_set_t *cpu_set, int cpu_index) 
{
	int status = -1;

	CPU_ZERO(cpu_set);
	if (cpu_index >= 0) {
		CPU_SET(cpu_index, cpu_set);
		status = 0;
	}
	return status;
}

/*
 * Fill `cpu_set` from a hexadecimal bitmap string.
 *
 * bitmap_string: hex digits with an optional "0x"/"0X" prefix. The last
 *                digit describes CPUs 0..3, the one before it CPUs 4..7,
 *                and so on; bit k of a digit selects the k-th CPU of its
 *                group (e.g. "0x5" selects CPU 0 and CPU 2).
 *
 * Returns 0 on success (an empty digit string yields an empty set).
 * Returns -1 and leaves `cpu_set` empty if any character is not a hex digit.
 */
int cpu_set_setup_by_bitmap(cpu_set_t *cpu_set, const char *bitmap_string) 
{
	size_t digit_count;
	size_t i;

	CPU_ZERO(cpu_set);
	if ((0 == strncmp("0x", bitmap_string, 2))
	||  (0 == strncmp("0X", bitmap_string, 2))) {
		bitmap_string = bitmap_string + 2;
	}

	digit_count = strlen(bitmap_string);
	for (i = 0; i < digit_count; ++i) {
		/* <ctype.h> functions need an unsigned char value, not a plain char */
		const unsigned char symbol = (unsigned char)bitmap_string[i];
		/* lowest CPU index described by this digit */
		const size_t base_cpu = (digit_count - 1 - i) * 4;
		unsigned int nibble;
		unsigned int bit;

		if (!isxdigit(symbol)) {
			CPU_ZERO(cpu_set);
			return -1;
		}

		if (isdigit(symbol)) {
			/* 0-9 */
			nibble = (unsigned int)(symbol - '0');
		}
		else {
			/* a-f / A-F */
			nibble = (unsigned int)(tolower(symbol) - 'a') + 10U;
		}

		for (bit = 0; bit < 4; ++bit) {
			if (nibble & (1U << bit)) {
				CPU_SET(base_cpu + bit, cpu_set);
			}
		}
	}
	return 0;
}

/*
 * Append one byte of the bitmap as hex text to buffer[used .. len).
 * Returns the new number of stored characters; when the text does not fit
 * the count is clamped to len - 1, so later calls become no-ops instead of
 * writing outside the buffer.
 */
static int cpu_set_string_append_hex(char *buffer, int len, int used, unsigned int value, int zero_pad) {
	const int room = len - used;
	int written;

	if (room <= 1) {
		/* only the terminating NUL fits */
		return used;
	}
	written = zero_pad
		? snprintf(buffer + used, (size_t)room, "%02x", value)
		: snprintf(buffer + used, (size_t)room, "%x", value);
	if (written < 0) {
		return used;
	}
	return (written >= room) ? (len - 1) : (used + written);
}

/*
 * Write `cpu_set` into `buffer` as a hexadecimal bitmap: "0x" followed by
 * the set's bytes from the highest non-zero one down to byte 0 (e.g. a set
 * holding CPU 0 and CPU 2 becomes "0x5", an empty set becomes "0x0").
 * The first printed byte has no leading zero; the following bytes are
 * printed as two digits each.
 *
 * buffer/len: destination and its size in bytes. The result is always
 *             NUL-terminated and silently truncated if it does not fit.
 *             Nothing is written when `buffer` is NULL or `len` <= 0.
 * Always returns 0.
 */
int cpu_set_to_string(char *buffer, int len, cpu_set_t *cpu_set) {
	const int byte_total = (int)sizeof(*cpu_set);
	int byte_index;
	int used;
	int started = 0;	/* becomes 1 once the leading byte has been printed */
	int prefix;

	if (NULL == buffer || len <= 0) {
		return 0;
	}

	prefix = snprintf(buffer, (size_t)len, "0x");
	if (prefix < 0) {
		buffer[0] = '\0';
		return 0;
	}
	used = (prefix >= len) ? (len - 1) : prefix;

	for (byte_index = byte_total - 1; byte_index >= 0; --byte_index) {
		unsigned int bits = 0;
		int bit;

		/* collect the 8 CPUs covered by this byte; CPU (8*byte + bit) -> bit */
		for (bit = 0; bit < 8; ++bit) {
			if (CPU_ISSET(byte_index * 8 + bit, cpu_set)) {
				bits |= (1U << bit);
			}
		}

		if (started) {
			used = cpu_set_string_append_hex(buffer, len, used, bits, 1);
		}
		else if ((0 != bits) || (0 == byte_index)) {
			/* leading byte: skip higher all-zero bytes, but always print byte 0 */
			used = cpu_set_string_append_hex(buffer, len, used, bits, 0);
			started = 1;
		}
	}
	return 0;
}

/*
 * Handle the -c option: a comma-separated list with one entry per thread,
 * assigned to app->threads[0], [1], ... in order.
 *
 * Each entry is interpreted as follows:
 *   - it does not start with a digit (e.g. "-1"): empty CPU set;
 *   - it starts with "0x"/"0X": hexadecimal CPU bitmap; an invalid bitmap
 *     leaves the set empty (the result of cpu_set_setup_by_bitmap() is
 *     deliberately not treated as an error here);
 *   - otherwise: decimal CPU index, converted with atoi().
 * Entries beyond the configured thread count are ignored. Empty entries
 * ("0,,1") are skipped by strtok_r() and therefore do not consume a thread.
 *
 * Requires -t to have been processed first.
 * Returns 0 on success, -1 (after printing a message) if the thread array
 * does not exist yet or the argument cannot be duplicated.
 */
int app_parse_thread_cpu(struct app *app, const char *argument_string) {
	char *token;
	char *next_tokens;
	char *dup_tokens;
	int index = 0;

	if (NULL == app->threads) {
		printf("should set up thread count (-t) before thread cpu (-c).\n");
		return -1;
	}
	dup_tokens = strdup(argument_string);
	if (NULL == dup_tokens) {
		printf("fail to duplicate argrment string.\n");
		return -1;
	}

	token = strtok_r(dup_tokens, ",", &next_tokens);
	while (NULL != token && index < app->thread_count) {
		cpu_set_t *target = &(app->threads[index].cpu_set);

		/* <ctype.h> functions need an unsigned char value, not a plain char */
		if (!isdigit((unsigned char)token[0])) {
			/* e.g. a token starting with '-' */
			CPU_ZERO(target);
		}
		else if ((0 == strncmp("0x", token, 2))
			 ||  (0 == strncmp("0X", token, 2))) {
			/* cpu bitmap in hex */
			cpu_set_setup_by_bitmap(target, token);
		}
		else {
			/* cpu index in dec */
			cpu_set_setup_by_index(target, atoi(token));
		}

		token = strtok_r(NULL, ",", &next_tokens);
		++index;
	}
	__free(dup_tokens);
	return 0;
}


/*
 * Print the command-line help to stdout.
 * `program_name` is accepted for the caller's convenience but is not part
 * of the printed text.
 *
 * The CPU list is documented with ',' as separator because that is what
 * the -c parser splits on.
 */
void usage_show(const char *program_name) {
	(void)program_name;

	printf("usage: \n");
	printf("  -C simple | scalable:<threshold>\n");
	printf("  -l <loop_count>\n");
	printf("  -t <thread_count>\n");
	printf("  -c <thread_cpu_list>\n");
	printf("     <thread_cpu_list>=<cpu_set_for_thread_0>,<cpu_set_for_thread_1>,...\n");
	printf("     <cpu_set_for_thread_*> is hexadecimal, is a bitmap of cpu, etc: 0x5, the 0th and 2th cpu is selected\n");
	printf("     <cpu_set_for_thread_*> is decimal, is a index of cpu, etc: 7, the 7th cpu is selected.\n");
	printf("     <cpu_set_for_thread_*> is negative, etc: -1, none of cpu is selected.\n");
}

/* ----------------------------- */
/*
 * Store the current time of day in `timestamp`.
 * Returns the gettimeofday() result.
 */
int timestamp_now(struct timeval *timestamp) {
	const int rc = gettimeofday(timestamp, NULL);

	return rc;
}

/*
 * Compute `*delta = *time_stop - *time_start`.
 * When the microsecond part of the stop time is smaller than that of the
 * start time, one second is borrowed so that tv_usec stays non-negative.
 * Always returns 0.
 */
int timestamp_delta(struct timeval *delta, struct timeval *time_stop, struct timeval *time_start) {
	const int must_borrow = time_stop->tv_usec < time_start->tv_usec;

	delta->tv_sec = time_stop->tv_sec - time_start->tv_sec - (must_borrow ? 1 : 0);
	delta->tv_usec = time_stop->tv_usec - time_start->tv_usec + (must_borrow ? 1000000 : 0);
	return 0;
}

/*
 * Print `time` to stdout as "time: <seconds>.<microseconds, 6 digits>".
 * Both fields are converted to long before printing so that the format
 * specifiers match the argument types.
 * Always returns 0.
 */
int timestamp_dump(struct timeval *time) {
	printf("time: %ld.%06ld\n", (long)time->tv_sec, (long)time->tv_usec);
	return 0;
}

/* ----------------------------- */
extern struct app app;

/*
 * Worker thread body. `arg` points to this worker's struct thread_info.
 *
 * The worker applies its requested CPU affinity, waits at the start barrier
 * until main releases everyone, then bumps the shared counter by 1
 * app.loop_count times and exits with a NULL result.
 */
void *test_routine(void *arg) {
	struct thread_info *self = arg;
	int remaining;

	thread_set_affinity(pthread_self(), &(self->cpu_set));

	/* line up with main and the other workers */
	barrier_wait(&(app.barrier_start));

	for (remaining = app.loop_count; remaining > 0; --remaining) {
		(app.counter_update)(app.counter, 1);
	}
	pthread_exit(NULL);
}

/*
 * Run the benchmark: start all workers, release them together through the
 * start barrier, wait for them to finish and print the elapsed time.
 *
 * The start timestamp is taken just before main enters the barrier and the
 * stop timestamp after the last worker has been joined.
 *
 * Returns 0. If a worker cannot be created the process is terminated with
 * EXIT_FAILURE: the barrier was sized for all workers plus main, so the
 * workers already started would wait at it forever and can be neither
 * released nor joined.
 */
int app_run(struct app *app) {
	struct timeval time_delta;
	int error_code;
	int i;

	/* create children */
	for (i = 0; i < app->thread_count; ++i) {
		error_code = thread_create(&(app->threads[i].tid), test_routine, &(app->threads[i]));
		if (0 != error_code) {
			fprintf(stderr, "fail to create thread %d: %s\n", i, strerror(error_code));
			exit(EXIT_FAILURE);
		}
	}

	/* short pause before the start signal; presumably lets the children reach the barrier first */
	usleep(1000);
	timestamp_now(&(app->time_start));
	/* main is the last party: entering the barrier releases all children */
	barrier_wait(&(app->barrier_start));

	/* wait until every child is done */
	for (i = 0; i < app->thread_count; ++i) {
		thread_join(app->threads[i].tid, NULL);
	}
	timestamp_now(&(app->time_stop));

	/* print out test result */
	timestamp_delta(&time_delta, &(app->time_stop), &(app->time_start));
	timestamp_dump(&time_delta);
	return 0;
}


/* ----------------------------- */
/* The single application instance; worker threads reach it through `extern`. */
struct app app;

/*
 * Entry point: set defaults, parse the command line, run the benchmark and
 * clean up.
 *
 * Exit status: EXIT_SUCCESS after a completed run, EXIT_FAILURE when
 * app_parse() reports an error (negative result). Any other non-zero result
 * from app_parse() is treated as "nothing to run" and exits successfully
 * without starting the benchmark.
 */
int main(int argc, char **argv) {
	int parse_result;

	app_setup(&app);

	parse_result = app_parse(&app, argc, argv);
	if (0 != parse_result) {
		app_cleanup(&app);
		return (parse_result < 0) ? EXIT_FAILURE : EXIT_SUCCESS;
	}

	/* app_dump(&app) can be called here to print the parsed configuration */
	app_run(&app);

	app_cleanup(&app);
	return EXIT_SUCCESS;
}

Makefile 代碼內容:

# Build the counter benchmark from main.c; the result is ./a.out.
# -pthread sets both the compile and the link flags needed for POSIX threads.
.PHONY: all clean

all:
	gcc -g -Wall -pthread main.c

# Remove build products (options are placed before the operands for portability).
clean:
	rm -f a.out *.o

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章