diff options
-rw-r--r-- | Makefile | 10 | ||||
-rw-r--r-- | color.c | 36 | ||||
-rw-r--r-- | color.h | 4 | ||||
-rw-r--r-- | nebula2.c | 212 | ||||
-rw-r--r-- | render.c | 278 |
5 files changed, 515 insertions, 25 deletions
@@ -1,15 +1,19 @@ CC=gcc CFLAGS=-Wall -Werror -pedantic LIBS=-lpthread +# (Optimization) flags as suggested by SFMT docu. +SFMTFLAGS=-DHAVE_SSE2 -DSFMT_MEXP=19937 +OPTIMIZE=-O3 -fno-strict-aliasing +#OPTIMIZE=-ggdb -nebula2: nebula2.o config.o render.o statefile.o mutex_helpers.o iniparser/libiniparser.a - $(CC) $(CFLAGS) $(LIBS) -o nebula2 nebula2.o config.o render.o statefile.o mutex_helpers.o iniparser/libiniparser.a +nebula2: nebula2.o config.o render.o statefile.o color.o mutex_helpers.o iniparser/libiniparser.a SFMT/SFMT.c + $(CC) $(CFLAGS) $(OPTIMIZE) $(SFMTFLAGS) $(LIBS) -o nebula2 nebula2.o config.o render.o statefile.o color.o mutex_helpers.o iniparser/libiniparser.a SFMT/SFMT.c iniparser/libiniparser.a: make -C iniparser libiniparser.a %.o:%.c - $(CC) $(CFLAGS) -c -o $@ $< + $(CC) $(CFLAGS) $(OPTIMIZE) $(SFMTFLAGS) -c -o $@ $< clean: rm -f *.o @@ -0,0 +1,36 @@ +#include "color.h" + +static int +fix_range(int x, int min, int max) { + if(x < min) { + return min; + } + if(x > max) { + return max; + } + return x; +} + +color_t +color_fix(color_t col) { + col.r = fix_range(col.r, 0, 255); + col.g = fix_range(col.g, 0, 255); + col.b = fix_range(col.b, 0, 255); + return col; +} + +color_t +color_add(color_t a, color_t b) { + a.r += b.r; + a.g += b.g; + a.b += b.b; + return a; +} + +color_t +color_mul(color_t col, double s) { + col.r = ((double) col.r) * s; + col.g = ((double) col.g) * s; + col.b = ((double) col.b) * s; + return col; +} @@ -5,4 +5,8 @@ typedef struct { int r, g, b; } color_t; +extern color_t color_fix(color_t col); +extern color_t color_add(color_t a, color_t b); +extern color_t color_mul(color_t col, double s); + #endif
\ No newline at end of file @@ -11,6 +11,35 @@ #include "render.h" #include "mutex_helpers.h" +#include "SFMT/SFMT.h" + +/* Bailout is currently hard coded... perhaps we should move this to the config file? */ +#define BAILOUT 8 + +inline static long +fast_floor(double input) { + return (long) input - (input > 0 ? 0 : 1); +} + +inline static void +calc_mandelbrot(double cx, double cy, double* zx, double* zy) { + double ty; + ty = (*zy) * (*zy) - (*zx) * (*zx) + cy; + *zx = 2.0 * (*zy) * (*zx) + cx; + *zy = ty; +} + +void +precalc_nebula_params(config_t* conf, double* conv, double* mult_x, double* mult_y, int* hw, int* hh) { + *conv = ((conf->width < conf->height) ? conf->width : conf->height) / 4.0; + + *mult_x = (double) (0.8 * conf->width / *conv) / (UINT32_MAX / 2.0); + *mult_y = (double) (0.8 * conf->height / *conv) / (UINT32_MAX / 2.0); + + *hw = conf->width / 2; + *hh = conf->height / 2; +} + void usage(void) { fputs("nebula2 needs the name of a config file as 1st argument.\n", stderr); @@ -101,12 +130,20 @@ nebula_data_destroy(nebula_data_t* nd) { free(nd); } +typedef struct { + int x, y; +} pos_t; + /* Data of a single worker */ typedef struct { int id; pthread_mutex_t* mu; int ok; + pos_t* pointlist; + + sfmt_t* sfmt_state; + config_t* conf; nebula_data_t* nd; @@ -114,11 +151,49 @@ typedef struct { int thread_started; } worker_data_t; +inline static pos_t +calc_pos(double zx, double zy, size_t width, size_t height, double conv, int hw, int hh) { + pos_t rv; + rv.x = fast_floor(zx * conv) + hw; + rv.y = fast_floor(zy * conv) + hh; + + if((rv.x < 0) || (rv.y < 0) || (rv.x >= width) || (rv.y >= height)) { + rv.x = -1; + } + + return rv; +} + /* The background worker */ void* worker(void* _wd) { - worker_data_t* wd = _wd; - nebula_data_t* nd = wd->nd; + /* Precalculated data (scaling factors etc.) */ + double conv, mult_x, mult_y; + int hw, hh; + + /* Mandelbrot point vars */ + uint64_t xy; + double cx, cy, zx, zy; + + /* Misc... */ + int todo; + int iter, maxiter, mii; + size_t off, mapsize; + pos_t pos; + + /* Aliases */ + worker_data_t* wd = _wd; + nebula_data_t* nd = wd->nd; + config_t* conf = wd->conf; + sfmt_t* sfmt_state = wd->sfmt_state; + pos_t* pointlist; + uint32_t* map = nd->map; + size_t width = conf->width; + size_t height = conf->height; + + precalc_nebula_params(conf, &conv, &mult_x, &mult_y, &hw, &hh); + mapsize = width * height; + maxiter = conf->iters[conf->iters_n - 1]; for(;; ) { jobrq_set(nd, wd->id); @@ -126,10 +201,71 @@ worker(void* _wd) { if(!wd->ok) { return NULL; } - /* TODO: Do magic... */ + + for(todo = conf->jobsize; todo--; ) { + xy = sfmt_genrand_uint64(sfmt_state); + cx = ((int32_t) (xy >> 32)) * mult_x; + cy = ((int32_t) (xy & UINT32_MAX)) * mult_y; + zx = zy = .0; + + pointlist = wd->pointlist; + for(iter = 0; iter < maxiter; iter++) { + calc_mandelbrot(cx, cy, &zx, &zy); + *(pointlist++) = calc_pos(zx, zy, width, height, conv, hw, hh); + if((zx * zx) + (zy * zy) > BAILOUT) { + for(mii = 0; iter > conf->iters[mii]; mii++) {} + off = mii * mapsize; + do { + /* + * To be 100% accurate, we would need to synchronize the access to the map here. + * We ignore this, since collision should be seldom. + */ + pos = *(--pointlist); + if(pos.x < 0) { + continue; + } + map[off + width * pos.y + pos.x]++; + } while(iter-- > 0); + break; + } + } + } } } +sfmt_t* +init_sfmt(void) { + sfmt_t* sfmt_state = NULL; + uint32_t seed; + FILE* fh = NULL; + + if(!(sfmt_state = malloc(sizeof(sfmt_t)))) { + goto failed; + } + + if(!(fh = fopen("/dev/urandom", "rb"))) { + goto failed; + } + + if(fread(&seed, sizeof(uint32_t), 1, fh) != 1) { + goto failed; + } + + fclose(fh); + + sfmt_init_gen_rand(sfmt_state, seed); + + return sfmt_state; +failed: + if(sfmt_state) { + free(sfmt_state); + } + if(fh) { + fclose(fh); + } + return NULL; +} + /* Init and run a worker */ int worker_init(worker_data_t* wd, int id, config_t* conf, nebula_data_t* nd) { @@ -139,17 +275,40 @@ worker_init(worker_data_t* wd, int id, config_t* conf, nebula_data_t* nd) { wd->nd = nd; wd->thread_started = 0; + wd->mu = NULL; + wd->pointlist = NULL; + wd->sfmt_state = NULL; + + if(!(wd->sfmt_state = init_sfmt())) { + goto failed; + } + if(!(wd->mu = mutex_create())) { - return 0; + goto failed; + } + + if(!(wd->pointlist = malloc(sizeof(pos_t) * conf->iters[conf->iters_n - 1]))) { + goto failed; } if(pthread_create(&(wd->thread), NULL, worker, wd) != 0) { - mutex_destroy(wd->mu); - return 0; + goto failed; } wd->thread_started = 1; return 1; + +failed: + if(wd->mu) { + mutex_destroy(wd->mu); + } + if(wd->pointlist) { + free(wd->pointlist); + } + if(wd->sfmt_state) { + free(wd->sfmt_state); + } + return 0; } void @@ -157,6 +316,12 @@ worker_cleanup(worker_data_t* wd) { if(wd->thread_started) { pthread_join(wd->thread, NULL); } + if(wd->pointlist) { + free(wd->pointlist); + } + if(wd->sfmt_state) { + free(wd->sfmt_state); + } mutex_destroy(wd->mu); } @@ -191,12 +356,26 @@ setup_sighandler(void) { return 1; } +void +stop_workers(nebula_data_t* nd, worker_data_t* workers, int* workers_alive) { + int rq; + + while(*workers_alive > 0) { + rq = jobrq_get(nd); + if(rq >= 0) { + workers[rq].ok = 0; + pthread_mutex_unlock(workers[rq].mu); + (*workers_alive)--; + } + } +} + int nebula2(config_t* conf) { int rv = 1; nebula_data_t* nd = NULL; uint32_t jobs_done; - worker_data_t* workers; + worker_data_t* workers = NULL; int i; int rq; int workers_alive = 0; @@ -212,11 +391,11 @@ nebula2(config_t* conf) { } nd->jobs_todo = conf->jobs - jobs_done; - if(!(workers = calloc(conf->procn, sizeof(worker_data_t)))) { + if(!(workers = calloc(conf->threads, sizeof(worker_data_t)))) { fputs("Could not allocate memory for worker data.\n", stderr); goto tidyup; } - for(i = 0; i < conf->procn; i++) { + for(i = 0; i < conf->threads; i++) { if(!(worker_init(&(workers[i]), i, conf, nd))) { fputs("Could not init worker.\n", stderr); goto tidyup; @@ -229,7 +408,6 @@ nebula2(config_t* conf) { goto tidyup; } - /* We need to manually unlock the set mutex once, so the first worker is allowed to set jobrq. */ while(nd->jobs_todo > 0) { rq = jobrq_get(nd); if(rq < 0) { @@ -240,30 +418,24 @@ nebula2(config_t* conf) { (nd->jobs_todo)--; pthread_mutex_unlock(workers[rq].mu); } + stop_workers(nd, workers, &workers_alive); if(!(state_save(conf, nd->map, conf->jobs - nd->jobs_todo))) { fprintf(stderr, "Error while saving state: %s\n", strerror(errno)); goto tidyup; } - + rv = render(conf, nd->map) ? 0 : 1; tidyup: - while(workers_alive > 0) { - rq = jobrq_get(nd); - if(rq >= 0) { - workers[rq].ok = 0; - pthread_mutex_unlock(workers[rq].mu); - workers_alive--; - } - } + stop_workers(nd, workers, &workers_alive); if(nd) { nebula_data_destroy(nd); } if(workers) { - for(i = 0; i < conf->procn; i++) { + for(i = 0; i < conf->threads; i++) { worker_cleanup(&(workers[i])); } free(workers); @@ -1,12 +1,286 @@ #include <stdio.h> #include <stdlib.h> #include <stdint.h> +#include <errno.h> +#include <string.h> #include "config.h" #include "color.h" +/* Adding submap i to all submaps > i to reconstruct the result of single buddhabrot calculations. */ +static void +add_submaps(config_t* conf, uint32_t* map) { + int i, j; + size_t k, mapsize, off_i, off_j; + + mapsize = conf->width * conf->height; + + for(i = conf->iters_n - 1; i <= 0; i--) { + off_i = mapsize * i; + for(j = 0; j < i; j++) { + off_j = mapsize * j; + for(k = 0; k < mapsize; k++) { + map[off_i + k] += map[off_j + k]; + } + } + } +} + +#define BYTES_PER_PIXEL 3 +#define OFFSET_bfSize 2 +#define OFFSET_biWidth 18 +#define OFFSET_biHeight 22 +#define HEADERSIZE 54 + +static const char* header_template = "BM \0\0\0\0\x36\0\0\0\x28\0\0\0 \x01\0\x18\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; + +static int +bmp_write_header(FILE* fh, int32_t width, int32_t height) { + char* header; + uint32_t filesize; + + if(!(header = malloc(HEADERSIZE))) { + fputs("Could not allocate memory for BMP header.\n", stderr); + return 0; + } + + filesize = HEADERSIZE + (width * height * BYTES_PER_PIXEL); + height *= -1; + + memcpy(header, header_template, HEADERSIZE); + memcpy(header + OFFSET_bfSize, &filesize, 4); + memcpy(header + OFFSET_biWidth, &width, 4); + memcpy(header + OFFSET_biHeight, &height, 4); + + if(fwrite(header, HEADERSIZE, 1, fh) != 1) { + fprintf(stderr, "Could not write BMP header: %s\n", strerror(errno)); + free(header); + return 0; + } + + free(header); + return 1; +} + +static int +bmp_write_pixel(FILE* fh, color_t col) { + uint8_t pixel[3]; + pixel[0] = col.b; + pixel[1] = col.g; + pixel[2] = col.r; + + return (fwrite(pixel, 3, 1, fh) == 1); +} + +typedef struct lookup_elem_t { + uint32_t val; + struct lookup_elem_t* prev; + struct lookup_elem_t* next; +} lookup_elem_t; + +static lookup_elem_t* +new_lookup_elem(uint32_t val, lookup_elem_t* prev, lookup_elem_t* next) { + lookup_elem_t* rv = malloc(sizeof(lookup_elem_t)); + if(!rv) { + return NULL; + } + + rv->val = val; + rv->prev = prev; + rv->next = next; + + return rv; +} + +static int +maybe_insert(uint32_t val, lookup_elem_t* a, lookup_elem_t* b) { + lookup_elem_t* new; + + if(!(new = new_lookup_elem(val, a, b))) { + return 0; + } + + if(!a) { + b->prev = new; + return 1; + } + if(!b) { + a->next = new; + return 1; + } + + if((a->val < val) && (val < b->val)) { + a->next = new; + b->prev = new; + } else { + free(new); + } + return 1; +} + +static int +build_lookup(uint32_t* map, size_t mapsize, lookup_elem_t** e) { + size_t i; + uint32_t val; + + for(i = 0; i < mapsize; i++) { + val = map[i]; + + if(!*e) { + if(!(*e = new_lookup_elem(val, NULL, NULL))) { + return 0; + } + continue; + } + + for(;;) { + if((*e)->val == val) { + break; + } + + if(val > (*e)->val) { + if(!maybe_insert(val, *e, (*e)->next)) { + return 0; + } + *e = (*e)->next; + } else { + if(!maybe_insert(val, (*e)->prev, *e)) { + return 0; + } + *e = (*e)->prev; + } + } + } + + while((*e)->prev) { + *e = (*e)->prev; + } + + return 1; +} + +static size_t +lookup_len(lookup_elem_t* e) { + size_t len; + + if(!e) { + return 0; + } + + while(e->prev) { + e = e->prev; + } + + len = 0; + while(e) { + len++; + e = e->next; + } + return len; +} + +static void +lookup_destroy(lookup_elem_t* e) { + lookup_elem_t* next; + while(e->prev) { + e = e->prev; + } + while(e) { + next = e->next; + free(e); + e = next; + } +} + +static size_t +lookup(uint32_t val, lookup_elem_t** e, size_t* pos) { + while((*e)->val != val) { + if(val < (*e)->val) { + *e = (*e)->prev; + (*pos)--; + } else { + *e = (*e)->next; + (*pos)++; + } + } + return *pos; +} + int render(config_t* conf, uint32_t* map) { - /* TODO */ - return 1; + int rv = 0; + size_t i, j; + color_t col; + double factor; + FILE* fh = NULL; + size_t mapsize = conf->width * conf->height; + + lookup_elem_t** lookups = NULL; + size_t* poss = NULL; + size_t* lens = NULL; + + lookups = calloc(sizeof(lookup_elem_t*), conf->iters_n); + poss = calloc(sizeof(size_t), conf->iters_n); + lens = calloc(sizeof(size_t), conf->iters_n); + if((!lookups) || (!poss) || (!lens)) { + fputs("Could not allocate memory for lookup tables.\n", stderr); + goto tidyup; + } + + if(!(fh = fopen(conf->output, "wb"))) { + fprintf(stderr, "Could not open output file '%s': %s\n", conf->output, strerror(errno)); + goto tidyup; + } + + if(!bmp_write_header(fh, conf->width, conf->height)) { + goto tidyup; + } + + add_submaps(conf, map); + + for(i = 0; i < conf->iters_n; i++) { + if(!build_lookup(map+(mapsize*i), mapsize, &(lookups[i]))) { + fputs("Could not build lookup table.\n", stderr); + goto tidyup; + } + lens[i] = lookup_len(lookups[i]); + } + + for(i = 0; i < mapsize; i++) { + col.r = 0; + col.g = 0; + col.b = 0; + + for(j = 0; j < conf->iters_n; j++) { + factor = (double) lookup(map[i + mapsize * j], &(lookups[j]), &(poss[j])) / (double) lens[j]; + col = color_add(col, color_mul(conf->colors[j], factor)); + } + if(!bmp_write_pixel(fh, color_fix(col))) { + fputs("Could not write pixel data.", stderr); + goto tidyup; + } + } + + rv = 1; + +tidyup: + if(fh) { + fclose(fh); + } + if(lookups) { + for(i = 0; i < conf->iters_n; i++) { + if(lookups[i]) { + lookup_destroy(lookups[i]); + } + } + free(lookups); + } + if(poss) { + free(poss); + } + if(lens) { + free(lens); + } + + return rv; } |