summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile10
-rw-r--r--color.c36
-rw-r--r--color.h4
-rw-r--r--nebula2.c212
-rw-r--r--render.c278
5 files changed, 515 insertions, 25 deletions
diff --git a/Makefile b/Makefile
index 238d359..76a46b1 100644
--- a/Makefile
+++ b/Makefile
@@ -1,15 +1,19 @@
CC=gcc
CFLAGS=-Wall -Werror -pedantic
LIBS=-lpthread
+# (Optimization) flags as suggested by SFMT docu.
+SFMTFLAGS=-DHAVE_SSE2 -DSFMT_MEXP=19937
+OPTIMIZE=-O3 -fno-strict-aliasing
+#OPTIMIZE=-ggdb
-nebula2: nebula2.o config.o render.o statefile.o mutex_helpers.o iniparser/libiniparser.a
- $(CC) $(CFLAGS) $(LIBS) -o nebula2 nebula2.o config.o render.o statefile.o mutex_helpers.o iniparser/libiniparser.a
+nebula2: nebula2.o config.o render.o statefile.o color.o mutex_helpers.o iniparser/libiniparser.a SFMT/SFMT.c
+ $(CC) $(CFLAGS) $(OPTIMIZE) $(SFMTFLAGS) $(LIBS) -o nebula2 nebula2.o config.o render.o statefile.o color.o mutex_helpers.o iniparser/libiniparser.a SFMT/SFMT.c
iniparser/libiniparser.a:
make -C iniparser libiniparser.a
%.o:%.c
- $(CC) $(CFLAGS) -c -o $@ $<
+ $(CC) $(CFLAGS) $(OPTIMIZE) $(SFMTFLAGS) -c -o $@ $<
clean:
rm -f *.o
diff --git a/color.c b/color.c
new file mode 100644
index 0000000..9a5c149
--- /dev/null
+++ b/color.c
@@ -0,0 +1,36 @@
+#include "color.h"
+
+static int
+fix_range(int x, int min, int max) {
+ if(x < min) {
+ return min;
+ }
+ if(x > max) {
+ return max;
+ }
+ return x;
+}
+
+color_t
+color_fix(color_t col) {
+ col.r = fix_range(col.r, 0, 255);
+ col.g = fix_range(col.g, 0, 255);
+ col.b = fix_range(col.b, 0, 255);
+ return col;
+}
+
+color_t
+color_add(color_t a, color_t b) {
+ a.r += b.r;
+ a.g += b.g;
+ a.b += b.b;
+ return a;
+}
+
+color_t
+color_mul(color_t col, double s) {
+ col.r = ((double) col.r) * s;
+ col.g = ((double) col.g) * s;
+ col.b = ((double) col.b) * s;
+ return col;
+}
diff --git a/color.h b/color.h
index badc6e7..61a5a56 100644
--- a/color.h
+++ b/color.h
@@ -5,4 +5,8 @@ typedef struct {
int r, g, b;
} color_t;
+extern color_t color_fix(color_t col);
+extern color_t color_add(color_t a, color_t b);
+extern color_t color_mul(color_t col, double s);
+
#endif \ No newline at end of file
diff --git a/nebula2.c b/nebula2.c
index 9f0ae19..1e0a948 100644
--- a/nebula2.c
+++ b/nebula2.c
@@ -11,6 +11,35 @@
#include "render.h"
#include "mutex_helpers.h"
+#include "SFMT/SFMT.h"
+
+/* Bailout is currently hard coded... perhaps we should move this to the config file? */
+#define BAILOUT 8
+
+inline static long
+fast_floor(double input) {
+ return (long) input - (input > 0 ? 0 : 1);
+}
+
+inline static void
+calc_mandelbrot(double cx, double cy, double* zx, double* zy) {
+ double ty;
+ ty = (*zy) * (*zy) - (*zx) * (*zx) + cy;
+ *zx = 2.0 * (*zy) * (*zx) + cx;
+ *zy = ty;
+}
+
+void
+precalc_nebula_params(config_t* conf, double* conv, double* mult_x, double* mult_y, int* hw, int* hh) {
+ *conv = ((conf->width < conf->height) ? conf->width : conf->height) / 4.0;
+
+ *mult_x = (double) (0.8 * conf->width / *conv) / (UINT32_MAX / 2.0);
+ *mult_y = (double) (0.8 * conf->height / *conv) / (UINT32_MAX / 2.0);
+
+ *hw = conf->width / 2;
+ *hh = conf->height / 2;
+}
+
void
usage(void) {
fputs("nebula2 needs the name of a config file as 1st argument.\n", stderr);
@@ -101,12 +130,20 @@ nebula_data_destroy(nebula_data_t* nd) {
free(nd);
}
+typedef struct {
+ int x, y;
+} pos_t;
+
/* Data of a single worker */
typedef struct {
int id;
pthread_mutex_t* mu;
int ok;
+ pos_t* pointlist;
+
+ sfmt_t* sfmt_state;
+
config_t* conf;
nebula_data_t* nd;
@@ -114,11 +151,49 @@ typedef struct {
int thread_started;
} worker_data_t;
+inline static pos_t
+calc_pos(double zx, double zy, size_t width, size_t height, double conv, int hw, int hh) {
+ pos_t rv;
+ rv.x = fast_floor(zx * conv) + hw;
+ rv.y = fast_floor(zy * conv) + hh;
+
+ if((rv.x < 0) || (rv.y < 0) || (rv.x >= width) || (rv.y >= height)) {
+ rv.x = -1;
+ }
+
+ return rv;
+}
+
/* The background worker */
void*
worker(void* _wd) {
- worker_data_t* wd = _wd;
- nebula_data_t* nd = wd->nd;
+ /* Precalculated data (scaling factors etc.) */
+ double conv, mult_x, mult_y;
+ int hw, hh;
+
+ /* Mandelbrot point vars */
+ uint64_t xy;
+ double cx, cy, zx, zy;
+
+ /* Misc... */
+ int todo;
+ int iter, maxiter, mii;
+ size_t off, mapsize;
+ pos_t pos;
+
+ /* Aliases */
+ worker_data_t* wd = _wd;
+ nebula_data_t* nd = wd->nd;
+ config_t* conf = wd->conf;
+ sfmt_t* sfmt_state = wd->sfmt_state;
+ pos_t* pointlist;
+ uint32_t* map = nd->map;
+ size_t width = conf->width;
+ size_t height = conf->height;
+
+ precalc_nebula_params(conf, &conv, &mult_x, &mult_y, &hw, &hh);
+ mapsize = width * height;
+ maxiter = conf->iters[conf->iters_n - 1];
for(;; ) {
jobrq_set(nd, wd->id);
@@ -126,10 +201,71 @@ worker(void* _wd) {
if(!wd->ok) {
return NULL;
}
- /* TODO: Do magic... */
+
+ for(todo = conf->jobsize; todo--; ) {
+ xy = sfmt_genrand_uint64(sfmt_state);
+ cx = ((int32_t) (xy >> 32)) * mult_x;
+ cy = ((int32_t) (xy & UINT32_MAX)) * mult_y;
+ zx = zy = .0;
+
+ pointlist = wd->pointlist;
+ for(iter = 0; iter < maxiter; iter++) {
+ calc_mandelbrot(cx, cy, &zx, &zy);
+ *(pointlist++) = calc_pos(zx, zy, width, height, conv, hw, hh);
+ if((zx * zx) + (zy * zy) > BAILOUT) {
+ for(mii = 0; iter > conf->iters[mii]; mii++) {}
+ off = mii * mapsize;
+ do {
+ /*
+ * To be 100% accurate, we would need to synchronize the access to the map here.
+ * We ignore this, since collision should be seldom.
+ */
+ pos = *(--pointlist);
+ if(pos.x < 0) {
+ continue;
+ }
+ map[off + width * pos.y + pos.x]++;
+ } while(iter-- > 0);
+ break;
+ }
+ }
+ }
}
}
+sfmt_t*
+init_sfmt(void) {
+ sfmt_t* sfmt_state = NULL;
+ uint32_t seed;
+ FILE* fh = NULL;
+
+ if(!(sfmt_state = malloc(sizeof(sfmt_t)))) {
+ goto failed;
+ }
+
+ if(!(fh = fopen("/dev/urandom", "rb"))) {
+ goto failed;
+ }
+
+ if(fread(&seed, sizeof(uint32_t), 1, fh) != 1) {
+ goto failed;
+ }
+
+ fclose(fh);
+
+ sfmt_init_gen_rand(sfmt_state, seed);
+
+ return sfmt_state;
+failed:
+ if(sfmt_state) {
+ free(sfmt_state);
+ }
+ if(fh) {
+ fclose(fh);
+ }
+ return NULL;
+}
+
/* Init and run a worker */
int
worker_init(worker_data_t* wd, int id, config_t* conf, nebula_data_t* nd) {
@@ -139,17 +275,40 @@ worker_init(worker_data_t* wd, int id, config_t* conf, nebula_data_t* nd) {
wd->nd = nd;
wd->thread_started = 0;
+ wd->mu = NULL;
+ wd->pointlist = NULL;
+ wd->sfmt_state = NULL;
+
+ if(!(wd->sfmt_state = init_sfmt())) {
+ goto failed;
+ }
+
if(!(wd->mu = mutex_create())) {
- return 0;
+ goto failed;
+ }
+
+ if(!(wd->pointlist = malloc(sizeof(pos_t) * conf->iters[conf->iters_n - 1]))) {
+ goto failed;
}
if(pthread_create(&(wd->thread), NULL, worker, wd) != 0) {
- mutex_destroy(wd->mu);
- return 0;
+ goto failed;
}
wd->thread_started = 1;
return 1;
+
+failed:
+ if(wd->mu) {
+ mutex_destroy(wd->mu);
+ }
+ if(wd->pointlist) {
+ free(wd->pointlist);
+ }
+ if(wd->sfmt_state) {
+ free(wd->sfmt_state);
+ }
+ return 0;
}
void
@@ -157,6 +316,12 @@ worker_cleanup(worker_data_t* wd) {
if(wd->thread_started) {
pthread_join(wd->thread, NULL);
}
+ if(wd->pointlist) {
+ free(wd->pointlist);
+ }
+ if(wd->sfmt_state) {
+ free(wd->sfmt_state);
+ }
mutex_destroy(wd->mu);
}
@@ -191,12 +356,26 @@ setup_sighandler(void) {
return 1;
}
+void
+stop_workers(nebula_data_t* nd, worker_data_t* workers, int* workers_alive) {
+ int rq;
+
+ while(*workers_alive > 0) {
+ rq = jobrq_get(nd);
+ if(rq >= 0) {
+ workers[rq].ok = 0;
+ pthread_mutex_unlock(workers[rq].mu);
+ (*workers_alive)--;
+ }
+ }
+}
+
int
nebula2(config_t* conf) {
int rv = 1;
nebula_data_t* nd = NULL;
uint32_t jobs_done;
- worker_data_t* workers;
+ worker_data_t* workers = NULL;
int i;
int rq;
int workers_alive = 0;
@@ -212,11 +391,11 @@ nebula2(config_t* conf) {
}
nd->jobs_todo = conf->jobs - jobs_done;
- if(!(workers = calloc(conf->procn, sizeof(worker_data_t)))) {
+ if(!(workers = calloc(conf->threads, sizeof(worker_data_t)))) {
fputs("Could not allocate memory for worker data.\n", stderr);
goto tidyup;
}
- for(i = 0; i < conf->procn; i++) {
+ for(i = 0; i < conf->threads; i++) {
if(!(worker_init(&(workers[i]), i, conf, nd))) {
fputs("Could not init worker.\n", stderr);
goto tidyup;
@@ -229,7 +408,6 @@ nebula2(config_t* conf) {
goto tidyup;
}
- /* We need to manually unlock the set mutex once, so the first worker is allowed to set jobrq. */
while(nd->jobs_todo > 0) {
rq = jobrq_get(nd);
if(rq < 0) {
@@ -240,30 +418,24 @@ nebula2(config_t* conf) {
(nd->jobs_todo)--;
pthread_mutex_unlock(workers[rq].mu);
}
+ stop_workers(nd, workers, &workers_alive);
if(!(state_save(conf, nd->map, conf->jobs - nd->jobs_todo))) {
fprintf(stderr, "Error while saving state: %s\n", strerror(errno));
goto tidyup;
}
-
+
rv = render(conf, nd->map) ? 0 : 1;
tidyup:
- while(workers_alive > 0) {
- rq = jobrq_get(nd);
- if(rq >= 0) {
- workers[rq].ok = 0;
- pthread_mutex_unlock(workers[rq].mu);
- workers_alive--;
- }
- }
+ stop_workers(nd, workers, &workers_alive);
if(nd) {
nebula_data_destroy(nd);
}
if(workers) {
- for(i = 0; i < conf->procn; i++) {
+ for(i = 0; i < conf->threads; i++) {
worker_cleanup(&(workers[i]));
}
free(workers);
diff --git a/render.c b/render.c
index b64aa25..5b3d8ca 100644
--- a/render.c
+++ b/render.c
@@ -1,12 +1,286 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
+#include <errno.h>
+#include <string.h>
#include "config.h"
#include "color.h"
+/* Adding submap i to all submaps > i to reconstruct the result of single buddhabrot calculations. */
+static void
+add_submaps(config_t* conf, uint32_t* map) {
+ int i, j;
+ size_t k, mapsize, off_i, off_j;
+
+ mapsize = conf->width * conf->height;
+
+ for(i = conf->iters_n - 1; i <= 0; i--) {
+ off_i = mapsize * i;
+ for(j = 0; j < i; j++) {
+ off_j = mapsize * j;
+ for(k = 0; k < mapsize; k++) {
+ map[off_i + k] += map[off_j + k];
+ }
+ }
+ }
+}
+
+#define BYTES_PER_PIXEL 3
+#define OFFSET_bfSize 2
+#define OFFSET_biWidth 18
+#define OFFSET_biHeight 22
+#define HEADERSIZE 54
+
+static const char* header_template = "BM \0\0\0\0\x36\0\0\0\x28\0\0\0 \x01\0\x18\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+
+static int
+bmp_write_header(FILE* fh, int32_t width, int32_t height) {
+ char* header;
+ uint32_t filesize;
+
+ if(!(header = malloc(HEADERSIZE))) {
+ fputs("Could not allocate memory for BMP header.\n", stderr);
+ return 0;
+ }
+
+ filesize = HEADERSIZE + (width * height * BYTES_PER_PIXEL);
+ height *= -1;
+
+ memcpy(header, header_template, HEADERSIZE);
+ memcpy(header + OFFSET_bfSize, &filesize, 4);
+ memcpy(header + OFFSET_biWidth, &width, 4);
+ memcpy(header + OFFSET_biHeight, &height, 4);
+
+ if(fwrite(header, HEADERSIZE, 1, fh) != 1) {
+ fprintf(stderr, "Could not write BMP header: %s\n", strerror(errno));
+ free(header);
+ return 0;
+ }
+
+ free(header);
+ return 1;
+}
+
+static int
+bmp_write_pixel(FILE* fh, color_t col) {
+ uint8_t pixel[3];
+ pixel[0] = col.b;
+ pixel[1] = col.g;
+ pixel[2] = col.r;
+
+ return (fwrite(pixel, 3, 1, fh) == 1);
+}
+
+typedef struct lookup_elem_t {
+ uint32_t val;
+ struct lookup_elem_t* prev;
+ struct lookup_elem_t* next;
+} lookup_elem_t;
+
+static lookup_elem_t*
+new_lookup_elem(uint32_t val, lookup_elem_t* prev, lookup_elem_t* next) {
+ lookup_elem_t* rv = malloc(sizeof(lookup_elem_t));
+ if(!rv) {
+ return NULL;
+ }
+
+ rv->val = val;
+ rv->prev = prev;
+ rv->next = next;
+
+ return rv;
+}
+
+static int
+maybe_insert(uint32_t val, lookup_elem_t* a, lookup_elem_t* b) {
+ lookup_elem_t* new;
+
+ if(!(new = new_lookup_elem(val, a, b))) {
+ return 0;
+ }
+
+ if(!a) {
+ b->prev = new;
+ return 1;
+ }
+ if(!b) {
+ a->next = new;
+ return 1;
+ }
+
+ if((a->val < val) && (val < b->val)) {
+ a->next = new;
+ b->prev = new;
+ } else {
+ free(new);
+ }
+ return 1;
+}
+
+static int
+build_lookup(uint32_t* map, size_t mapsize, lookup_elem_t** e) {
+ size_t i;
+ uint32_t val;
+
+ for(i = 0; i < mapsize; i++) {
+ val = map[i];
+
+ if(!*e) {
+ if(!(*e = new_lookup_elem(val, NULL, NULL))) {
+ return 0;
+ }
+ continue;
+ }
+
+ for(;;) {
+ if((*e)->val == val) {
+ break;
+ }
+
+ if(val > (*e)->val) {
+ if(!maybe_insert(val, *e, (*e)->next)) {
+ return 0;
+ }
+ *e = (*e)->next;
+ } else {
+ if(!maybe_insert(val, (*e)->prev, *e)) {
+ return 0;
+ }
+ *e = (*e)->prev;
+ }
+ }
+ }
+
+ while((*e)->prev) {
+ *e = (*e)->prev;
+ }
+
+ return 1;
+}
+
+static size_t
+lookup_len(lookup_elem_t* e) {
+ size_t len;
+
+ if(!e) {
+ return 0;
+ }
+
+ while(e->prev) {
+ e = e->prev;
+ }
+
+ len = 0;
+ while(e) {
+ len++;
+ e = e->next;
+ }
+ return len;
+}
+
+static void
+lookup_destroy(lookup_elem_t* e) {
+ lookup_elem_t* next;
+ while(e->prev) {
+ e = e->prev;
+ }
+ while(e) {
+ next = e->next;
+ free(e);
+ e = next;
+ }
+}
+
+static size_t
+lookup(uint32_t val, lookup_elem_t** e, size_t* pos) {
+ while((*e)->val != val) {
+ if(val < (*e)->val) {
+ *e = (*e)->prev;
+ (*pos)--;
+ } else {
+ *e = (*e)->next;
+ (*pos)++;
+ }
+ }
+ return *pos;
+}
+
int
render(config_t* conf, uint32_t* map) {
- /* TODO */
- return 1;
+ int rv = 0;
+ size_t i, j;
+ color_t col;
+ double factor;
+ FILE* fh = NULL;
+ size_t mapsize = conf->width * conf->height;
+
+ lookup_elem_t** lookups = NULL;
+ size_t* poss = NULL;
+ size_t* lens = NULL;
+
+ lookups = calloc(sizeof(lookup_elem_t*), conf->iters_n);
+ poss = calloc(sizeof(size_t), conf->iters_n);
+ lens = calloc(sizeof(size_t), conf->iters_n);
+ if((!lookups) || (!poss) || (!lens)) {
+ fputs("Could not allocate memory for lookup tables.\n", stderr);
+ goto tidyup;
+ }
+
+ if(!(fh = fopen(conf->output, "wb"))) {
+ fprintf(stderr, "Could not open output file '%s': %s\n", conf->output, strerror(errno));
+ goto tidyup;
+ }
+
+ if(!bmp_write_header(fh, conf->width, conf->height)) {
+ goto tidyup;
+ }
+
+ add_submaps(conf, map);
+
+ for(i = 0; i < conf->iters_n; i++) {
+ if(!build_lookup(map+(mapsize*i), mapsize, &(lookups[i]))) {
+ fputs("Could not build lookup table.\n", stderr);
+ goto tidyup;
+ }
+ lens[i] = lookup_len(lookups[i]);
+ }
+
+ for(i = 0; i < mapsize; i++) {
+ col.r = 0;
+ col.g = 0;
+ col.b = 0;
+
+ for(j = 0; j < conf->iters_n; j++) {
+ factor = (double) lookup(map[i + mapsize * j], &(lookups[j]), &(poss[j])) / (double) lens[j];
+ col = color_add(col, color_mul(conf->colors[j], factor));
+ }
+ if(!bmp_write_pixel(fh, color_fix(col))) {
+ fputs("Could not write pixel data.", stderr);
+ goto tidyup;
+ }
+ }
+
+ rv = 1;
+
+tidyup:
+ if(fh) {
+ fclose(fh);
+ }
+ if(lookups) {
+ for(i = 0; i < conf->iters_n; i++) {
+ if(lookups[i]) {
+ lookup_destroy(lookups[i]);
+ }
+ }
+ free(lookups);
+ }
+ if(poss) {
+ free(poss);
+ }
+ if(lens) {
+ free(lens);
+ }
+
+ return rv;
}