aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Charles <sircharlesaze@gmail.com> 2020-05-22 17:42:39 +0200
committer Charles <sircharlesaze@gmail.com> 2020-05-22 17:42:39 +0200
commit 9ab34fff22bb2d6ebedefc702f7ec6c55937e175 (patch)
tree 311aae03ddf5ac62cb1c0b678eda954a7fa5f15a
parent 6d4c3864e3b742a92e9874f1e4dfe3a9c1565188 (diff)
downloadmandelbrot_cpu-master.tar.gz
mandelbrot_cpu-master.tar.bz2
mandelbrot_cpu-master.zip
AVX draft (HEAD, master)
-rw-r--r-- Makefile 2
-rw-r--r-- inc/mandel.h 5
-rw-r--r-- src/mandelbrot.c 51
-rw-r--r-- src/state.c 26
4 files changed, 60 insertions, 24 deletions
diff --git a/Makefile b/Makefile
index 5ee5d60..27c551e 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ OBJ_DIR = obj
CC = gcc
OFLAG = -Ofast -funroll-loops -ffast-math
-CCFLAGS = -I$(INC_DIR) -Wall -Wextra $(OFLAG)
+CCFLAGS = -I$(INC_DIR) -Wall -Wextra $(OFLAG) -mavx
LDFLAGS = -lm -lpthread \
$(shell pkg-config --libs --cflags sdl2 SDL2_ttf)
diff --git a/inc/mandel.h b/inc/mandel.h
index 80cb5f4..d5c8d9b 100644
--- a/inc/mandel.h
+++ b/inc/mandel.h
@@ -1,6 +1,10 @@
#ifndef MANDEL_H
# define MANDEL_H
+# ifndef __AVX__
+# error "AVX not supported"
+# endif
+
# include <stdlib.h>
# include <stdbool.h>
# include <math.h>
@@ -78,6 +82,7 @@ typedef struct
// mandelbrot.c
int mandelbrot(double ca, double cb, int iterations);
+void mandelbrot_avx(State *state, Color *pixels, int width, int height);
// state.c
bool state_init(State *state);
diff --git a/src/mandelbrot.c b/src/mandelbrot.c
index 3c8ab98..3f47d1b 100644
--- a/src/mandelbrot.c
+++ b/src/mandelbrot.c
@@ -21,3 +21,54 @@ int mandelbrot(double ca, double cb, int iterations)
}
return n;
}
+
+/* Renders [real_start, real_end) x [imag_start, imag_end) into pixels
+ * (row-major, width * height entries), four pixels per AVX vector.
+ * Pixel colour is state->palette[n], n = steps (0..iterations) taken
+ * before |z|^2 >= 4, starting from z = c as the draft did.
+ * NOTE(review): assumes palette has iterations + 1 entries -- confirm. */
+void mandelbrot_avx(State *state, Color *pixels, int width, int height)
+{
+    if (width <= 0 || height <= 0)
+        return;
+    // Step spans start..end; the draft computed end - end, i.e. always 0.
+    const double real_step = (state->real_end - state->real_start) / (double)width;
+    const double imag_step = (state->imag_end - state->imag_start) / (double)height;
+    const int max_iter = state->iterations;
+    const __m256d ones = _mm256_set1_pd(1.0);
+    const __m256d fours = _mm256_set1_pd(4.0);
+    // Real-axis offsets of the four neighbouring pixels held in one vector.
+    const __m256d offset = _mm256_mul_pd(_mm256_setr_pd(0.0, 1.0, 2.0, 3.0), _mm256_set1_pd(real_step));
+    for (int y = 0; y < height; y++)
+    {
+        const __m256d ci = _mm256_set1_pd(state->imag_start + y * imag_step);
+        Color *row = pixels + (size_t)y * (size_t)width;
+        for (int x = 0; x < width; x += 4)
+        {
+            const __m256d cr = _mm256_add_pd(_mm256_set1_pd(state->real_start + x * real_step), offset);
+            __m256d zr = cr;
+            __m256d zi = ci;
+            // Counts are kept as doubles: plain AVX (-mavx) has no 64-bit integer add.
+            __m256d n = _mm256_setzero_pd();
+            __m256d active = _mm256_cmp_pd(fours, fours, _CMP_EQ_OQ); // all lanes set
+            for (int k = 0; k < max_iter; k++)
+            {
+                __m256d zr_square = _mm256_mul_pd(zr, zr);
+                __m256d zi_square = _mm256_mul_pd(zi, zi);
+                __m256d dist = _mm256_add_pd(zr_square, zi_square);
+                // A lane stays active only while it has never reached |z|^2 >= 4.
+                active = _mm256_and_pd(active, _mm256_cmp_pd(dist, fours, _CMP_LT_OQ));
+                if (_mm256_movemask_pd(active) == 0)
+                    break;
+                n = _mm256_add_pd(n, _mm256_and_pd(active, ones));
+                zi = _mm256_mul_pd(zi, zr); // z = z^2 + c: zi = 2*zi*zr + ci
+                zi = _mm256_add_pd(_mm256_add_pd(zi, zi), ci);
+                zr = _mm256_add_pd(_mm256_sub_pd(zr_square, zi_square), cr);
+            }
+            double counts[4];
+            _mm256_storeu_pd(counts, n);
+            for (int lane = 0; lane < 4 && x + lane < width; lane++) // drop lanes past the row end
+                row[x + lane] = state->palette[(int)counts[lane]];
+        }
+    }
+}
diff --git a/src/state.c b/src/state.c
index c1bdacb..b0bc38b 100644
--- a/src/state.c
+++ b/src/state.c
@@ -47,36 +47,16 @@ void state_run(State *state)
{
event_handle(state);
- double real_step;
- double imag_step;
- double real;
- double imag;
+ int _;
void *pixels;
- int pitch;
int width;
int height;
- SDL_CALL(SDL_LockTexture(state->texture, NULL, &pixels, &pitch));
+ SDL_CALL(SDL_LockTexture(state->texture, NULL, &pixels, &_));
SDL_CALL(SDL_QueryTexture(state->texture, NULL, NULL, &width, &height));
- real_step = (state->real_end - state->real_start) / width;
- imag_step = (state->imag_end - state->imag_start) / height;
- imag = state->imag_start;
-
uint32_t render_start_time = SDL_GetTicks();
-
- for (int i = 0; i < height; i++)
- {
- real = state->real_start;
- for (int j = 0; j < width; j++)
- {
- int n = mandelbrot(real, imag, state->iterations);
- ((Color*)pixels)[i * width + j] = state->palette[n];
- real += real_step;
- }
- imag += imag_step;
- }
-
+ mandelbrot_avx(state, pixels, width, height);
uint32_t render_end_time = SDL_GetTicks();
SDL_UnlockTexture(state->texture);