From 65bc3d53c24f4a348853be3ad96e6176dd6cddac Mon Sep 17 00:00:00 2001 From: Not Zed Date: Tue, 28 Jan 2020 08:45:29 +1030 Subject: [PATCH] Trying double-double implementation. Doesn't add any precision so there must be a bug in the code. --- .../classes/fxdemo/fract/Calculate.java | 21 +- .../classes/fxdemo/fract/Mandelbrot.java | 85 +++++- .../classes/fxdemo/fract/mandelbrot.cl | 285 ++++++++++++++++++ 3 files changed, 379 insertions(+), 12 deletions(-) diff --git a/src/notzed.zcl.fxdemo/classes/fxdemo/fract/Calculate.java b/src/notzed.zcl.fxdemo/classes/fxdemo/fract/Calculate.java index ab069b1..e344c81 100644 --- a/src/notzed.zcl.fxdemo/classes/fxdemo/fract/Calculate.java +++ b/src/notzed.zcl.fxdemo/classes/fxdemo/fract/Calculate.java @@ -40,6 +40,7 @@ public class Calculate { int width, height; CLKernel mandelbrot; CLKernel dmandelbrot; + CLKernel ddmandelbrot; Calculate(int width, int height) throws CLException { devs = new CLDevice[]{CLPlatform.getBestDevice(CL_DEVICE_TYPE_ALL)}; @@ -61,10 +62,14 @@ public class Calculate { dmandelbrot = prog.createKernel("dmandelbrot"); dmandelbrot.setArg(0, image); dmandelbrot.setArg(1, width, height); + + ddmandelbrot = prog.createKernel("ddmandelbrot"); + ddmandelbrot.setArg(0, image); + ddmandelbrot.setArg(1, width, height); } void release() { - CLObject.release(dmandelbrot, mandelbrot, prog, image, q, cl); + CLObject.release(ddmandelbrot, dmandelbrot, mandelbrot, prog, image, q, cl); CLObject.release(devs); } @@ -96,7 +101,21 @@ public class Calculate { } catch (CLException ex) { ex.printStackTrace(); } + } + public void rundd(ByteBuffer buffer, DD ox, DD oy, DD scale, double arg, int m) { + try { + ddmandelbrot.setArg(2, ox.x, ox.y, oy.x, oy.y); + ddmandelbrot.setArg(3, scale.x, scale.y); + ddmandelbrot.setArg(4, sin(arg), cos(arg)); + ddmandelbrot.setArg(5, m); + + q.enqueue2DKernel(ddmandelbrot, 0, 0, width, height, 8, 8, null, null); + q.enqueueReadBuffer(image, true, 0, buffer.limit(), buffer, null, null); + q.finish(); + } catch (CLException ex) { + ex.printStackTrace(); + } } } diff --git a/src/notzed.zcl.fxdemo/classes/fxdemo/fract/Mandelbrot.java b/src/notzed.zcl.fxdemo/classes/fxdemo/fract/Mandelbrot.java index 47fc224..6add08a 100644 --- a/src/notzed.zcl.fxdemo/classes/fxdemo/fract/Mandelbrot.java +++ b/src/notzed.zcl.fxdemo/classes/fxdemo/fract/Mandelbrot.java @@ -82,6 +82,13 @@ public class Mandelbrot extends Application { } + enum Mode { + FLOAT, + DOUBLE, + DD, + + } + // some from here, i made up the names http://paulbourke.net/fractals/mandelbrot/ static Target[] targets = { new Target("jewel", -0.7746806106269039, -0.1374168856037867, 30), @@ -95,7 +102,7 @@ public class Mandelbrot extends Application { }; Target target = targets[0]; - boolean isdouble = true; + Mode mode = Mode.DD; double alpha = 0.005; double beta = 0; double gamma = 80; @@ -176,10 +183,13 @@ public class Mandelbrot extends Application { getParameters().getUnnamed().forEach((c) -> { switch (c) { case "-f": - isdouble = false; + mode = mode.FLOAT; break; case "-d": - isdouble = true; + mode = mode.DOUBLE; + break; + case "-dd": + mode = mode.DD; break; case "--sync": case "-s": @@ -197,7 +207,7 @@ public class Mandelbrot extends Application { image = new WritableImage(width, height); if (beta == 0.0) - beta = isdouble ? target.beta : Math.min(12, target.beta); + beta = mode != Mode.FLOAT ? target.beta : Math.min(12, target.beta); ImageView iv = new ImageView(image); Text fps = new Text(); @@ -282,7 +292,17 @@ public class Mandelbrot extends Application { double scale = alpha / Math.exp(frac * beta); int depth = (int)(-Math.log(scale) * gamma); - recalc(target, isdouble, scale, arg, depth); + DD dscale = DD.div(new DD(alpha), DD.exp(new DD(frac * beta))); + int ddepth = (int)DD.neg(DD.mul(DD.log(dscale), gamma)).x; + + System.out.printf("scale: %f dscale: %f\n", scale, dscale.x); + System.out.printf("depth: %d ddepth: %d\n", depth, ddepth); + + if (mode == Mode.DD) { + recalc(target, mode, dscale, arg, ddepth); + } else { + recalc(target, mode, scale, arg, depth); + } } } @@ -290,7 +310,7 @@ public class Mandelbrot extends Application { double arg = 0; int lastiter; - void recalc(Target target, boolean isdouble, double scale, double arg, int m) { + void recalc(Target target, Mode mode, double scale, double arg, int m) { double x = target.cx - width * scale * 0.5; double y = target.cy - height * scale * 0.5; @@ -302,10 +322,14 @@ public class Mandelbrot extends Application { busy = true; queue.submit(() -> { - if (isdouble) + switch (mode) { + case DOUBLE: + case DD: calc.rund(buffer, x, y, scale, scale, arg, m); - else + break; + case FLOAT: calc.run(buffer, x, y, scale, scale, arg, m); + } Platform.runLater(() -> { nframes++; @@ -315,11 +339,50 @@ public class Mandelbrot extends Application { }); }); } else { - - if (isdouble) + switch (mode) { + case DOUBLE: + case DD: calc.rund(buffer, x, y, scale, scale, arg, m); - else + break; + case FLOAT: calc.run(buffer, x, y, scale, scale, arg, m); + } + nframes++; + image.getPixelWriter() + .setPixels(0, 0, width, height, PixelFormat.getByteBgraInstance(), buffer, width * 4); + } + } + + void recalc(Target target, Mode mode, DD scale, double arg, int m) { + //double x = target.cx - width * scale * 0.5; + //double y = target.cy - height * scale * 0.5; + + DD x = DD.sub(new DD(target.cx), DD.mul_pwr2(DD.mul(scale, width), 0.5)); + DD y = DD.sub(new DD(target.cy), DD.mul_pwr2(DD.mul(scale, height), 0.5)); + + lastiter = m; + + if (!synchronous) { + if (busy) + return; + busy = true; + + queue.submit(() -> { + calc.rundd(buffer, x, y, scale, arg, m); + + Platform.runLater(() -> { + nframes++; + image.getPixelWriter() + .setPixels(0, 0, width, height, PixelFormat.getByteBgraInstance(), buffer, width * 4); + busy = false; + }); + }); + } else { + switch (mode) { + case DD: + calc.rundd(buffer, x, y, scale, arg, m); + break; + } nframes++; image.getPixelWriter() .setPixels(0, 0, width, height, PixelFormat.getByteBgraInstance(), buffer, width * 4); diff --git a/src/notzed.zcl.fxdemo/classes/fxdemo/fract/mandelbrot.cl b/src/notzed.zcl.fxdemo/classes/fxdemo/fract/mandelbrot.cl index bb7009c..e9e0272 100644 --- a/src/notzed.zcl.fxdemo/classes/fxdemo/fract/mandelbrot.cl +++ b/src/notzed.zcl.fxdemo/classes/fxdemo/fract/mandelbrot.cl @@ -95,3 +95,288 @@ dmandelbrot(global uchar4 *image, int2 size, double2 origin, double2 scale, doub image[dx + dy * size.x] = convert_uchar4(colour * 255.0f); } } + +// double double? +// https://github.com/lumianph/gpuprec/tree/master/gqd +typedef double2 dd_real; + +#define _GQD_SPLITTER (134217729.0) // = 2^27 + 1 +#define _GQD_SPLIT_THRESH (6.69692879491417e+299) // = 2^996 + +dd_real make_d(double x) { + return (dd_real)( x, 0.0 ); +} + +dd_real make_dd(double x, double y) { + return (dd_real)( x, 0.0 ); +} + + +//computs fl( a + b ) and err( a + b ), assumes |a| > |b| + +double quick_two_sum(double a, double b, double *err) { + + if (b == 0.0) { + *err = 0.0; + return (a + b); + } + + double s = a + b; + *err = b - (s - a); + + return s; +} + +double two_sum(double a, double b, double *err) { + + if ((a == 0.0) || (b == 0.0)) { + *err = 0.0; + return (a + b); + } + + double s = a + b; + double bb = s - a; + *err = (a - (s - bb)) + (b - bb); + + return s; +} + +//computes fl( a - b ) and err( a - b ), assumes |a| >= |b| +double quick_two_diff(double a, double b, double *err) { + if (a == b) { + *err = 0.0; + return 0.0; + } + + double s; + + /* + if(fabs((a-b)/a) < GPU_D_EPS) { + s = 0.0; + err = 0.0; + return s; + } + */ + + s = a - b; + *err = (a - s) - b; + return s; +} + +//computes fl( a - b ) and err( a - b ) +double two_diff(double a, double b, double *err) { + if (a == b) { + *err = 0.0; + return 0.0; + } + + double s = a - b; + + /* + if(fabs((a-b)/a) < GPU_D_EPS) { + s = 0.0; + err = 0.0; + return s; + } + */ + + double bb = s - a; + *err = (a - (s - bb)) - (b + bb); + return s; +} + +/* double-double = double + double */ +dd_real ddadd(double a, double b) { + double s, e; + s = two_sum(a, b, &e); + return make_dd(s, e); +} + +dd_real negative(const dd_real a) { + return make_dd(-a.x, -a.y); +} + +/* double-double + double */ +dd_real dd_add_d(const dd_real a, double b) { + double s1, s2; + s1 = two_sum(a.x, b, &s2); + s2 += a.y; + s1 = quick_two_sum(s1, s2, &s2); + return make_dd(s1, s2); +} + +dd_real dd_add_dd(const dd_real a, dd_real b) { + double s, e; + + s = two_sum(a.x, b.x, &e); + e += (a.y + b.y); + s = quick_two_sum(s, e, &e); + return make_dd(s, e); +} +/*********** Subtractions *********/ + +dd_real dd_sub_dd(const dd_real a, const dd_real b) { + + double s, e; + s = two_diff(a.x, b.x, &e); + //return make_dd(s, e); + e += a.y; + e -= b.y; + s = quick_two_sum(s, e, &e); + return make_dd(s, e); +} + + +// Computes high word and lo word of a +void dd_split(double a, double *hi, double *lo) { + double temp; + if (a > _GQD_SPLIT_THRESH || a < -_GQD_SPLIT_THRESH) { + a *= 3.7252902984619140625e-09; // 2^-28 + temp = _GQD_SPLITTER * a; + *hi = temp - (temp - a); + *lo = a - *hi; + *hi *= 268435456.0; // 2^28 + *lo *= 268435456.0; // 2^28 + } else { + temp = _GQD_SPLITTER * a; + *hi = temp - (temp - a); + *lo = a - *hi; + } +} + +/* Computes fl(a*b) and err(a*b). */ +double two_prod(double a, double b, double *err) { + double a_hi, a_lo, b_hi, b_lo; + double p = a * b; + + dd_split(a, &a_hi, &a_lo); + dd_split(b, &b_hi, &b_lo); + + //err = (a_hi*b_hi) - p + (a_hi*b_lo) + (a_lo*b_hi) + (a_lo*b_lo); + *err = (a_hi * b_hi) - p + (a_hi * b_lo) + (a_lo * b_hi) + (a_lo * b_lo); + + return p; +} + +/* Computes fl(a*a) and err(a*a). Faster than the above method. */ +double dd_two_sqr(double a, double *err) { + double hi, lo; + double q = a * a; + + dd_split(a, &hi, &lo); + *err = ((hi * hi - q) + 2.0 * hi * lo) + lo * lo; + return q; +} + +/*********** Squaring **********/ +dd_real dd_sqr(const dd_real a) { + double p1, p2; + double s1, s2; + p1 = dd_two_sqr(a.x, &p2); + p2 += (2.0 * a.x * a.y); + p2 += (a.y * a.y); + s1 = quick_two_sum(p1, p2, &s2); + return make_dd(s1, s2); +} + +dd_real d_sqr(double a) { + double p1, p2; + p1 = dd_two_sqr(a, &p2); + return make_dd(p1, p2); +} + +dd_real ldexp_dd(const dd_real a, int exp) { + return make_dd(ldexp(a.x, exp), ldexp(a.y, exp)); +} + +/* double-double * double, where double is a power of 2. */ +dd_real dd_mul_pwr2(const dd_real a, double b) { + return make_dd(a.x * b, a.y * b); +} + +/* double-double * double-double */ +dd_real dd_mul_dd(const dd_real a, const dd_real b) { + double p1, p2; + + p1 = two_prod(a.x, b.x, &p2); + p2 += (a.x * b.y + a.y * b.x); + + p1 = quick_two_sum(p1, p2, &p2); + + return make_dd(p1, p2); +} + +/* double-double * double */ +dd_real dd_mul_d(const dd_real a, double b) { + double p1, p2; + + p1 = two_prod(a.x, b, &p2); + p2 += a.y * b; + p1 = quick_two_sum(p1, p2, &p2); + return make_dd(p1, p2); +} + +/* double-double < double */ +int dd_lt_d(const dd_real a, double b) { + return (a.x < b || (a.x == b && a.y < 0.0)); +} + +kernel void +__attribute__((reqd_work_group_size(LWS_X, LWS_Y, 1))) +ddmandelbrot(global uchar4 *image, int2 size, double4 origin, double2 scale, double2 sincos, int m) { + int dx = get_global_id(0); + int dy = get_global_id(1); + + if (dx < size.x && dy < size.y) { + double2 s = convert_double2(size); + double2 c; + + c = (double2)(dx, dy) - s * 0.5; + c = (double2)(c.x * sincos.x + c.y * sincos.y, + c.x * sincos.y - c.y * sincos.x); + c += s * 0.5; + + dd_real cx = make_d(c.x); + dd_real cy = make_d(c.y); + + //c *= scale; + //c += origin; + + cx = dd_mul_dd(cx, scale); + cy = dd_mul_dd(cy, scale); + + cx = dd_add_dd(cx, origin.s01); + cy = dd_add_dd(cy, origin.s23); + + dd_real zx = make_d(0.0); + dd_real zy = make_d(0.0); + + int n = 0; + + do { + dd_real tx = dd_add_dd(dd_sub_dd(dd_sqr(zx), dd_sqr(zy)), cx); + dd_real ty = dd_add_dd(dd_mul_pwr2(dd_mul_dd(zx, zy), 2), cy); + + zx = tx; + zy = ty; + + //z = (double2)(z.x * z.x - z.y * z.y, 2.0f * z.x * z.y) + c; + n++; + } while (dd_lt_d(dd_add_dd(dd_sqr(zx), dd_sqr(zy)), 4) && n < m); + + // we use a simple cosine palette to determine color: + // http://iquilezles.org/www/articles/palettes/palettes.htm + float t = n * 10.0f / m; + float3 d = (float3)(0.5f, 0.5f, 0.5f); + float3 e = (float3)(0.5f, 0.5f, 0.5f); + float3 f = (float3)(1.0f, 1.0f, 1.0f); + float3 g = (float3)(0.00f, 0.33f, 0.67f); + float4 colour = (float4)( d + e * cos(6.28318f * (f * t + g)), 1.0f); + + if (convert_int(n) == m) + colour = (float4)(0, 0, 0, 1); + + // store the rendered mandelbrot set into a storage buffer: + image[dx + dy * size.x] = convert_uchar4(colour * 255.0f); + } +} -- 2.39.5