From: Not Zed Date: Mon, 27 Jan 2020 04:13:02 +0000 (+1030) Subject: Add some microbenchmarks. X-Git-Url: https://code.zedzone.au/cvs?a=commitdiff_plain;h=916d96891fcc87e3a85fcd965b5fa01a2894273d;p=zcl Add some microbenchmarks. --- diff --git a/Makefile b/Makefile index 0a8b38f..7acaae1 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,10 @@ include java.make # ###################################################################### # Work in progress idea for java.make extension to create execution templates -notzed.zcl.demo_DEMOS=au.notzed.zcl.tools.clinfo +notzed.zcl.demo_DEMOS=au.notzed.zcl.tools.clinfo \ + au.notzed.zcl.test.TestObjects \ + au.notzed.zcl.test.TestCopies \ + au.notzed.zcl.test.TestMemory notzed.zcl.fxdemo_DEMOS=fxdemo.fract.Mandelbrot fxdemo.fract.Test DEMOFLAGS=--add-exports jdk.incubator.foreign/jdk.incubator.foreign.unsafe=notzed.zcl diff --git a/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestCopies.java b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestCopies.java new file mode 100644 index 0000000..a4b02e4 --- /dev/null +++ b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestCopies.java @@ -0,0 +1,153 @@ +/* + License: public domain or equivalent. 
+ */ +package au.notzed.zcl.test; + +import jdk.incubator.foreign.*; + +import api.*; + +import java.nio.ByteOrder; +import java.lang.invoke.VarHandle; +import java.util.HashMap; +import static java.lang.Math.*; +/** Micro-benchmark that times copying a long[] into native memory and records the smallest time seen per test key. */ +public class TestCopies { + /* longHandle accesses one long in native byte order; longVHandle is the same handle given an 8-byte stride, so it also takes an element index. */ + final static VarHandle longHandle = MemoryHandles.varHandle(long.class, ByteOrder.nativeOrder()); + final static VarHandle longVHandle = MemoryHandles.withStride(longHandle, 8); + /** Reads the long stored at address p through longHandle. */ + public static long getLong(MemoryAddress p) { + return (long)longHandle.get(p); + } + /** Reads the long at index i relative to p through the strided handle. */ + public static long getLong(MemoryAddress p, long i) { + return (long)longVHandle.get(p, i); + } + /** Stores v at address p through longHandle. */ + public static void setLong(MemoryAddress p, long v) { + longHandle.set(p, v); + } + /** Stores v at index i relative to p through the strided handle. */ + public static void setLong(MemoryAddress p, long i, long v) { + longVHandle.set(p, i, v); + } + /* NOTE(review): in this copy of the patch the text between the copyLoop loop condition and the 'results' declaration appears to be missing, so the bodies of copyLoop and copyBulk, the start of main and the 'lengths' array cannot be read here - confirm against the repository. */ + + static void copyLoop(long[] src, MemoryAddress dst) { + for (int i=0;i results = new HashMap<>(); + /* Each measurement times X repetitions; the outer loop makes 5 passes and results keeps the minimum time per key. */ + int X = 1024*1024; + for (int c = 0; c < 5; c++) { + for (int len: lengths) { + long[] data = new long[len]; + long now; + /* A fresh native segment of data.length * 8 bytes is allocated and closed on every repetition; copy done with copyLoop. */ + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (MemorySegment seg = MemorySegment.allocateNative(data.length * 8)) { + MemoryAddress add = seg.baseAddress(); + copyLoop(data, add); + } + } + long t = System.nanoTime() - now; + System.out.printf(" %12.9f %3d copyLoop\n", t * 1E-9, len); + results.compute(String.format("%3d copyLoop", len), (k, v)-> v == null ? t : min(v, t)); + /* Same per-repetition allocation, copy done with copyBulk. */ + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (MemorySegment seg = MemorySegment.allocateNative(data.length * 8)) { + MemoryAddress add = seg.baseAddress(); + copyBulk(data, add); + } + } + long s = System.nanoTime() - now; + System.out.printf(" %12.9f %3d copyBulk\n", s * 1E-9, len); + results.compute(String.format("%3d copyBulk", len), (k, v)-> v == null ? 
s : min(v, s)); + } + /* Pre-alloc variants: one native segment is allocated per measurement and reused for all X copies; the allocation itself is still inside the timed region. */ + for (int len: lengths) { + long[] data = new long[len]; + long now; + + now = System.nanoTime(); + try (MemorySegment seg = MemorySegment.allocateNative(data.length * 8)) { + MemoryAddress add = seg.baseAddress(); + for (int l = 0; l < X; l++) { + copyLoop(data, add); + } + } + long t = System.nanoTime() - now; + System.out.printf(" %12.9f %3d copyLoop pre-alloc\n", t * 1E-9, len); + results.compute(String.format("%3d copyLoop pre-alloc", len), (k, v)-> v == null ? t : min(v, t)); + /* Same reuse of a single segment, copy done with copyBulk. */ + now = System.nanoTime(); + try (MemorySegment seg = MemorySegment.allocateNative(data.length * 8)) { + MemoryAddress add = seg.baseAddress(); + for (int l = 0; l < X; l++) { + copyBulk(data, add); + } + } + long s = System.nanoTime() - now; + System.out.printf(" %12.9f %3d copyBulk pre-alloc\n", s * 1E-9, len); + results.compute(String.format("%3d copyBulk pre-alloc", len), (k, v)-> v == null ? s : min(v, s)); + } + + // if have stack allocator: + for (int len: lengths) { + long[] data = new long[len]; + long now; + /* Stack variants: every repetition opens api.Memory.stack() and takes data.length * 8 bytes from it with alloca. */ + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (Allocator a = api.Memory.stack()) { + MemoryAddress add = a.alloca(data.length * 8); + copyLoop(data, add); + } + } + long t = System.nanoTime() - now; + System.out.printf(" %12.9f %3d copyLoop stack\n", t * 1E-9, len); + results.compute(String.format("%3d copyLoop stack", len), (k, v)-> v == null ? t : min(v, t)); + /* Same stack allocation per repetition, copy done with copyBulk. */ + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (Allocator a = api.Memory.stack()) { + MemoryAddress add = a.alloca(data.length * 8); + copyBulk(data, add); + } + } + long s = System.nanoTime() - now; + System.out.printf(" %12.9f %3d copyBulk stack\n", s * 1E-9, len); + results.compute(String.format("%3d copyBulk stack", len), (k, v)-> v == null ? 
s : min(v, s)); + } + + System.out.println(); + } + /* Summary: one row per key with its smallest recorded time in seconds. Keys were built as "%3d name", so characters 0-2 are the length field and the name starts at index 4; rows are sorted as strings. */ + results.entrySet().stream().map((e) -> { + return String.format("%s %12.9f %s", e.getKey().substring(0, 3), 1E-9 * e.getValue(), e.getKey().substring(4)); + }).sorted().forEach(System.out::println); + } +} diff --git a/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestMemory.java b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestMemory.java new file mode 100644 index 0000000..24a2414 --- /dev/null +++ b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestMemory.java @@ -0,0 +1,123 @@ +/* + License: public domain or equivalent. + */ +package au.notzed.zcl.test; + +import api.Native; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.FloatBuffer; +import jdk.incubator.foreign.MemoryAddress; +import jdk.incubator.foreign.MemorySegment; + +/** + * Some memory tests. + */ +public class TestMemory { + /** Sums every float in seg via Native.getFloat and writes the sum back at the segment's base address. */ + static void check(MemorySegment seg) { + int len = (int)(seg.byteSize() >>> 2); /* float count = bytes / 4; the earlier >>> 3 summed only half of what the array and buffer variants sum. Assumes Native.getFloat(add, i) indexes 4-byte floats - TODO confirm */ + float sum = 0; + + MemoryAddress add = seg.baseAddress(); + for (int i = 0; i < len; i++) + sum += Native.getFloat(add, i); + Native.setFloat(add, sum); + } + /** Sums every element of seg and stores the sum in seg[0]. */ + static void check(float[] seg) { + float sum = 0; + for (int i = 0; i < seg.length; i++) + sum += seg[i]; + seg[0] = sum; + } + /** Sums the remaining floats with relative get(), rewinds the buffer and stores the sum at index 0. */ + static void check(FloatBuffer seg) { + float sum = 0; + + while (seg.hasRemaining()) + sum += seg.get(); + seg.rewind(); + seg.put(0, sum); + } + /** Sums the floats at indices 0 to limit()-1 with absolute get(i) and stores the sum at index 0. */ + static void check2(FloatBuffer seg) { + float sum = 0; + int len = seg.limit(); + + for (int i = 0; i < len; i++) + sum += seg.get(i); + seg.put(0, sum); + } + /** For each size allocates a float[], a native segment and a direct native-order ByteBuffer of sizes[i] floats, then times the check variants over them. */ + public static void main(String[] args) { + int[] sizes = { + 1024, + 1024 * 1024, + 16, + 64, + 31, + 17 + }; + float[][] arrays = new float[sizes.length][]; + MemorySegment segments[] = new MemorySegment[sizes.length]; + ByteBuffer[] buffers = new ByteBuffer[sizes.length]; + + for (int i = 0; i < sizes.length; i++) { + arrays[i] = new float[sizes[i]]; + segments[i] = MemorySegment.allocateNative(sizes[i] * 4, 16); + 
buffers[i] = ByteBuffer.allocateDirect(sizes[i] * 4).order(ByteOrder.nativeOrder()); + } + /* 10 passes; every timed section runs its check 1000 times over all sizes and prints the elapsed time in seconds. */ + for (int c = 0; c < 10; c++) { + long now; + /* array: plain float[] summation. */ + now = System.nanoTime(); + for (int l = 0; l < 1000; l++) { + for (float[] a: arrays) + check(a); + } + System.out.printf(" %12.9f array\n", (System.nanoTime() - now) * 1E-9); + now = System.nanoTime(); /* bb stream: FloatBuffer view of the direct buffer, read with relative get() */ + for (int l = 0; l < 1000; l++) { + for (ByteBuffer a: buffers) + check(a.asFloatBuffer()); + } + System.out.printf(" %12.9f bb stream\n", (System.nanoTime() - now) * 1E-9); + now = System.nanoTime(); /* segment: native segment read through check(MemorySegment) */ + for (int l = 0; l < 1000; l++) { + for (MemorySegment a: segments) + check(a); + } + System.out.printf(" %12.9f segment\n", (System.nanoTime() - now) * 1E-9); + now = System.nanoTime(); /* bb index: FloatBuffer view of the direct buffer, read with absolute get(i) */ + for (int l = 0; l < 1000; l++) { + for (ByteBuffer a: buffers) + check2(a.asFloatBuffer()); + } + System.out.printf(" %12.9f bb index\n", (System.nanoTime() - now) * 1E-9); + now = System.nanoTime(); /* bb over segment: each segment wrapped with asByteBuffer() in native order, read with relative get() */ + for (int l = 0; l < 1000; l++) { + for (MemorySegment a: segments) + check(a.asByteBuffer().order(ByteOrder.nativeOrder()).asFloatBuffer()); + } + System.out.printf(" %12.9f bb over segment\n", (System.nanoTime() - now) * 1E-9); + /* The segment section is timed two more times at the end of each pass. */ + now = System.nanoTime(); + for (int l = 0; l < 1000; l++) { + for (MemorySegment a: segments) + check(a); + } + System.out.printf(" %12.9f segment\n", (System.nanoTime() - now) * 1E-9); + + now = System.nanoTime(); + for (int l = 0; l < 1000; l++) { + for (MemorySegment a: segments) + check(a); + } + System.out.printf(" %12.9f segment\n", (System.nanoTime() - now) * 1E-9); + + } + + } +} diff --git a/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestObjects.java b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestObjects.java new file mode 100644 index 0000000..4b0db81 --- /dev/null +++ b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestObjects.java @@ -0,0 +1,38 @@ +/* + License: public domain or equivalent. 
+ */ +package au.notzed.zcl.test; + +import au.notzed.zcl.CL; +import au.notzed.zcl.CLContext; +import au.notzed.zcl.CLDevice; +import au.notzed.zcl.CLEvent; +import au.notzed.zcl.CLException; +import au.notzed.zcl.CLPlatform; +/** Micro-benchmark that times repeated CLContext.createUserEvent() calls. */ +public class TestObjects { + /* cl is assigned once in main. events is a 256-slot ring indexed by id & 255, so each new event replaces the reference held in an older slot. */ + static CLContext cl; + static int id = 0; + static CLEvent[] events = new CLEvent[256]; + /** Creates one user event on cl and stores it in the next ring slot. */ + static void check() throws CLException { + // CLPlatform.getPlatforms(); + events[(id++) & 255] = cl.createUserEvent(); + } + /** Creates a context on the device returned by getBestDevice(CL_DEVICE_TYPE_ALL), then runs 10 passes of 1000000 check() calls and prints the seconds taken by each pass. */ + public static void main(String[] args) throws CLException { + CLDevice dev = CLPlatform.getBestDevice(CL.CL_DEVICE_TYPE_ALL); + cl = CLContext.createContext(null, new CLDevice[]{dev}); + + for (int c = 0; c < 10; c++) { + long now; + + now = System.nanoTime(); + for (int l = 0; l < 1000000; l++) { + check(); + } + System.out.printf(" %12.9f check\n", (System.nanoTime() - now) * 1E-9); + } + } +} diff --git a/src/notzed.zcl.demo/classes/au/notzed/zcl/test/package-info.java b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/package-info.java new file mode 100644 index 0000000..5be2181 --- /dev/null +++ b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/package-info.java @@ -0,0 +1,8 @@ +/* + License: public domain or equivalent. + */ + +/** + * Some experiments and perf microbenchmarks. + */ +package au.notzed.zcl.test; diff --git a/src/notzed.zcl/classes/au/notzed/zcl/CLEvent.java b/src/notzed.zcl/classes/au/notzed/zcl/CLEvent.java index d921e6e..201c35c 100644 --- a/src/notzed.zcl/classes/au/notzed/zcl/CLEvent.java +++ b/src/notzed.zcl/classes/au/notzed/zcl/CLEvent.java @@ -51,7 +51,6 @@ public class CLEvent extends CLObject { private static void release(MemoryAddress p) { try { - System.err.printf("** release event %016x\n", api.Memory.toLong(p)); clReleaseEvent(p); } catch (Throwable t) { }