Add some microbenchmarks.
authorNot Zed <notzed@gmail.com>
Mon, 27 Jan 2020 04:13:02 +0000 (14:43 +1030)
committerNot Zed <notzed@gmail.com>
Mon, 27 Jan 2020 04:13:02 +0000 (14:43 +1030)
Makefile
src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestCopies.java [new file with mode: 0644]
src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestMemory.java [new file with mode: 0644]
src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestObjects.java [new file with mode: 0644]
src/notzed.zcl.demo/classes/au/notzed/zcl/test/package-info.java [new file with mode: 0644]
src/notzed.zcl/classes/au/notzed/zcl/CLEvent.java

index 0a8b38f..7acaae1 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -21,7 +21,10 @@ include java.make
 # ######################################################################
 # Work in progress idea for java.make extension to create execution templates
 
-notzed.zcl.demo_DEMOS=au.notzed.zcl.tools.clinfo
+notzed.zcl.demo_DEMOS=au.notzed.zcl.tools.clinfo \
+       au.notzed.zcl.test.TestObjects \
+       au.notzed.zcl.test.TestCopies \
+       au.notzed.zcl.test.TestMemory
 notzed.zcl.fxdemo_DEMOS=fxdemo.fract.Mandelbrot fxdemo.fract.Test
 
 DEMOFLAGS=--add-exports jdk.incubator.foreign/jdk.incubator.foreign.unsafe=notzed.zcl
diff --git a/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestCopies.java b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestCopies.java
new file mode 100644 (file)
index 0000000..a4b02e4
--- /dev/null
@@ -0,0 +1,153 @@
+/*
+  License: public domain or equivalent.
+ */
+package au.notzed.zcl.test;
+
+import jdk.incubator.foreign.*;
+
+import api.*;
+
+import java.nio.ByteOrder;
+import java.lang.invoke.VarHandle;
+import java.util.HashMap;
+import static java.lang.Math.*;
+
+public class TestCopies {
+
+       final static VarHandle longHandle = MemoryHandles.varHandle(long.class, ByteOrder.nativeOrder());
+       final static VarHandle longVHandle = MemoryHandles.withStride(longHandle, 8);
+
+       public static long getLong(MemoryAddress p) {
+               return (long)longHandle.get(p);
+       }
+
+       public static long getLong(MemoryAddress p, long i) {
+               return (long)longVHandle.get(p, i);
+       }
+
+       public static void setLong(MemoryAddress p, long v) {
+               longHandle.set(p, v);
+       }
+
+       public static void setLong(MemoryAddress p, long i, long v) {
+               longVHandle.set(p, i, v);
+       }
+
+
+       static void copyLoop(long[] src, MemoryAddress dst) { // element-by-element copy of src into native memory at dst
+               for (int i=0;i<src.length;i++) {
+                       setLong(dst, i, src[i]); // indexed store through the 8-byte-strided long handle
+               }
+       }
+
+       static void copyBulk(long[] src, MemoryAddress dst) { // bulk copy of the whole of src into native memory at dst
+               MemoryAddress.copy(MemorySegment.ofArray(src).baseAddress(), dst, src.length * 8L); // copy() takes a byte count: 8 bytes per long
+       }
+
+       public static void main(String[] args) {
+               int[] lengths = {
+                       // 0 is left out: a MemorySegment cannot be allocated with zero length
+                       1,
+                       2,
+                       4,
+                       8,
+                       16,
+                       32,
+                       64,
+                       128,
+                       256,
+               };
+               HashMap<String,Long> results = new HashMap<>();
+
+               int X = 1024*1024;
+               for (int c = 0; c < 5; c++) {
+                       for (int len: lengths) {
+                               long[] data = new long[len];
+                               long now;
+
+                               now = System.nanoTime();
+                               for (int l = 0; l < X; l++) {
+                                       try (MemorySegment seg = MemorySegment.allocateNative(data.length * 8)) {
+                                               MemoryAddress add = seg.baseAddress();
+                                               copyLoop(data, add);
+                                       }
+                               }
+                               long t = System.nanoTime() - now;
+                               System.out.printf(" %12.9f %3d copyLoop\n", t * 1E-9, len);
+                               results.compute(String.format("%3d copyLoop", len), (k, v)-> v == null ? t : min(v, t));
+
+                               now = System.nanoTime();
+                               for (int l = 0; l < X; l++) {
+                                       try (MemorySegment seg = MemorySegment.allocateNative(data.length * 8)) {
+                                               MemoryAddress add = seg.baseAddress();
+                                               copyBulk(data, add);
+                                       }
+                               }
+                               long s = System.nanoTime() - now;
+                               System.out.printf(" %12.9f %3d copyBulk\n", s * 1E-9, len);
+                               results.compute(String.format("%3d copyBulk", len), (k, v)-> v == null ? s : min(v, s));
+                       }
+
+                       for (int len: lengths) {
+                               long[] data = new long[len];
+                               long now;
+
+                               now = System.nanoTime();
+                               try (MemorySegment seg = MemorySegment.allocateNative(data.length * 8)) {
+                                       MemoryAddress add = seg.baseAddress();
+                                       for (int l = 0; l < X; l++) {
+                                               copyLoop(data, add);
+                                       }
+                               }
+                               long t = System.nanoTime() - now;
+                               System.out.printf(" %12.9f %3d copyLoop pre-alloc\n", t * 1E-9, len);
+                               results.compute(String.format("%3d copyLoop pre-alloc", len), (k, v)-> v == null ? t : min(v, t));
+
+                               now = System.nanoTime();
+                               try (MemorySegment seg = MemorySegment.allocateNative(data.length * 8)) {
+                                       MemoryAddress add = seg.baseAddress();
+                                       for (int l = 0; l < X; l++) {
+                                               copyBulk(data, add);
+                                       }
+                               }
+                               long s = System.nanoTime() - now;
+                               System.out.printf(" %12.9f %3d copyBulk pre-alloc\n", s * 1E-9, len);
+                               results.compute(String.format("%3d copyBulk pre-alloc", len), (k, v)-> v == null ? s : min(v, s));
+                       }
+
+                       // Same measurements using the stack allocator (api.Memory.stack()), if one is available:
+                       for (int len: lengths) {
+                               long[] data = new long[len];
+                               long now;
+
+                               now = System.nanoTime();
+                               for (int l = 0; l < X; l++) {
+                                       try (Allocator a = api.Memory.stack()) {
+                                               MemoryAddress add = a.alloca(data.length * 8);
+                                               copyLoop(data, add);
+                                       }
+                               }
+                               long t = System.nanoTime() - now;
+                               System.out.printf(" %12.9f %3d copyLoop stack\n", t * 1E-9, len);
+                               results.compute(String.format("%3d copyLoop stack", len), (k, v)-> v == null ? t : min(v, t));
+
+                               now = System.nanoTime();
+                               for (int l = 0; l < X; l++) {
+                                       try (Allocator a = api.Memory.stack()) {
+                                               MemoryAddress add = a.alloca(data.length * 8);
+                                               copyBulk(data, add);
+                                       }
+                               }
+                               long s = System.nanoTime() - now;
+                               System.out.printf(" %12.9f %3d copyBulk stack\n", s * 1E-9, len);
+                               results.compute(String.format("%3d copyBulk stack", len), (k, v)-> v == null ? s : min(v, s));
+                       }
+
+                       System.out.println();
+               }
+
+               results.entrySet().stream().map((e) -> {
+                               return String.format("%s %12.9f %s", e.getKey().substring(0, 3), 1E-9 * e.getValue(), e.getKey().substring(4));
+                       }).sorted().forEach(System.out::println);
+       }
+}
diff --git a/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestMemory.java b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestMemory.java
new file mode 100644 (file)
index 0000000..24a2414
--- /dev/null
@@ -0,0 +1,123 @@
+/*
+  License: public domain or equivalent.
+ */
+package au.notzed.zcl.test;
+
+import api.Native;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.FloatBuffer;
+import jdk.incubator.foreign.MemoryAddress;
+import jdk.incubator.foreign.MemorySegment;
+
+/**
+ * Some memory tests.
+ */
+public class TestMemory {
+
+       static void check(MemorySegment seg) { // sums every float in the segment and stores the sum via Native.setFloat
+               int len = (int)(seg.byteSize() >>> 2); // element count: floats are 4 bytes each, not 8
+               float sum = 0;
+
+               MemoryAddress add = seg.baseAddress();
+               for (int i = 0; i < len; i++)
+                       sum += Native.getFloat(add, i); // assumes the index is in float elements, not bytes - TODO confirm
+               Native.setFloat(add, sum);
+       }
+
+       static void check(float[] seg) { // sums every element and writes the sum into seg[0]
+               float total = 0;
+               for (float value : seg)
+                       total += value;
+               seg[0] = total;
+       }
+
+       static void check(FloatBuffer seg) { // sums the remaining floats with relative gets, then stores the sum at index 0
+               float total = 0;
+
+               for (int left = seg.remaining(); left > 0; left--)
+                       total += seg.get(); // relative get advances the position
+               seg.rewind(); // position back to 0 once everything has been consumed
+               seg.put(0, total);
+       }
+
+       static void check2(FloatBuffer seg) { // sums floats [0, limit) with absolute gets, then stores the sum at index 0
+               final int count = seg.limit();
+               float total = 0;
+
+               for (int idx = 0; idx < count; idx++)
+                       total += seg.get(idx); // absolute get: the buffer position is left untouched
+               seg.put(0, total);
+       }
+
+       public static void main(String[] args) {
+               int[] sizes = {
+                       1024,
+                       1024 * 1024,
+                       16,
+                       64,
+                       31,
+                       17
+               };
+               float[][] arrays = new float[sizes.length][];
+               MemorySegment segments[] = new MemorySegment[sizes.length];
+               ByteBuffer[] buffers = new ByteBuffer[sizes.length];
+
+               for (int i = 0; i < sizes.length; i++) {
+                       arrays[i] = new float[sizes[i]];
+                       segments[i] = MemorySegment.allocateNative(sizes[i] * 4, 16);
+                       buffers[i] = ByteBuffer.allocateDirect(sizes[i] * 4).order(ByteOrder.nativeOrder());
+               }
+
+               for (int c = 0; c < 10; c++) {
+                       long now;
+
+                       now = System.nanoTime();
+                       for (int l = 0; l < 1000; l++) {
+                               for (float[] a: arrays)
+                                       check(a);
+                       }
+                       System.out.printf(" %12.9f array\n", (System.nanoTime() - now) * 1E-9);
+                       now = System.nanoTime();
+                       for (int l = 0; l < 1000; l++) {
+                               for (ByteBuffer a: buffers)
+                                       check(a.asFloatBuffer());
+                       }
+                       System.out.printf(" %12.9f bb stream\n", (System.nanoTime() - now) * 1E-9);
+                       now = System.nanoTime();
+                       for (int l = 0; l < 1000; l++) {
+                               for (MemorySegment a: segments)
+                                       check(a);
+                       }
+                       System.out.printf(" %12.9f segment\n", (System.nanoTime() - now) * 1E-9);
+                       now = System.nanoTime();
+                       for (int l = 0; l < 1000; l++) {
+                               for (ByteBuffer a: buffers)
+                                       check2(a.asFloatBuffer());
+                       }
+                       System.out.printf(" %12.9f bb index\n", (System.nanoTime() - now) * 1E-9);
+                       now = System.nanoTime();
+                       for (int l = 0; l < 1000; l++) {
+                               for (MemorySegment a: segments)
+                                       check(a.asByteBuffer().order(ByteOrder.nativeOrder()).asFloatBuffer());
+                       }
+                       System.out.printf(" %12.9f bb over segment\n", (System.nanoTime() - now) * 1E-9);
+
+                                               now = System.nanoTime();
+                       for (int l = 0; l < 1000; l++) {
+                               for (MemorySegment a: segments)
+                                       check(a);
+                       }
+                       System.out.printf(" %12.9f segment\n", (System.nanoTime() - now) * 1E-9);
+
+                                               now = System.nanoTime();
+                       for (int l = 0; l < 1000; l++) {
+                               for (MemorySegment a: segments)
+                                       check(a);
+                       }
+                       System.out.printf(" %12.9f segment\n", (System.nanoTime() - now) * 1E-9);
+
+               }
+
+       }
+}
diff --git a/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestObjects.java b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestObjects.java
new file mode 100644 (file)
index 0000000..4b0db81
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+  License: public domain or equivalent.
+ */
+package au.notzed.zcl.test;
+
+import au.notzed.zcl.CL;
+import au.notzed.zcl.CLContext;
+import au.notzed.zcl.CLDevice;
+import au.notzed.zcl.CLEvent;
+import au.notzed.zcl.CLException;
+import au.notzed.zcl.CLPlatform;
+
+public class TestObjects {
+
+       static CLContext cl;
+       static int id = 0;
+       static CLEvent[] events = new CLEvent[256];
+
+       static void check() throws CLException { // unit of work being timed: create one user event on the shared context
+               //      CLPlatform.getPlatforms();
+               events[(id++) & 255] = cl.createUserEvent(); // slot index wraps at 256, overwriting the oldest stored reference
+       }
+
+       public static void main(String[] args) throws CLException {
+               CLDevice dev = CLPlatform.getBestDevice(CL.CL_DEVICE_TYPE_ALL);
+               cl = CLContext.createContext(null, new CLDevice[]{dev});
+
+               for (int c = 0; c < 10; c++) {
+                       long now;
+
+                       now = System.nanoTime();
+                       for (int l = 0; l < 1000000; l++) {
+                               check();
+                       }
+                       System.out.printf(" %12.9f check\n", (System.nanoTime() - now) * 1E-9);
+               }
+       }
+}
diff --git a/src/notzed.zcl.demo/classes/au/notzed/zcl/test/package-info.java b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/package-info.java
new file mode 100644 (file)
index 0000000..5be2181
--- /dev/null
@@ -0,0 +1,8 @@
+/*
+  License: public domain or equivalent.
+ */
+
+/**
+ * Some experiments and perf microbenchmarks.
+ */
+package au.notzed.zcl.test;
index d921e6e..201c35c 100644 (file)
@@ -51,7 +51,6 @@ public class CLEvent extends CLObject {
 
        private static void release(MemoryAddress p) {
                try {
-                       System.err.printf("** release event %016x\n", api.Memory.toLong(p));
                        clReleaseEvent(p);
                } catch (Throwable t) {
                }