From 6fb6786406ab08d0beed3a36a50b3a666b64c8b9 Mon Sep 17 00:00:00 2001 From: Not Zed Date: Thu, 9 Dec 2021 08:25:37 +1030 Subject: [PATCH] Checkpoint panama code --- .../au/notzed/zcl/test/TestAllocate.java | 194 ++++++++++++++ .../au/notzed/zcl/test/TestCopies.java | 248 +++++++++++++----- .../au/notzed/zcl/test/TestMemory.java | 6 +- .../au/notzed/zcl/test/TestMemoryLong.java | 8 +- src/notzed.zcl/classes/api/Memory.java | 134 +++++++++- src/notzed.zcl/classes/api/Native.java | 173 +----------- .../classes/au/notzed/zcl/CLBuffer.java | 9 +- .../classes/au/notzed/zcl/CLBufferInfo.java | 14 +- .../classes/au/notzed/zcl/CLCommandQueue.java | 58 ++-- .../classes/au/notzed/zcl/CLContext.java | 98 ++++--- .../classes/au/notzed/zcl/CLImageDesc.java | 28 +- .../classes/au/notzed/zcl/CLImageFormat.java | 40 +-- .../classes/au/notzed/zcl/CLKernel.java | 156 +++++------ .../classes/au/notzed/zcl/CLMemory.java | 8 +- .../classes/au/notzed/zcl/CLObject.java | 18 +- .../classes/au/notzed/zcl/CLPlatform.java | 19 +- .../tests/au/notzed/zcl/CLBufferTest.java | 6 +- .../tests/au/notzed/zcl/CLEventTest.java | 6 +- 18 files changed, 720 insertions(+), 503 deletions(-) create mode 100644 src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestAllocate.java diff --git a/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestAllocate.java b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestAllocate.java new file mode 100644 index 0000000..9789955 --- /dev/null +++ b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestAllocate.java @@ -0,0 +1,194 @@ +/* + License: public domain or equivalent. + */ +package au.notzed.zcl.test; + +import jdk.incubator.foreign.*; + +import api.*; + +import java.util.HashMap; +import static java.lang.Math.*; + +public class TestAllocate { + + public static void main(String[] args) { + HashMap results = new HashMap<>(); + int X = 1024 * 1024 * 10; + for (int c = 0; c < 5; c++) { + + long now; + if (true) { + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (ResourceScope scope = ResourceScope.newConfinedScope()) { + MemorySegment arg0 = MemorySegment.allocateNative(8, 8, scope); + MemorySegment arg1 = MemorySegment.allocateNative(8, 8, scope); + MemorySegment arg2 = MemorySegment.allocateNative(8, 8, scope); + MemorySegment arg3 = MemorySegment.allocateNative(8, 8, scope); + } + } + long t = System.nanoTime() - now; + System.out.printf(" %12.9f 4x8 scope\n", t * 1E-9); + results.compute(String.format("4x8 scope"), (k, v) -> v == null ? t : min(v, t)); + } + if (false) { + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (ResourceScope scope = ResourceScope.newConfinedScope()) { + MemorySegment arg0 = MemorySegment.allocateNative(8, 8, scope); + MemorySegment arg1 = MemorySegment.allocateNative(8, 8, scope); + } + } + long n = System.nanoTime() - now; + System.out.printf(" %12.9f 2x8 scope\n", n * 1E-9); + results.compute(String.format("2x8 scope"), (k, v) -> v == null ? n : min(v, n)); + } + if (false) { + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (ResourceScope scope = ResourceScope.newConfinedScope()) { + MemorySegment arg0 = MemorySegment.allocateNative(CLinker.C_POINTER, scope); + MemorySegment arg1 = MemorySegment.allocateNative(CLinker.C_POINTER, scope); + MemorySegment arg2 = MemorySegment.allocateNative(CLinker.C_POINTER, scope); + MemorySegment arg3 = MemorySegment.allocateNative(CLinker.C_POINTER, scope); + } + } + long u = System.nanoTime() - now; + System.out.printf(" %12.9f 4xC_POINTER\n", u * 1E-9); + results.compute(String.format("4xC_POINTER"), (k, v) -> v == null ? u : min(v, u)); + } + if (false) { + + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (ResourceScope scope = ResourceScope.newConfinedScope()) { + MemorySegment arg0 = MemorySegment.allocateNative(CLinker.C_POINTER, scope); + MemorySegment arg1 = MemorySegment.allocateNative(CLinker.C_POINTER, scope); + } + } + long m = System.nanoTime() - now; + System.out.printf(" %12.9f 2xC_POINTER\n", m * 1E-9); + results.compute(String.format("2xC_POINTER"), (k, v) -> v == null ? m : min(v, m)); + } + if (true) { + + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (Allocator a = api.Memory.stack()) { + MemorySegment arg0 = a.allocs(8); + MemorySegment arg1 = a.allocs(8); + MemorySegment arg2 = a.allocs(8); + MemorySegment arg3 = a.allocs(8); + } + } + long s = System.nanoTime() - now; + System.out.printf(" %12.9f 4x8 stack\n", s * 1E-9); + results.compute(String.format("4x8 stack"), (k, v) -> v == null ? s : min(v, s)); + } + if (false) { + + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (Allocator a = api.Memory.stack()) { + MemorySegment arg0 = a.allocs(8); + MemorySegment arg1 = a.allocs(8); + } + } + long h = System.nanoTime() - now; + System.out.printf(" %12.9f 2x8 stack\n", h * 1E-9); + results.compute(String.format("2x8 stack"), (k, v) -> v == null ? h : min(v, h)); + } + if (true) { + + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (Allocator a = api.Memory.stack2()) { + MemorySegment arg0 = a.allocs(8); + MemorySegment arg1 = a.allocs(8); + MemorySegment arg2 = a.allocs(8); + MemorySegment arg3 = a.allocs(8); + } + } + long r = System.nanoTime() - now; + System.out.printf(" %12.9f 4x8 stack2\n", r * 1E-9); + results.compute(String.format("4x8 stack2"), (k, v) -> v == null ? r : min(v, r)); + } + if (false) { + + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (Allocator a = api.Memory.stack2()) { + MemorySegment arg0 = a.allocs(8); + MemorySegment arg1 = a.allocs(8); + } + } + long p = System.nanoTime() - now; + System.out.printf(" %12.9f 2x8 stack2\n", p * 1E-9); + results.compute(String.format("2x8 stack2"), (k, v) -> v == null ? p : min(v, p)); + } + if (false) { + + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (Allocator a = api.Memory.stack3()) { + MemorySegment arg0 = a.allocs(8); + MemorySegment arg1 = a.allocs(8); + } + } + long g = System.nanoTime() - now; + System.out.printf(" %12.9f 2x8 stack3\n", g * 1E-9); + results.compute(String.format("2x8 stack3"), (k, v) -> v == null ? g : min(v, g)); + } + if (false) { + + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (Allocator a = api.Memory.stack3()) { + MemorySegment arg0 = a.allocs(8); + MemorySegment arg1 = a.allocs(8); + MemorySegment arg2 = a.allocs(8); + MemorySegment arg3 = a.allocs(8); + } + } + long q = System.nanoTime() - now; + System.out.printf(" %12.9f 4x8 stack3\n", q * 1E-9); + results.compute(String.format("4x8 stack3"), (k, v) -> v == null ? q : min(v, q)); + } + if (false) { + + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (Frame a = api.Memory.createFrame()) { + MemorySegment arg0 = a.allocate(8, 8); + MemorySegment arg1 = a.allocate(8, 8); + } + } + long o = System.nanoTime() - now; + System.out.printf(" %12.9f 2x8 stack4\n", o * 1E-9); + results.compute(String.format("2x8 stack4"), (k, v) -> v == null ? o : min(v, o)); + } + if (true) { + + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (Frame a = api.Memory.createFrame()) { + MemorySegment arg0 = a.allocate(8, 8); + MemorySegment arg1 = a.allocate(8, 8); + MemorySegment arg2 = a.allocate(8, 8); + MemorySegment arg3 = a.allocate(8, 8); + } + } + long l = System.nanoTime() - now; + System.out.printf(" %12.9f 4x8 stack4\n", l * 1E-9); + results.compute(String.format("4x8 stack4"), (k, v) -> v == null ? l : min(v, l)); + } + + System.out.println(); + } + + results.entrySet().stream().map((e) -> { + return String.format("%s %12.9f %s", e.getKey().substring(0, 3), 1E-9 * e.getValue(), e.getKey().substring(4)); + }).sorted().forEach(System.out::println); + } +} diff --git a/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestCopies.java b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestCopies.java index 570c802..7ce89e9 100644 --- a/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestCopies.java +++ b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestCopies.java @@ -18,26 +18,25 @@ public class TestCopies { final static SequenceLayout longVLayout = MemoryLayout.sequenceLayout(CLinker.C_LONG); final static VarHandle longVHandle = longVLayout.varHandle(long.class, MemoryLayout.PathElement.sequenceElement()); - public static long getLong(MemoryAddress p) { + public static long getLong(MemorySegment p) { return (long)longHandle.get(p); } - public static long getLong(MemoryAddress p, long i) { + public static long getLong(MemorySegment p, long i) { return (long)longVHandle.get(p, i); } - public static void setLong(MemoryAddress p, long v) { + public static void setLong(MemorySegment p, long v) { longHandle.set(p, v); } - public static void setLong(MemoryAddress p, long i, long v) { + public static void setLong(MemorySegment p, long i, long v) { longVHandle.set(p, i, v); } static void copyLoop(long[] src, MemorySegment dst) { - MemoryAddress base = dst.address(); for (int i = 0; i < src.length; i++) { - setLong(base, i, src[i]); + setLong(dst, i, src[i]); } } @@ -48,99 +47,228 @@ public class TestCopies { public static void main(String[] args) { int[] lengths = { //0, memory segment cannot allocate 0, sigh - 1, - 2, - 4, - 8, - 16, - 32, - 64, - 128, - 256,}; + 1}; + //2, 4, 8, 16, 32, 64, 128, 256,}; HashMap results = new HashMap<>(); - int X = 1024 * 1024; + int X = 1024 * 1024 * 10; for (int c = 0; c < 5; c++) { - for (int len: lengths) { - long[] data = new long[len]; + + { long now; + if (true) { + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (ResourceScope scope = ResourceScope.newConfinedScope()) { + MemorySegment arg0 = MemorySegment.allocateNative(8, 8, scope); + MemorySegment arg1 = MemorySegment.allocateNative(8, 8, scope); + MemorySegment arg2 = MemorySegment.allocateNative(8, 8, scope); + MemorySegment arg3 = MemorySegment.allocateNative(8, 8, scope); + } + } + long t = System.nanoTime() - now; + System.out.printf(" %12.9f 4x8 scope\n", t * 1E-9); + results.compute(String.format("4x8 scope"), (k, v) -> v == null ? t : min(v, t)); + + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (ResourceScope scope = ResourceScope.newConfinedScope()) { + MemorySegment arg0 = MemorySegment.allocateNative(8, 8, scope); + MemorySegment arg1 = MemorySegment.allocateNative(8, 8, scope); + } + } + long n = System.nanoTime() - now; + System.out.printf(" %12.9f 2x8 scope\n", n * 1E-9); + results.compute(String.format("2x8 scope"), (k, v) -> v == null ? n : min(v, n)); + + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (ResourceScope scope = ResourceScope.newConfinedScope()) { + MemorySegment arg0 = MemorySegment.allocateNative(CLinker.C_POINTER, scope); + MemorySegment arg1 = MemorySegment.allocateNative(CLinker.C_POINTER, scope); + MemorySegment arg2 = MemorySegment.allocateNative(CLinker.C_POINTER, scope); + MemorySegment arg3 = MemorySegment.allocateNative(CLinker.C_POINTER, scope); + } + } + long u = System.nanoTime() - now; + System.out.printf(" %12.9f 4xC_POINTER\n", u * 1E-9); + results.compute(String.format("4xC_POINTER"), (k, v) -> v == null ? u : min(v, u)); + + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (ResourceScope scope = ResourceScope.newConfinedScope()) { + MemorySegment arg0 = MemorySegment.allocateNative(CLinker.C_POINTER, scope); + MemorySegment arg1 = MemorySegment.allocateNative(CLinker.C_POINTER, scope); + } + } + long m = System.nanoTime() - now; + System.out.printf(" %12.9f 2xC_POINTER\n", m * 1E-9); + results.compute(String.format("2xC_POINTER"), (k, v) -> v == null ? m : min(v, m)); + } now = System.nanoTime(); for (int l = 0; l < X; l++) { - try ( ResourceScope scope = ResourceScope.newConfinedScope()) { - MemorySegment seg = MemorySegment.allocateNative(data.length * 8, scope); - copyLoop(data, seg); + try (Allocator a = api.Memory.stack()) { + MemorySegment arg0 = a.allocs(8); + MemorySegment arg1 = a.allocs(8); + MemorySegment arg2 = a.allocs(8); + MemorySegment arg3 = a.allocs(8); } } - long t = System.nanoTime() - now; - System.out.printf(" %12.9f %3d copyLoop\n", t * 1E-9, len); - results.compute(String.format("%3d copyLoop", len), (k, v) -> v == null ? t : min(v, t)); + long s = System.nanoTime() - now; + System.out.printf(" %12.9f 4x8 stack\n", s * 1E-9); + results.compute(String.format("4x8 stack"), (k, v) -> v == null ? s : min(v, s)); now = System.nanoTime(); for (int l = 0; l < X; l++) { - try ( ResourceScope scope = ResourceScope.newConfinedScope()) { - MemorySegment seg = MemorySegment.allocateNative(data.length * 8, scope); - copyBulk(data, seg); + try (Allocator a = api.Memory.stack()) { + MemorySegment arg0 = a.allocs(8); + MemorySegment arg1 = a.allocs(8); } } - long s = System.nanoTime() - now; - System.out.printf(" %12.9f %3d copyBulk\n", s * 1E-9, len); - results.compute(String.format("%3d copyBulk", len), (k, v) -> v == null ? s : min(v, s)); + long h = System.nanoTime() - now; + System.out.printf(" %12.9f 2x8 stack\n", h * 1E-9); + results.compute(String.format("2x8 stack"), (k, v) -> v == null ? h : min(v, h)); + + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (Allocator a = api.Memory.stack2()) { + MemorySegment arg0 = a.allocs(8); + MemorySegment arg1 = a.allocs(8); + MemorySegment arg2 = a.allocs(8); + MemorySegment arg3 = a.allocs(8); + } + } + long r = System.nanoTime() - now; + System.out.printf(" %12.9f 4x8 stack2\n", r * 1E-9); + results.compute(String.format("4x8 stack2"), (k, v) -> v == null ? r : min(v, r)); + + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (Allocator a = api.Memory.stack2()) { + MemorySegment arg0 = a.allocs(8); + MemorySegment arg1 = a.allocs(8); + } + } + long p = System.nanoTime() - now; + System.out.printf(" %12.9f 2x8 stack2\n", p * 1E-9); + results.compute(String.format("2x8 stack2"), (k, v) -> v == null ? p : min(v, p)); + + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (Allocator a = api.Memory.stack3()) { + MemorySegment arg0 = a.allocs(8); + MemorySegment arg1 = a.allocs(8); + } + } + long g = System.nanoTime() - now; + System.out.printf(" %12.9f 2x8 stack3\n", g * 1E-9); + results.compute(String.format("2x8 stack3"), (k, v) -> v == null ? g : min(v, g)); + + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (Allocator a = api.Memory.stack3()) { + MemorySegment arg0 = a.allocs(8); + MemorySegment arg1 = a.allocs(8); + MemorySegment arg2 = a.allocs(8); + MemorySegment arg3 = a.allocs(8); + } + } + long q = System.nanoTime() - now; + System.out.printf(" %12.9f 4x8 stack3\n", q * 1E-9); + results.compute(String.format("4x8 stack3"), (k, v) -> v == null ? q : min(v, q)); + } for (int len: lengths) { long[] data = new long[len]; long now; - now = System.nanoTime(); - try ( ResourceScope scope = ResourceScope.newConfinedScope()) { - MemorySegment seg = MemorySegment.allocateNative(data.length * 8, scope); + if (false) { + now = System.nanoTime(); for (int l = 0; l < X; l++) { - copyLoop(data, seg); + try (ResourceScope scope = ResourceScope.newConfinedScope()) { + MemorySegment seg = MemorySegment.allocateNative(data.length * 8, scope); + copyLoop(data, seg); + } } + long t = System.nanoTime() - now; + System.out.printf(" %12.9f %3d copyLoop\n", t * 1E-9, len); + results.compute(String.format("%3d copyLoop", len), (k, v) -> v == null ? t : min(v, t)); } - long t = System.nanoTime() - now; - System.out.printf(" %12.9f %3d copyLoop pre-alloc\n", t * 1E-9, len); - results.compute(String.format("%3d copyLoop pre-alloc", len), (k, v) -> v == null ? t : min(v, t)); - now = System.nanoTime(); - try ( ResourceScope scope = ResourceScope.newConfinedScope()) { - MemorySegment seg = MemorySegment.allocateNative(data.length * 8, scope); + if (false) { + now = System.nanoTime(); for (int l = 0; l < X; l++) { - copyBulk(data, seg); + try (ResourceScope scope = ResourceScope.newConfinedScope()) { + MemorySegment seg = MemorySegment.allocateNative(data.length * 8, scope); + //copyBulk(data, seg); + } } + long s = System.nanoTime() - now; + System.out.printf(" %12.9f %3d copyBulk\n", s * 1E-9, len); + results.compute(String.format("%3d copyBulk", len), (k, v) -> v == null ? s : min(v, s)); } - long s = System.nanoTime() - now; - System.out.printf(" %12.9f %3d copyBulk pre-alloc\n", s * 1E-9, len); - results.compute(String.format("%3d copyBulk pre-alloc", len), (k, v) -> v == null ? s : min(v, s)); } + if (false) + for (int len: lengths) { + long[] data = new long[len]; + long now; + + now = System.nanoTime(); + try (ResourceScope scope = ResourceScope.newConfinedScope()) { + MemorySegment seg = MemorySegment.allocateNative(data.length * 8, scope); + for (int l = 0; l < X; l++) { + copyLoop(data, seg); + } + } + long t = System.nanoTime() - now; + System.out.printf(" %12.9f %3d copyLoop pre-alloc\n", t * 1E-9, len); + results.compute(String.format("%3d copyLoop pre-alloc", len), (k, v) -> v == null ? t : min(v, t)); + + now = System.nanoTime(); + try (ResourceScope scope = ResourceScope.newConfinedScope()) { + MemorySegment seg = MemorySegment.allocateNative(data.length * 8, scope); + for (int l = 0; l < X; l++) { + copyBulk(data, seg); + } + } + long s = System.nanoTime() - now; + System.out.printf(" %12.9f %3d copyBulk pre-alloc\n", s * 1E-9, len); + results.compute(String.format("%3d copyBulk pre-alloc", len), (k, v) -> v == null ? s : min(v, s)); + } + // if have stack allocator: for (int len: lengths) { long[] data = new long[len]; long now; - now = System.nanoTime(); - for (int l = 0; l < X; l++) { - try ( Allocator a = api.Memory.stack()) { - MemorySegment seg = a.allocs(data.length * 8); - copyLoop(data, seg); + if (false) { + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (Allocator a = api.Memory.stack()) { + MemorySegment seg = a.allocs(data.length * 8); + copyLoop(data, seg); + } } + long t = System.nanoTime() - now; + System.out.printf(" %12.9f %3d copyLoop stack\n", t * 1E-9, len); + results.compute(String.format("%3d copyLoop stack", len), (k, v) -> v == null ? t : min(v, t)); } - long t = System.nanoTime() - now; - System.out.printf(" %12.9f %3d copyLoop stack\n", t * 1E-9, len); - results.compute(String.format("%3d copyLoop stack", len), (k, v) -> v == null ? t : min(v, t)); - now = System.nanoTime(); - for (int l = 0; l < X; l++) { - try ( Allocator a = api.Memory.stack()) { - MemorySegment seg = a.allocs(data.length * 8); - copyBulk(data, seg); + if (false) { + now = System.nanoTime(); + for (int l = 0; l < X; l++) { + try (Allocator a = api.Memory.stack()) { + MemorySegment seg = a.allocs(data.length * 8); + //copyBulk(data, seg); + } } + long s = System.nanoTime() - now; + System.out.printf(" %12.9f %3d copyBulk stack\n", s * 1E-9, len); + results.compute(String.format("%3d copyBulk stack", len), (k, v) -> v == null ? s : min(v, s)); } - long s = System.nanoTime() - now; - System.out.printf(" %12.9f %3d copyBulk stack\n", s * 1E-9, len); - results.compute(String.format("%3d copyBulk stack", len), (k, v) -> v == null ? s : min(v, s)); } System.out.println(); diff --git a/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestMemory.java b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestMemory.java index 9d5c5a6..276c1fe 100644 --- a/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestMemory.java +++ b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestMemory.java @@ -7,6 +7,7 @@ import api.Native; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.FloatBuffer; +import jdk.incubator.foreign.MemoryAccess; import jdk.incubator.foreign.MemoryAddress; import jdk.incubator.foreign.MemorySegment; import jdk.incubator.foreign.ResourceScope; @@ -20,10 +21,9 @@ public class TestMemory { int len = (int)(seg.byteSize() >>> 2); float sum = 0; - MemoryAddress add = seg.address(); for (int i = 0; i < len; i++) - sum += Native.getFloat(add, i); - Native.setFloat(add, sum); + sum += MemoryAccess.getFloatAtIndex(seg, i); + MemoryAccess.setFloat(seg, sum); } static void check(float[] seg) { diff --git a/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestMemoryLong.java b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestMemoryLong.java index 5193abb..bc8581e 100644 --- a/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestMemoryLong.java +++ b/src/notzed.zcl.demo/classes/au/notzed/zcl/test/TestMemoryLong.java @@ -3,11 +3,10 @@ */ package au.notzed.zcl.test; -import api.Native; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.LongBuffer; -import jdk.incubator.foreign.MemoryAddress; +import jdk.incubator.foreign.MemoryAccess; import jdk.incubator.foreign.MemorySegment; import jdk.incubator.foreign.ResourceScope; @@ -20,10 +19,9 @@ public class TestMemoryLong { int len = (int)(seg.byteSize() >>> 3); long sum = 0; - MemoryAddress add = seg.address(); for (int i = 0; i < len; i++) - sum += Native.getLong(add, i); - Native.setLong(add, sum); + sum += MemoryAccess.getLongAtIndex(seg, i); + MemoryAccess.setLong(seg, sum); } static void check(long[] seg) { diff --git a/src/notzed.zcl/classes/api/Memory.java b/src/notzed.zcl/classes/api/Memory.java index 68080e6..86dbe17 100644 --- a/src/notzed.zcl/classes/api/Memory.java +++ b/src/notzed.zcl/classes/api/Memory.java @@ -17,9 +17,9 @@ package api; import jdk.incubator.foreign.*; -import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandle; import java.lang.invoke.MethodType; +import java.lang.ref.Cleaner; /** * A utility library for memory operations including a stack allocator. @@ -219,4 +219,136 @@ public class Memory { throw new RuntimeException(t); } } + + interface ThreadAllocator { + + Allocator create(); + } + // segment based stack without using malloc, locked to single thread + private static final ThreadLocal stacks2 = ThreadLocal.withInitial(() -> new ThreadAllocator() { + private final ResourceScope scope = ResourceScope.newConfinedScope(); + private final MemorySegment root = MemorySegment.allocateNative(4096, 4096, scope); + private MemorySegment base = root; + + //{ + // System.out.printf("0x%016x init stack\n", root.address().toRawLongValue()); + //} + @Override + public Allocator create() { + MemorySegment here = base; + + //System.out.printf("0x%016x new frame\n", here.address().toRawLongValue()); + return new Allocator() { + @Override + public void close() { + base = here; + //System.out.printf("0x%016x close frame\n", here.address().toRawLongValue()); + } + + @Override + public MemoryAddress alloca(long size) { + return allocs(size).address(); + } + + @Override + public MemorySegment allocs(long size) { + long alloc = (size + 7) & ~7; + MemorySegment seg = base.asSlice(0, size); + + //System.out.printf("0x%016x alloc %d\n", base.address().toRawLongValue(), size); + base = base.asSlice(alloc); + return seg; + } + }; + } + }); + + public static Allocator stack2() { + return stacks2.get().create(); + } + + public static Allocator stack3() { + return new Allocator() { + private final ResourceScope scope = ResourceScope.newConfinedScope(); + private final MemorySegment root = MemorySegment.allocateNative(4096, 4096, scope); + private MemorySegment base = root; + + @Override + public void close() { + scope.close(); + } + + @Override + public MemoryAddress alloca(long size) { + return allocs(size).address(); + } + + @Override + public MemorySegment allocs(long size) { + long alloc = (size + 7) & ~7; + MemorySegment seg = base.asSlice(0, size); + + //System.out.printf("0x%016x alloc %d\n", base.address().toRawLongValue(), size); + base = base.asSlice(alloc); + return seg; + } + }; + + } + + static class Stack4 { + + private final MemorySegment stack; + private long sp; + private Thread thread = Thread.currentThread(); + + Stack4(ResourceScope scope) { + stack = MemorySegment.allocateNative(4096, 4096, scope); + sp = 4096; + } + + Frame createFrame() { + + return new Frame() { + private final long tos = sp; + private Thread self = thread; + private ResourceScope scope; + + @Override + public MemorySegment allocate(long size, long alignment) { + if (self != Thread.currentThread()) + throw new IllegalStateException(); + if (alignment != Long.highestOneBit(alignment)) + throw new IllegalArgumentException(); + if (sp >= size) { + sp = (sp - size) & ~(alignment - 1); + return stack.asSlice(sp, size); + } else { + if (scope == null) + scope = ResourceScope.newConfinedScope(); + return MemorySegment.allocateNative(size, alignment, scope); + } + } + + @Override + public void close() { + sp = tos; + self = null; + if (scope != null) { + scope.close(); + scope = null; + } + } + }; + } + + } + + static final ResourceScope scope4 = ResourceScope.newSharedScope(Cleaner.create()); + private static final ThreadLocal stacks4 = ThreadLocal.withInitial(() -> new Stack4(scope4)); + + public static Frame createFrame() { + return stacks4.get().createFrame(); + } + } diff --git a/src/notzed.zcl/classes/api/Native.java b/src/notzed.zcl/classes/api/Native.java index 6d81f00..68dde60 100644 --- a/src/notzed.zcl/classes/api/Native.java +++ b/src/notzed.zcl/classes/api/Native.java @@ -96,151 +96,8 @@ public class Native { public static MemoryAddress addr(MemorySegment o) { return o != null ? o.address() : MemoryAddress.NULL; } - - public static byte getByte(MemoryAddress p) { - return (byte)byteHandle.get(p); - } - - public static byte getByte(MemoryAddress p, long i) { - return (byte)byteVHandle.get(p, i); - } - - public static void setByte(MemoryAddress p, byte v) { - byteHandle.set(p, v); - } - - public static void setByte(MemoryAddress p, long i, byte v) { - byteVHandle.set(p, i, v); - } - - public static int getInt(MemoryAddress p) { - return (int)intHandle.get(p); - } - - public static int getInt(MemoryAddress p, long i) { - return (int)intVHandle.get(p, i); - } - - public static void setInt(MemoryAddress p, int v) { - intHandle.set(p, v); - } - - public static void setInt(MemoryAddress p, long i, int v) { - intVHandle.set(p, i, v); - } - - public static short getShort(MemoryAddress p) { - return (short)shortHandle.get(p); - } - - public static void setShort(MemoryAddress p, short v) { - shortHandle.set(p, v); - } - - public static void setShort(MemoryAddress p, long i, short v) { - shortVHandle.set(p, i, v); - } - - public static long getLong(MemoryAddress p) { - return (long)longHandle.get(p); - } - - public static long getLong(MemoryAddress p, long i) { - return (long)longVHandle.get(p, i); - } - - public static void setLong(MemoryAddress p, long v) { - longHandle.set(p, v); - } - - public static void setLong(MemoryAddress p, long i, long v) { - longVHandle.set(p, i, v); - } - - public static float getFloat(MemoryAddress p) { - return (float)floatHandle.get(p); - } - - public static float getFloat(MemoryAddress p, long i) { - return (float)floatVHandle.get(p, i); - } - - public static void setFloat(MemoryAddress p, float v) { - floatHandle.set(p, v); - } - - public static void setFloat(MemoryAddress p, long i, float v) { - floatVHandle.set(p, i, v); - } - - public static double getDouble(MemoryAddress p) { - return (int)doubleHandle.get(p); - } - - public static double getDouble(MemoryAddress p, long i) { - return (int)doubleVHandle.get(p, i); - } - - public static void setDouble(MemoryAddress p, double v) { - doubleHandle.set(p, v); - } - - public static void setDouble(MemoryAddress p, long i, double v) { - doubleVHandle.set(p, i, v); - } - - public static MemoryAddress getAddr(MemoryAddress p) { - return (MemoryAddress)addrHandle.get(p); - } - - public static MemoryAddress getAddr(MemoryAddress p, long i) { - return (MemoryAddress)addrVHandle.get(p, i); - } - - public static void setAddr(MemoryAddress p, MemoryAddress v) { - addrHandle.set(p, v); - } - - public static void setAddr(MemoryAddress p, long i, MemoryAddress v) { - addrVHandle.set(p, i, v); - } - + /* helpers - java to native */ - public static MemoryAddress toAddrV(Allocator frame, T[] array, int len) { - MemoryAddress list = frame.alloca(8 * len); - - for (int i = 0; i < len; i++) - setAddr(list, i, array[i].addr()); - - return list; - } - - public static MemoryAddress toAddrV(Allocator frame, T[] array) { - return toAddrV(frame, array, array.length); - } - - public static MemoryAddress toAddrV(Allocator frame, String[] array) { - if (array != null) { - MemoryAddress list = frame.alloca(8 * array.length); - - for (int i = 0; i < array.length; i++) - setAddr(list, i, toByteV(frame, array[i])); - - return list; - } else { - return MemoryAddress.NULL; - } - } - - public static MemoryAddress toLongV(Allocator frame, long[] array) { - MemoryAddress list = frame.alloca(8 * array.length); - - for (int i = 0; i < array.length; i++) - setLong(list, i, array[i]); - - return list; - } - public static MemorySegment allocAddrV(ResourceScope frame, int len) { return MemorySegment.allocateNative(CLinker.C_POINTER.byteSize() * len, CLinker.C_POINTER.byteAlignment(), frame); } @@ -278,34 +135,6 @@ public class Native { } return list; } - - public static MemoryAddress toByteV(Allocator frame, byte[] data) { - if (data != null) { - MemoryAddress list = frame.alloca(data.length); - - for (int i = 0; i < data.length; i++) - setByte(list, i, data[i]); - - return list; - } else { - return MemoryAddress.NULL; - } - } - - public static MemoryAddress toByteV(Allocator frame, String string) { - if (string != null) { - byte[] data = string.getBytes(); - MemoryAddress list = frame.alloca(data.length + 1); - - for (int i = 0; i < data.length; i++) - setByte(list, i, data[i]); - setByte(list, data.length, (byte)0); - - return list; - } else { - return MemoryAddress.NULL; - } - } /* helpers - native to java */ public static T[] toObjectV(MemorySegment list, T[] array, Function create) { diff --git a/src/notzed.zcl/classes/au/notzed/zcl/CLBuffer.java b/src/notzed.zcl/classes/au/notzed/zcl/CLBuffer.java index bd580d5..f2c339f 100644 --- a/src/notzed.zcl/classes/au/notzed/zcl/CLBuffer.java +++ b/src/notzed.zcl/classes/au/notzed/zcl/CLBuffer.java @@ -66,14 +66,13 @@ public class CLBuffer extends CLMemory { requireAPIVersion(CLPlatform.VERSION_1_1); try (Allocator frame = Memory.stack()) { - MemoryAddress pres = frame.alloca(8); - MemoryAddress pinfo = info.toNative(frame); + MemorySegment pres = frame.allocs(8); + MemorySegment pinfo = info.toNative(frame); MemoryAddress b; int res; - b = clCreateSubBuffer(addr(), flags, CL_BUFFER_CREATE_TYPE_REGION, pinfo, pres); - res = getInt(pres); - if (res != 0) + b = clCreateSubBuffer(addr(), flags, CL_BUFFER_CREATE_TYPE_REGION, pinfo.address(), pres.address()); + if ((res = MemoryAccess.getInt(pres)) != 0) throw new CLRuntimeException(res); return Native.resolve(b, (c) -> new CLBuffer(c, getObjectPlatform())); diff --git a/src/notzed.zcl/classes/au/notzed/zcl/CLBufferInfo.java b/src/notzed.zcl/classes/au/notzed/zcl/CLBufferInfo.java index 82a4764..2fa4bf8 100644 --- a/src/notzed.zcl/classes/au/notzed/zcl/CLBufferInfo.java +++ b/src/notzed.zcl/classes/au/notzed/zcl/CLBufferInfo.java @@ -17,8 +17,8 @@ package au.notzed.zcl; import api.Allocator; -import api.Native; -import jdk.incubator.foreign.MemoryAddress; +import jdk.incubator.foreign.MemoryAccess; +import jdk.incubator.foreign.MemorySegment; /** * Parameters for Buffer.createSubBuffer() @@ -38,15 +38,15 @@ public abstract class CLBufferInfo { this.size = size; } - MemoryAddress toNative(Allocator frame) { - MemoryAddress addr = frame.alloca(2*8); // FIXME: size_t + MemorySegment toNative(Allocator frame) { + MemorySegment addr = frame.allocs(2*8); // FIXME: size_t - Native.setLong(addr, origin); - Native.setLong(addr, 1, size); + MemoryAccess.setLong(addr, origin); + MemoryAccess.setLongAtIndex(addr, 1, size); return addr; } } - abstract MemoryAddress toNative(Allocator frame); + abstract MemorySegment toNative(Allocator frame); } diff --git a/src/notzed.zcl/classes/au/notzed/zcl/CLCommandQueue.java b/src/notzed.zcl/classes/au/notzed/zcl/CLCommandQueue.java index 26464dd..a6ea924 100644 --- a/src/notzed.zcl/classes/au/notzed/zcl/CLCommandQueue.java +++ b/src/notzed.zcl/classes/au/notzed/zcl/CLCommandQueue.java @@ -723,11 +723,8 @@ public class CLCommandQueue extends CLObject { CLEventList event) throws CLException { try (ResourceScope scope = ResourceScope.newConfinedScope()) { MemorySegment seg = MemorySegment.allocateNative(pattern.length, scope); - MemoryAddress add = seg.address(); - - for (int i = 0; i < pattern.length; i++) - setByte(add, i, pattern[i]); + seg.copyFrom(MemorySegment.ofArray(pattern)); enqueueFillBuffer(buffer, seg, offset, size, wait, event); } } @@ -752,11 +749,8 @@ public class CLCommandQueue extends CLObject { CLEventList event) throws CLException { try (ResourceScope scope = ResourceScope.newConfinedScope()) { MemorySegment seg = MemorySegment.allocateNative(pattern.length * 2, scope); - MemoryAddress add = seg.address(); - - for (int i = 0; i < pattern.length; i++) - setShort(add, i, pattern[i]); + seg.copyFrom(MemorySegment.ofArray(pattern)); enqueueFillBuffer(buffer, seg, offset * 2, size * 2, wait, event); } } @@ -781,11 +775,8 @@ public class CLCommandQueue extends CLObject { CLEventList event) throws CLException { try (ResourceScope scope = ResourceScope.newConfinedScope()) { MemorySegment seg = MemorySegment.allocateNative(pattern.length * 4, scope); - MemoryAddress add = seg.address(); - - for (int i = 0; i < pattern.length; i++) - setInt(add, i, pattern[i]); + seg.copyFrom(MemorySegment.ofArray(pattern)); enqueueFillBuffer(buffer, seg, offset * 4, size * 4, wait, event); } } @@ -829,11 +820,8 @@ public class CLCommandQueue extends CLObject { CLEventList event) throws CLException { try (ResourceScope scope = ResourceScope.newConfinedScope()) { MemorySegment seg = MemorySegment.allocateNative(pattern.length * 4, scope); - MemoryAddress add = seg.address(); - - for (int i = 0; i < pattern.length; i++) - setFloat(add, i, pattern[i]); + seg.copyFrom(MemorySegment.ofArray(pattern)); enqueueFillBuffer(buffer, seg, offset * 4, size * 4, wait, event); } } @@ -845,17 +833,24 @@ public class CLCommandQueue extends CLObject { * @param pattern pattern to fill * @param offset offset in multiples of the pattern size. * @param size number of elements in multiples of the pattern size. - * @param waiters - * @param events + * @param wait + * @param event * @since OpenCL 1.2 * @throws CLException */ - public native void enqueueFillBuffer(CLBuffer buffer, + public void enqueueFillBuffer(CLBuffer buffer, double[] pattern, long offset, long size, - CLEventList waiters, - CLEventList events) throws CLException; + CLEventList wait, + CLEventList event) throws CLException { + try (ResourceScope scope = ResourceScope.newConfinedScope()) { + MemorySegment seg = MemorySegment.allocateNative(pattern.length * 4, scope); + + seg.copyFrom(MemorySegment.ofArray(pattern)); + enqueueFillBuffer(buffer, seg, offset * 4, size * 4, wait, event); + } + } public void enqueueCopyBuffer(CLBuffer srcmem, CLBuffer dstmem, long srcoffset, long dstoffset, long size, CLEventList wait, @@ -1367,8 +1362,7 @@ public class CLCommandQueue extends CLObject { flags, offset, size, info.nwait, info.wait, info.event, pres.address()); - res = MemoryAccess.getInt(pres); - if (res != 0) + if ((res = MemoryAccess.getInt(pres)) != 0) throw new CLException(res); return Memory.ofNative(cmap, size).asByteBuffer(); @@ -1412,8 +1406,7 @@ public class CLCommandQueue extends CLObject { cstride.address(), cslice.address(), info.nwait, info.wait, info.event, pres.address()); - res = MemoryAccess.getInt(pres); - if (res != 0) + if ((res = MemoryAccess.getInt(pres)) != 0) throw new CLException(res); stride = MemoryAccess.getLong(cstride); @@ -1584,18 +1577,27 @@ public class CLCommandQueue extends CLObject { for (Object a: args) { if (a instanceof CLMemory) { MemoryAccess.setAddressAtIndex(memstage, nmem, ((CLMemory)a).addr()); - MemoryAccess.setAddressAtIndex(memptrs, nmem, memstage.asSlice(nmem * 8)); + MemoryAccess.setAddressAtIndex(memptrs, nmem, memstage.asSlice(nmem * CLinker.C_POINTER.byteSize())); nmem++; } } Callback call = Native.resolve( Call_pv_v.stub((MemoryAddress memargs) -> { + int tmem = 0; int xmem = 0; + // for-fucks-sake + for (int i = 0; i < args.length; i++) { + if (args[i] instanceof CLMemory) + tmem++; + } + + MemorySegment seg = memargs.asSegment(tmem * CLinker.C_POINTER.byteSize(), ResourceScope.globalScope()); + for (int i = 0; i < args.length; i++) { if (args[i] instanceof CLMemory) { - MemoryAddress mem = getAddr(memargs, xmem); + MemoryAddress mem = MemoryAccess.getAddressAtIndex(seg, xmem); long size = ((CLMemory)args[i]).getSize(); save[i] = Memory.ofNative(mem, size).asByteBuffer().order(ByteOrder.nativeOrder()); @@ -1607,7 +1609,7 @@ public class CLCommandQueue extends CLObject { }), (p) -> new Callback<>(p, kernel)); - res = clEnqueueNativeKernel(addr(), call.addr(), memstage.address(), nmem * 8, nmem, memstage.address(), memptrs.address(), info.nwait, info.wait, info.event); + res = clEnqueueNativeKernel(addr(), call.addr(), memstage.address(), nmem * CLinker.C_POINTER.byteSize(), nmem, memstage.address(), memptrs.address(), info.nwait, info.wait, info.event); if (res != 0) throw new CLException(res); diff --git a/src/notzed.zcl/classes/au/notzed/zcl/CLContext.java b/src/notzed.zcl/classes/au/notzed/zcl/CLContext.java index f1b84f4..3035dcb 100644 --- a/src/notzed.zcl/classes/au/notzed/zcl/CLContext.java +++ b/src/notzed.zcl/classes/au/notzed/zcl/CLContext.java @@ -449,32 +449,31 @@ public class CLContext extends CLObject { */ public CLImage createImage(long flags, CLImageFormat fmt, CLImageDesc desc, MemorySegment hostseg) throws CLRuntimeException, UnsupportedOperationException { try (Allocator frame = Memory.stack()) { - MemoryAddress cfmt = CLImageFormat.toNative(frame, fmt); - MemoryAddress cres = frame.alloca(8); + MemorySegment cfmt = CLImageFormat.toNative(frame, fmt); + MemorySegment cres = frame.allocs(8); MemoryAddress ci; int res; // FIXME: perform range checks if (haveAPIVersion(CLPlatform.VERSION_1_2)) { - MemoryAddress cdesc = CLImageDesc.toNative(frame, desc); + MemorySegment cdesc = CLImageDesc.toNative(frame, desc); - ci = clCreateImage(addr(), flags, cfmt, cdesc, addr(hostseg), cres); + ci = clCreateImage(addr(), flags, cfmt.address(), cdesc.address(), addr(hostseg), cres.address()); } else { switch (desc.imageType) { case CL_MEM_OBJECT_IMAGE2D: - ci = clCreateImage2D(addr(), flags, cfmt, desc.imageWidth, desc.imageHeight, desc.imageRowPitch, addr(hostseg), cres); + ci = clCreateImage2D(addr(), flags, cfmt.address(), desc.imageWidth, desc.imageHeight, desc.imageRowPitch, addr(hostseg), cres.address()); break; case CL_MEM_OBJECT_IMAGE3D: - ci = clCreateImage3D(addr(), flags, cfmt, desc.imageWidth, desc.imageHeight, desc.imageDepth, - desc.imageRowPitch, desc.imageSlicePitch, addr(hostseg), cres); + ci = clCreateImage3D(addr(), flags, cfmt.address(), desc.imageWidth, desc.imageHeight, desc.imageDepth, + desc.imageRowPitch, desc.imageSlicePitch, addr(hostseg), cres.address()); break; default: throw new UnsupportedOperationException("Requires OpenCL 1.2"); } } - res = getInt(cres); - if (res != 0) + if ((res = MemoryAccess.getInt(cres)) != 0) throw new CLRuntimeException(res); if (hostseg != null && (flags & CL_MEM_USE_HOST_PTR) != 0) @@ -581,23 +580,23 @@ public class CLContext extends CLObject { */ public CLImageFormat[] getSupportedImageFormats(long flags, int type) throws CLRuntimeException { try (Allocator frame = Memory.stack()) { - MemoryAddress cnum = frame.alloca(8); - MemoryAddress list; + MemorySegment cnum = frame.allocs(8); + MemorySegment list; int num; int res; - res = clGetSupportedImageFormats(addr(), flags, type, 0, MemoryAddress.NULL, cnum); + res = clGetSupportedImageFormats(addr(), flags, type, 0, MemoryAddress.NULL, cnum.address()); if (res != 0) throw new CLRuntimeException(res); - num = getInt(cnum); - list = frame.alloca(num * 8); + num = MemoryAccess.getInt(cnum); + list = frame.allocs(num * 8); - res = clGetSupportedImageFormats(addr(), flags, type, num, list, cnum); + res = clGetSupportedImageFormats(addr(), flags, type, num, list.address(), cnum.address()); CLImageFormat[] out = new CLImageFormat[num]; for (int i = 0; i < out.length; i++) - out[i] = CLImageFormat.fromNative(getAddr(list, i)); + out[i] = CLImageFormat.fromNative(MemoryAccess.getAddressAtIndex(list, i).asSegment(CLImageFormat.layout().byteSize(), ResourceScope.globalScope())); return out; } catch (RuntimeException | Error t) { @@ -650,13 +649,12 @@ public class CLContext extends CLObject { //@Deprecated public CLSampler createSampler(boolean norm, int addr_mode, int filter_mode) throws CLRuntimeException { try (Allocator frame = Memory.stack()) { - MemoryAddress cres = frame.alloca(8); + MemorySegment cres = frame.allocs(8); int res; MemoryAddress cs; - cs = clCreateSampler(addr(), norm ? 1 : 0, addr_mode, filter_mode, cres); - res = getInt(cres); - if (res != 0) + cs = clCreateSampler(addr(), norm ? 1 : 0, addr_mode, filter_mode, cres.address()); + if ((res = MemoryAccess.getInt(cres)) != 0) throw new CLRuntimeException(res); return resolve(cs, (x) -> new CLSampler(x, getObjectPlatform())); @@ -706,7 +704,7 @@ public class CLContext extends CLObject { } static void copy(MemorySegment addr, byte[][] list) { - for (int i = 0, k = 0; i < list.length; i++) { + for (int i = 0, k = 0; i < list.length; i++) { addr.asSlice(k).copyFrom(MemorySegment.ofArray(list[i])); k += list[i].length; } @@ -865,17 +863,15 @@ public class CLContext extends CLObject { public CLProgram createProgramWithBuiltInKernels(CLDevice[] devices, String names) throws CLException, UnsupportedOperationException { requireAPIVersion(CLPlatform.VERSION_1_2); - try (Allocator frame = Memory.stack()) { - MemoryAddress cdevs = toAddrV(frame, devices); - MemoryAddress cnames = toByteV(frame, names); - MemoryAddress cret = frame.alloca(8); + try (ResourceScope frame = ResourceScope.newConfinedScope()) { + MemorySegment cdevs = toAddrV(frame, devices); + MemorySegment cnames = CLinker.toCString(names, frame); + MemorySegment pres = MemorySegment.allocateNative(CLinker.C_INT, frame); MemoryAddress cp; int res; - cp = clCreateProgramWithBuiltInKernels(addr(), devices.length, cdevs, cnames, cret); - - res = getInt(cret); - if (res != 0) + cp = clCreateProgramWithBuiltInKernels(addr(), devices.length, cdevs.address(), cnames.address(), pres.address()); + if ((res = MemoryAccess.getInt(pres)) != 0) throw new CLException(res); return resolve(cp, (x) -> new CLProgram(x, getObjectPlatform())); @@ -902,19 +898,17 @@ public class CLContext extends CLObject { public CLProgram linkProgram(CLDevice[] devices, String options, CLProgram[] programs, CLNotify notify) throws CLException, UnsupportedOperationException { requireAPIVersion(CLPlatform.VERSION_1_2); - try (Allocator frame = Memory.stack(); + try (ResourceScope frame = ResourceScope.newConfinedScope(); Callback> cnotify = CLNotify.call(notify, (x) -> new CLProgram(x, getObjectPlatform()))) { - MemoryAddress cdevs = toAddrV(frame, devices); - MemoryAddress coptions = toByteV(frame, options); - MemoryAddress cprogs = toAddrV(frame, programs); - MemoryAddress cret = frame.alloca(8); + MemorySegment cdevs = toAddrV(frame, devices); + MemorySegment coptions = options != null ? CLinker.toCString(options, frame) : MemorySegment.globalNativeSegment(); + MemorySegment cprogs = toAddrV(frame, programs); + MemorySegment pres = MemorySegment.allocateNative(CLinker.C_INT, frame); MemoryAddress cp; int res; - cp = clLinkProgram(addr(), devices.length, cdevs, coptions, programs.length, cprogs, cnotify.addr(), MemoryAddress.NULL, cret); - - res = getInt(cret); - if (res != 0) + cp = clLinkProgram(addr(), devices.length, cdevs.address(), coptions.address(), programs.length, cprogs.address(), cnotify.addr(), MemoryAddress.NULL, pres.address()); + if ((res = MemoryAccess.getInt(pres)) != 0) throw new CLException(res); return resolve(cp, (x) -> new CLProgram(x, getObjectPlatform())); @@ -1018,13 +1012,12 @@ public class CLContext extends CLObject { int bufobj) { GLext gl = getGLext(); try (Allocator frame = Memory.stack()) { - MemoryAddress cres = frame.alloca(8); + MemorySegment cres = frame.allocs(8); MemoryAddress ce; int res; - ce = gl.clCreateFromGLBuffer(addr(), flags, bufobj, cres); - res = Native.getInt(cres); - if (res != 0) + ce = gl.clCreateFromGLBuffer(addr(), flags, bufobj, cres.address()); + if ((res = MemoryAccess.getInt(cres)) != 0) throw new CLRuntimeException(res); return Native.resolve(ce, (b) -> new CLBuffer(b, getObjectPlatform())); } catch (RuntimeException | Error t) { @@ -1044,13 +1037,12 @@ public class CLContext extends CLObject { int texture) { GLext gl = getGLext(); try (Allocator frame = Memory.stack()) { - MemoryAddress cres = frame.alloca(8); + MemorySegment cres = frame.allocs(8); MemoryAddress ce; int res; - ce = gl.clCreateFromGLTexture(addr(), flags, target, miplevel, texture, cres); - res = Native.getInt(cres); - if (res != 0) + ce = gl.clCreateFromGLTexture(addr(), flags, target, miplevel, texture, cres.address()); + if ((res = MemoryAccess.getInt(cres)) != 0) throw new CLRuntimeException(res); return Native.resolve(ce, (x) -> new CLImage(x, getObjectPlatform())); } catch (RuntimeException | Error t) { @@ -1068,13 +1060,12 @@ public class CLContext extends CLObject { int renderbuffer) { GLext gl = getGLext(); try (Allocator frame = Memory.stack()) { - MemoryAddress cres = frame.alloca(8); + MemorySegment cres = frame.allocs(8); MemoryAddress ce; int res; - ce = gl.clCreateFromGLRenderbuffer(addr(), flags, renderbuffer, cres); - res = Native.getInt(cres); - if (res != 0) + ce = gl.clCreateFromGLRenderbuffer(addr(), flags, renderbuffer, cres.address()); + if ((res = MemoryAccess.getInt(cres)) != 0) throw new CLRuntimeException(res); return Native.resolve(ce, (x) -> new CLImage(x, getObjectPlatform())); } catch (RuntimeException | Error t) { @@ -1112,13 +1103,12 @@ public class CLContext extends CLObject { public CLEvent clCreateEventFromGLsyncKHR(MemoryAddress glsync) { GLext gl = getGLext(); try (Allocator frame = Memory.stack()) { - MemoryAddress cret = frame.alloca(8); + MemorySegment cret = frame.allocs(8); MemoryAddress ce; int res; - ce = gl.clCreateEventFromGLsyncKHR(addr(), glsync, cret); - res = Native.getInt(cret); - if (res != 0) + ce = gl.clCreateEventFromGLsyncKHR(addr(), glsync, cret.address()); + if ((res = MemoryAccess.getInt(cret)) != 0) throw new CLRuntimeException(res); return Native.resolve(ce, CLEvent::new); diff --git a/src/notzed.zcl/classes/au/notzed/zcl/CLImageDesc.java b/src/notzed.zcl/classes/au/notzed/zcl/CLImageDesc.java index f12e299..55b02cc 100644 --- a/src/notzed.zcl/classes/au/notzed/zcl/CLImageDesc.java +++ b/src/notzed.zcl/classes/au/notzed/zcl/CLImageDesc.java @@ -86,25 +86,25 @@ public class CLImageDesc { * This is just hand-rolled for now. I'm not really sure how to approach it * since these are just going to be used temporarily */ - public static MemoryAddress toNative(Allocator frame, CLImageDesc d) { - MemoryAddress addr = frame.alloca(sizeof); + public static MemorySegment toNative(Allocator frame, CLImageDesc d) { + MemorySegment addr = frame.allocs(sizeof); - Native.setInt(addr, d.imageType); - Native.setLong(addr, 1, d.imageWidth); - Native.setLong(addr, 2, d.imageHeight); - Native.setLong(addr, 3, d.imageDepth); - Native.setLong(addr, 4, d.imageArraySize); - Native.setLong(addr, 5, d.imageRowPitch); - Native.setLong(addr, 6, d.imageSlicePitch); - Native.setLong(addr, 7, d.numMipLevels); - Native.setLong(addr, 8, d.numSamples); - Native.setAddr(addr, 9, Native.addr(d.memObject)); + MemoryAccess.setInt(addr, d.imageType); + MemoryAccess.setLongAtIndex(addr, 1, d.imageWidth); + MemoryAccess.setLongAtIndex(addr, 2, d.imageHeight); + MemoryAccess.setLongAtIndex(addr, 3, d.imageDepth); + MemoryAccess.setLongAtIndex(addr, 4, d.imageArraySize); + MemoryAccess.setLongAtIndex(addr, 5, d.imageRowPitch); + MemoryAccess.setLongAtIndex(addr, 6, d.imageSlicePitch); + MemoryAccess.setLongAtIndex(addr, 7, d.numMipLevels); + MemoryAccess.setLongAtIndex(addr, 8, d.numSamples); + MemoryAccess.setAddressAtIndex(addr, 9, d.memObject.addr()); return addr; } - public static CLImageFormat fromNative(MemoryAddress addr) { - return new CLImageFormat(Native.getInt(addr), Native.getInt(addr, 1)); + public static CLImageFormat fromNative(MemorySegment addr) { + return new CLImageFormat(MemoryAccess.getInt(addr), MemoryAccess.getIntAtIndex(addr, 1)); } public static final long sizeof = 72; diff --git a/src/notzed.zcl/classes/au/notzed/zcl/CLImageFormat.java b/src/notzed.zcl/classes/au/notzed/zcl/CLImageFormat.java index e702637..ea47a6c 100644 --- a/src/notzed.zcl/classes/au/notzed/zcl/CLImageFormat.java +++ b/src/notzed.zcl/classes/au/notzed/zcl/CLImageFormat.java @@ -22,14 +22,14 @@ import api.Allocator; /** * Holder for cl_image_format equivalent. - * + *

*

panama notes

- To maintain compatability with the - * previous api this remains as a simple pojo and marshalling is - * done as required. -

-This also means it doesn't have to deal with allocation and deallocation and so on. - + * To maintain compatability with the + * previous api this remains as a simple pojo and marshalling is + * done as required. + *

+ * This also means it doesn't have to deal with allocation and deallocation and so on. + *

*/ public class CLImageFormat { @@ -81,37 +81,37 @@ public class CLImageFormat { if (getClass() != obj.getClass()) { return false; } - final CLImageFormat other = (CLImageFormat) obj; + final CLImageFormat other = (CLImageFormat)obj; return this.channelOrder == other.channelOrder - && this.channelDataType == other.channelDataType; + && this.channelDataType == other.channelDataType; } @Override public String toString() { return String.format("CLImageFormat: %s,%s", - getChannelOrder(), getChannelDataType()); + getChannelOrder(), getChannelDataType()); } /* * This is just hand-rolled for now. I'm not really sure how to approach it * since these are just going to be used temporarily */ - static MemoryAddress toNative(Allocator frame, CLImageFormat fmt) { - MemoryAddress addr = frame.alloca(2*4); + static MemorySegment toNative(Allocator frame, CLImageFormat fmt) { + MemorySegment addr = frame.allocs(2 * 4); - Native.setInt(addr, fmt.channelOrder); - Native.setInt(addr, 1, fmt.channelDataType); + MemoryAccess.setInt(addr, fmt.channelOrder); + MemoryAccess.setIntAtIndex(addr, 1, fmt.channelDataType); return addr; } - static CLImageFormat fromNative(MemoryAddress addr) { - return new CLImageFormat(Native.getInt(addr), Native.getInt(addr, 1)); - } - static CLImageFormat fromNative(MemorySegment addr) { - return fromNative(addr.address()); + return new CLImageFormat(MemoryAccess.getInt(addr), MemoryAccess.getIntAtIndex(addr, 1)); } - static MemoryLayout layout() { return Native.parseStruct("[u32(image_channel_order)u32(image_channel_data_type)]"); } + static final MemoryLayout LAYOUT = Native.parseStruct("[u32(image_channel_order)u32(image_channel_data_type)]"); + + static MemoryLayout layout() { + return LAYOUT; + } } diff --git a/src/notzed.zcl/classes/au/notzed/zcl/CLKernel.java b/src/notzed.zcl/classes/au/notzed/zcl/CLKernel.java index 103e1db..bfdd96b 100644 --- a/src/notzed.zcl/classes/au/notzed/zcl/CLKernel.java +++ b/src/notzed.zcl/classes/au/notzed/zcl/CLKernel.java @@ -57,11 +57,11 @@ public class CLKernel extends CLObject { requireAPIVersion(CLPlatform.VERSION_2_1); try (Allocator a = Memory.stack()) { - MemoryAddress cres = a.alloca(8); - MemoryAddress ck = clCloneKernel(addr(), cres); - int res = getInt(cres); - - if (res != 0) + MemorySegment cres = a.allocs(8); + MemoryAddress ck = clCloneKernel(addr(), cres.address()); + int res; + + if ((res = MemoryAccess.getInt(cres)) != 0) throw new CLRuntimeException(res); return Native.resolve(ck, (x) -> new CLKernel(x, getObjectPlatform())); @@ -74,7 +74,7 @@ public class CLKernel extends CLObject { /** * Calls clGetKernelArg. - * pval is a pointer to the value. + * pval is a pointer to the value. */ private void setKernelArg(int index, long size, MemoryAddress pval) { try { @@ -101,7 +101,6 @@ public class CLKernel extends CLObject { * @param offset Offset in buffer. */ //public native void setArg(int index, long size, Buffer buffer, long offset); - /** * Set the size of a parameter that is of a local scope. * @@ -119,11 +118,11 @@ public class CLKernel extends CLObject { * @param o */ public void setArg(int index, CLObject o) { - try (Allocator frame = Memory.stack()) { - MemoryAddress pval = frame.alloca(8); + try (ResourceScope frame = ResourceScope.newConfinedScope()) { + MemorySegment pval = MemorySegment.allocateNative(CLinker.C_POINTER, frame); - setAddr(pval, o.addr()); - setKernelArg(index, 8, pval); + MemoryAccess.setAddress(pval, o.addr()); + setKernelArg(index, CLinker.C_POINTER.byteSize(), pval.address()); } } @@ -135,11 +134,11 @@ public class CLKernel extends CLObject { * @param val */ public void setArg(int index, byte val) { - try (Allocator frame = Memory.stack()) { - MemoryAddress pval = frame.alloca(8); + try (ResourceScope frame = ResourceScope.newConfinedScope()) { + MemorySegment pval = MemorySegment.allocateNative(1, frame); - setByte(pval, val); - setKernelArg(index, 1, pval); + MemoryAccess.setByte(pval, val); + setKernelArg(index, 1, pval.address()); } } @@ -151,11 +150,11 @@ public class CLKernel extends CLObject { * @param val */ public void setArg(int index, short val) { - try (Allocator frame = Memory.stack()) { - MemoryAddress pval = frame.alloca(8); + try (ResourceScope frame = ResourceScope.newConfinedScope()) { + MemorySegment pval = MemorySegment.allocateNative(CLinker.C_SHORT, frame); - setShort(pval, val); - setKernelArg(index, 2, pval); + MemoryAccess.setShort(pval, val); + setKernelArg(index, CLinker.C_SHORT.byteSize(), pval.address()); } } @@ -167,11 +166,11 @@ public class CLKernel extends CLObject { * @param val */ public void setArg(int index, int val) { - try (Allocator frame = Memory.stack()) { - MemoryAddress pval = frame.alloca(8); + try (ResourceScope frame = ResourceScope.newConfinedScope()) { + MemorySegment pval = MemorySegment.allocateNative(CLinker.C_INT, frame); - setInt(pval, val); - setKernelArg(index, 4, pval); + MemoryAccess.setInt(pval, val); + setKernelArg(index, CLinker.C_INT.byteSize(), pval.address()); } } @@ -183,11 +182,11 @@ public class CLKernel extends CLObject { * @param val */ public void setArg(int index, long val) { - try (Allocator frame = Memory.stack()) { - MemoryAddress pval = frame.alloca(8); + try (ResourceScope frame = ResourceScope.newConfinedScope()) { + MemorySegment pval = MemorySegment.allocateNative(CLinker.C_LONG, frame); - setLong(pval, val); - setKernelArg(index, 8, pval); + MemoryAccess.setLong(pval, val); + setKernelArg(index, CLinker.C_LONG.byteSize(), pval.address()); } } @@ -198,11 +197,11 @@ public class CLKernel extends CLObject { * @param val */ public void setArg(int index, float val) { - try (Allocator frame = Memory.stack()) { - MemoryAddress pval = frame.alloca(8); + try (ResourceScope frame = ResourceScope.newConfinedScope()) { + MemorySegment pval = MemorySegment.allocateNative(CLinker.C_FLOAT, frame); - setFloat(pval, val); - setKernelArg(index, 4, pval); + MemoryAccess.setFloat(pval, val); + setKernelArg(index, CLinker.C_FLOAT.byteSize(), pval.address()); } } @@ -213,15 +212,14 @@ public class CLKernel extends CLObject { * @param val */ public void setArg(int index, double val) { - try (Allocator frame = Memory.stack()) { - MemoryAddress pval = frame.alloca(8); + try (ResourceScope frame = ResourceScope.newConfinedScope()) { + MemorySegment pval = MemorySegment.allocateNative(CLinker.C_DOUBLE, frame); - setDouble(pval, val); - setKernelArg(index, 8, pval); + MemoryAccess.setDouble(pval, val); + setKernelArg(index, CLinker.C_DOUBLE.byteSize(), pval.address()); } } - /** * Set SVM argument. * @@ -232,6 +230,14 @@ public class CLKernel extends CLObject { throw new UnsupportedOperationException("not yet"); } + public void setArg(int index, MemorySegment val) { + try (ResourceScope frame = ResourceScope.newConfinedScope()) { + MemorySegment pval = MemorySegment.allocateNative(val.byteSize(), 16, frame); + + setKernelArg(index, pval.byteSize(), pval.address()); + } + } + /** * Set a multi-element byte argument. This may be used to setArg vector * types. @@ -240,13 +246,7 @@ public class CLKernel extends CLObject { * @param val */ public void setArg(int index, byte... val) { - try (Allocator frame = Memory.stack()) { - MemoryAddress pval = frame.alloca(val.length); - - for (int i=0;i T getInfoJava(int id, MethodHandle getInfo, Function create) { + protected T getInfoJava(int id, MethodHandle getInfo, Function create) { try (ResourceScope a = ResourceScope.newConfinedScope()) { - return create.apply(MemoryAccess.getAddress(getInfo(addr(), id, getInfo, a, 8))); + return create.apply(getInfo(addr(), id, getInfo, a, 8)); } } @@ -275,19 +275,7 @@ public abstract class CLObject extends Native { // clGet*Info includes terminating 0 static String infoToString(MemorySegment seg) { - if (true) { - return CLinker.toJavaString(seg); - } else if (false) { - MemoryAddress valp = seg.address(); - byte[] val = new byte[(int)(seg.byteSize() - 1)]; - - for (int i = 0; i < val.length; i++) - val[i] = getByte(valp, i); - return new String(val); - } else { - byte[] val = seg.toByteArray(); - return new String(val, 0, val.length - 1); - } + return CLinker.toJavaString(seg); } protected String getInfoString(T ctx, int id, MethodHandle getInfo) { diff --git a/src/notzed.zcl/classes/au/notzed/zcl/CLPlatform.java b/src/notzed.zcl/classes/au/notzed/zcl/CLPlatform.java index dec2b75..d2614ef 100644 --- a/src/notzed.zcl/classes/au/notzed/zcl/CLPlatform.java +++ b/src/notzed.zcl/classes/au/notzed/zcl/CLPlatform.java @@ -21,9 +21,6 @@ import static au.notzed.zcl.CLLib.*; import java.util.function.ToDoubleFunction; import java.util.function.Function; import jdk.incubator.foreign.*; -import api.Native; -import api.Memory; -import api.Allocator; import java.lang.invoke.*; /** @@ -216,13 +213,13 @@ public class CLPlatform extends CLObject { * @return MemoryAddress of function entry point, or MemoryAddress.NULL. */ public MemoryAddress clGetExtensionFunctionAddressForPlatform(String name) { - try (Allocator frame = Memory.stack()) { - MemoryAddress cname = toByteV(frame, name); + try (ResourceScope frame = ResourceScope.newConfinedScope()) { + MemorySegment cname = CLinker.toCString(name, frame); if (apiVersion >= VERSION_1_2) { - return CLLib.clGetExtensionFunctionAddressForPlatform(addr(), cname); + return CLLib.clGetExtensionFunctionAddressForPlatform(addr(), cname.address()); } else { - return clGetExtensionFunctionAddress(cname); + return clGetExtensionFunctionAddress(cname.address()); } } catch (RuntimeException | Error t) { throw t; @@ -320,12 +317,4 @@ public class CLPlatform extends CLObject { return x; } } - - public static void main(String[] args) { - - for (CLPlatform p: getPlatforms()) { - System.out.println(p.getName()); - p.getDevices(CL_DEVICE_TYPE_ALL); - } - } } diff --git a/src/notzed.zcl/tests/au/notzed/zcl/CLBufferTest.java b/src/notzed.zcl/tests/au/notzed/zcl/CLBufferTest.java index 5689eac..a2504d4 100644 --- a/src/notzed.zcl/tests/au/notzed/zcl/CLBufferTest.java +++ b/src/notzed.zcl/tests/au/notzed/zcl/CLBufferTest.java @@ -112,10 +112,9 @@ public class CLBufferTest { static boolean equal(ByteBuffer a, MemorySegment b) { boolean same = true; - MemoryAddress d = b.address(); for (long i = 0; same && i < a.capacity(); i++) { - same &= a.get((int)i) == Native.getByte(d, i); + same &= a.get((int)i) == MemoryAccess.getByteAtOffset(b, i); } return same; } @@ -295,9 +294,8 @@ public class CLBufferTest { q.enqueueReadBuffer(b, true, 0, seg.byteSize(), seg, null, null); boolean same = true; - MemoryAddress add = seg.address(); for (int i = 0; same && i < 1024; i++) { - same = Native.getByte(add, i) == data[i % data.length]; + same = MemoryAccess.getByteAtOffset(seg, i) == data[i % data.length]; } assertTrue(same); diff --git a/src/notzed.zcl/tests/au/notzed/zcl/CLEventTest.java b/src/notzed.zcl/tests/au/notzed/zcl/CLEventTest.java index 125e405..2790c14 100644 --- a/src/notzed.zcl/tests/au/notzed/zcl/CLEventTest.java +++ b/src/notzed.zcl/tests/au/notzed/zcl/CLEventTest.java @@ -144,9 +144,9 @@ public class CLEventTest { int countEvent(MemoryAddress x) { int res = -1; try (Allocator a = Memory.stack()) { - MemoryAddress rc = a.alloca(8); - clGetEventInfo(x, CL_EVENT_REFERENCE_COUNT, 4, rc, MemoryAddress.NULL); - res = Native.getInt(rc); + MemorySegment rc = a.allocs(8); + clGetEventInfo(x, CL_EVENT_REFERENCE_COUNT, 4, rc.address(), MemoryAddress.NULL); + res = MemoryAccess.getInt(rc); } catch (Throwable T) { } return res; -- 2.39.2