+/*
+ * Copyright (C) 2022 Michael Zucchi
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation +either version 3 of the License +or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not +see <http://www.gnu.org/licenses/>.
+ */
+package vulkan.test;
+
+import au.notzed.nativez.Memory;
+import java.lang.foreign.MemoryLayout;
+
+/**
+ * Partial mat4 implementation.
+ */
+public class mat4 extends mat {
+
+ public mat4() {
+ super(4, 4);
+ }
+
+ public mat4(float... v) {
+ super(4, 4, v);
+
+ if (v.length != 16)
+ throw new IllegalArgumentException();
+ }
+
+ public static final MemoryLayout LAYOUT = MemoryLayout.sequenceLayout(4 * 4, Memory.FLOAT).withBitAlignment(16 * 8);
+
+ final static float I[] = {
+ 1, 0, 0, 0,
+ 0, 1, 0, 0,
+ 0, 0, 1, 0,
+ 0, 0, 0, 1
+ };
+
+ public static mat4 I() {
+ return new mat4(I.clone());
+ }
+
+ public static mat4 translate(float dx, float dy, float dz) {
+ return new mat4(
+ 1, 0, 0, dx,
+ 0, 1, 0, dy,
+ 0, 0, 1, dz,
+ 0, 0, 0, 1);
+ }
+
+ public static mat4 postTranslate(mat4 M, float dx, float dy, float dz) {
+ float[] a = M.v;
+ float a00 = a[0], a01 = a[1], a02 = a[2], a03 = a[3];
+ float a10 = a[4], a11 = a[5], a12 = a[6], a13 = a[7];
+ float a20 = a[8], a21 = a[9], a22 = a[10], a23 = a[11];
+ float a30 = a[12], a31 = a[13], a32 = a[14], a33 = a[15];
+ // 1 0 0 dx a b c d
+ // 0 1 0 dy x e f g h
+ // 0 0 1 dz i j k l
+ // 0 0 0 1 m n o p
+ return new mat4(
+ a00 + a30 * dx, a01 + a31 * dx, a02 + a32 * dx, a03 + a33 * dx,
+ a10 + a30 * dy, a11 + a31 * dy, a12 + a32 * dy, a13 + a33 * dy,
+ a20 + a30 * dz, a21 + a31 * dz, a22 + a32 * dz, a23 + a33 * dz,
+ a30, a31, a32, a33);
+ }
+
+ public static mat4 preTranslate(mat4 M, float dx, float dy, float dz) {
+ float[] a = M.v;
+ float a00 = a[0], a01 = a[1], a02 = a[2], a03 = a[3];
+ float a10 = a[4], a11 = a[5], a12 = a[6], a13 = a[7];
+ float a20 = a[8], a21 = a[9], a22 = a[10], a23 = a[11];
+ float a30 = a[12], a31 = a[13], a32 = a[14], a33 = a[15];
+ // a b c d 1 0 0 dx
+ // e f g h x 0 1 0 dy
+ // i j k l 0 0 1 dz
+ // m n o p 0 0 0 1
+ return new mat4(
+ a00, a01, a02, a00 * dx + a01 * dy + a02 * dz + a03,
+ a10, a11, a12, a10 * dx + a11 * dy + a12 * dz + a13,
+ a20, a21, a22, a20 * dx + a21 * dy + a22 * dz + a23,
+ a30, a31, a32, a30 * dx + a31 * dy + a32 * dz + a33);
+ }
+
+ /*
+ array-of-struct (aos) version. i.e. packed vec4 array.
+
+ This is inefficient for SIMD.
+
+ a b c d xyzw xyzw ...
+ e f g h
+ i j k l
+ m n o p
+
+ x' = a*x+b*y+c*z+d*w
+ y' = e*x+f*y+g*z+h*w
+ z' = i*x+j*y+k*z+l*w
+ w' = m*x+n*y+o*z+p*w
+ */
+ public static void apply(mat4 A, int n, float[] vec, float[] res) {
+ float a00 = A.v[0];
+ float a01 = A.v[1];
+ float a02 = A.v[2];
+ float a03 = A.v[3];
+ float a10 = A.v[4];
+ float a11 = A.v[5];
+ float a12 = A.v[6];
+ float a13 = A.v[7];
+ float a20 = A.v[8];
+ float a21 = A.v[9];
+ float a22 = A.v[10];
+ float a23 = A.v[11];
+ float a30 = A.v[12];
+ float a31 = A.v[13];
+ float a32 = A.v[14];
+ float a33 = A.v[15];
+
+ for (int i = 0; i < n * 4; i += 4) {
+ float v0 = vec[i + 0];
+ float v1 = vec[i + 1];
+ float v2 = vec[i + 2];
+ float v3 = vec[i + 3];
+ res[i + 0] = a00 * v0 + a01 * v1 + a02 * v2 + a03 * v3;
+ res[i + 1] = a10 * v0 + a11 * v1 + a12 * v2 + a13 * v3;
+ res[i + 2] = a20 * v0 + a21 * v1 + a22 * v2 + a23 * v3;
+ res[i + 3] = a30 * v0 + a31 * v1 + a32 * v2 + a33 * v3;
+ }
+ }
+
+
+ /*
+
+ struct-of-array (soa) version
+
+ a b c d xxxx ...
+ e f g h yyyy ...
+ i j k l zzzz ...
+ m n o p wwww ...
+
+ x' = a*x+b*y+c*z+d*w
+ y' = e*x+f*y+g*z+h*w
+ z' = i*x+j*y+k*z+l*w
+ w' = m*x+n*y+o*z+p*w
+
+ */
+ public static void apply4(mat4 A, int n, float[] vec, int stride, float[] res) {
+ float a00 = A.v[0];
+ float a01 = A.v[1];
+ float a02 = A.v[2];
+ float a03 = A.v[3];
+ float a10 = A.v[4];
+ float a11 = A.v[5];
+ float a12 = A.v[6];
+ float a13 = A.v[7];
+ float a20 = A.v[8];
+ float a21 = A.v[9];
+ float a22 = A.v[10];
+ float a23 = A.v[11];
+ float a30 = A.v[12];
+ float a31 = A.v[13];
+ float a32 = A.v[14];
+ float a33 = A.v[15];
+
+ for (int i = 0; i < n; i += 1) {
+ float v0 = vec[i + stride * 0];
+ float v1 = vec[i + stride * 1];
+ float v2 = vec[i + stride * 2];
+ float v3 = vec[i + stride * 3];
+
+ res[i + stride * 0] = a00 * v0 + a01 * v1 + a02 * v2 + a03 * v3;
+ res[i + stride * 1] = a10 * v0 + a11 * v1 + a12 * v2 + a13 * v3;
+ res[i + stride * 2] = a20 * v0 + a21 * v1 + a22 * v2 + a23 * v3;
+ res[i + stride * 3] = a30 * v0 + a31 * v1 + a32 * v2 + a33 * v3;
+ }
+ }
+
+ /*
+ a b c d x
+ e f g h y
+ i j k l z
+ m n o p w
+
+ x' = a*x+b*y+c*z+d*w
+ y' = e*x+f*y+g*z+h*w
+ z' = i*x+j*y+k*z+l*w
+ w' = m*x+n*y+o*z+p*w
+
+ */
+ public static mat4 mul(mat4 A, mat4 B) {
+ float[] a = A.v;
+ float[] b = B.v;
+ float[] c = new float[16];
+
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 4; j++) {
+ c[i * 4 + j] = a[i * 4] * b[j] + a[i * 4 + 1] * b[j + 4] + a[i * 4 + 2] * b[j + 8] + a[i * 4 + 3] * b[j + 12];
+ }
+ }
+
+ return new mat4(c);
+ }
+
+ // calculate C' = A x B'
+ // - B is in col-major order
+ // - C is in col-major order
+ public static mat mul_NT_T(mat4 A, mat B) {
+ float[] a = A.v;
+ float[] b = B.v;
+ float[] c = new float[b.length];
+ float a00 = a[0], a01 = a[1], a02 = a[2], a03 = a[3];
+ float a10 = a[4], a11 = a[5], a12 = a[6], a13 = a[7];
+ float a20 = a[8], a21 = a[9], a22 = a[10], a23 = a[11];
+ float a30 = a[12], a31 = a[13], a32 = a[14], a33 = a[15];
+
+ for (int x = 0; x < c.length; x += 4) {
+ float b0 = b[x + 0];
+ float b1 = b[x + 1];
+ float b2 = b[x + 2];
+ float b3 = b[x + 3];
+
+ c[x + 0] = a00 * b0 + a01 * b1 + a02 * b2 + a03 * b3;
+ c[x + 1] = a10 * b0 + a11 * b1 + a12 * b2 + a13 * b3;
+ c[x + 2] = a20 * b0 + a21 * b1 + a22 * b2 + a23 * b3;
+ c[x + 3] = a30 * b0 + a31 * b1 + a32 * b2 + a33 * b3;
+ }
+
+ return new mat(B.m, B.n, c);
+ }
+
+ public static mat4 invert(mat4 A) {
+ // Augmentation is in separate matrix, this is a bit faster
+ // Loop bounds are constant, this is quite a bit faster
+ mat4 m = new mat4(A.v.clone());
+ //mat4 n = new mat4();
+
+ //for (int i = 0; i < 4; i++) {
+ // n.v[i * 4 + i] = 1;
+ //}
+ mat4 n = new mat4(I.clone());
+
+ if (m.v.length < 16 || n.v.length < 16)
+ throw new ArrayIndexOutOfBoundsException();
+
+ //Loop over columns to reduce.
+ for (int col = 0; col < 4; col++) {
+ //Reduce the current column to a single element
+ {
+ int piv = col;
+ float max = Math.abs(m.v[piv * 4 + col]);
+ for (int p = col + 1; p < 4; p++)
+ if (Math.abs(m.v[p * 4 + col]) > max) {
+ piv = p;
+ max = Math.abs(m.v[piv * 4 + col]);
+ }
+
+ if (col != piv) {
+ m.swapRow(col, piv);
+ n.swapRow(col, piv);
+ }
+ }
+
+ //Reduce the current column in every row to zero, excluding elements on
+ //the leading diagonal.
+ for (int row = 0; row < 4; row++) {
+ if (row != col) {
+ float multiple = m.v[row * 4 + col] / m.v[col * 4 + col];
+
+ //Subtract the pivot row from all other rows, to make
+ //column col zero.
+ //m.v[row * 4 + col] = 0;
+ for (int c = col + 1; c < 4; c++)
+ m.v[row * 4 + c] = m.v[row * 4 + c] - m.v[col * 4 + c] * multiple;
+ for (int c = 0; c < 4; c++)
+ n.v[row * 4 + c] = n.v[row * 4 + c] - n.v[col * 4 + c] * multiple;
+ }
+ }
+ }
+
+ //Final pass to make diagonal elements one. Performing this in a final
+ //pass allows us to avoid any significant computations on the left-hand
+ //square matrix, since it is diagonal, and ends up as the identity.
+ for (int row = 0; row < 4; row++) {
+ float mul = 1 / m.v[row * 4 + row];
+
+ for (int col = 0; col < 4; col++) {
+ n.v[row * 4 + col] *= mul;
+ }
+ }
+
+ return n;
+ }
+
+ // from some gl library
+ public static mat4 inverse(mat4 m) {
+ float[] v = m.v;
+ float a00 = v[0], a01 = v[1], a02 = v[2], a03 = v[3],
+ a10 = v[4], a11 = v[5], a12 = v[6], a13 = v[7],
+ a20 = v[8], a21 = v[9], a22 = v[10], a23 = v[11],
+ a30 = v[12], a31 = v[13], a32 = v[14], a33 = v[15],
+ b00 = a00 * a11 - a01 * a10,
+ b01 = a00 * a12 - a02 * a10,
+ b02 = a00 * a13 - a03 * a10,
+ b03 = a01 * a12 - a02 * a11,
+ b04 = a01 * a13 - a03 * a11,
+ b05 = a02 * a13 - a03 * a12,
+ b06 = a20 * a31 - a21 * a30,
+ b07 = a20 * a32 - a22 * a30,
+ b08 = a20 * a33 - a23 * a30,
+ b09 = a21 * a32 - a22 * a31,
+ b10 = a21 * a33 - a23 * a31,
+ b11 = a22 * a33 - a23 * a32,
+ det = b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06;
+
+ return new mat4(
+ (a11 * b11 - a12 * b10 + a13 * b09) / det,
+ (a02 * b10 - a01 * b11 - a03 * b09) / det,
+ (a31 * b05 - a32 * b04 + a33 * b03) / det,
+ (a22 * b04 - a21 * b05 - a23 * b03) / det,
+ (a12 * b08 - a10 * b11 - a13 * b07) / det,
+ (a00 * b11 - a02 * b08 + a03 * b07) / det,
+ (a32 * b02 - a30 * b05 - a33 * b01) / det,
+ (a20 * b05 - a22 * b02 + a23 * b01) / det,
+ (a10 * b10 - a11 * b08 + a13 * b06) / det,
+ (a01 * b08 - a00 * b10 - a03 * b06) / det,
+ (a30 * b04 - a31 * b02 + a33 * b00) / det,
+ (a21 * b02 - a20 * b04 - a23 * b00) / det,
+ (a11 * b07 - a10 * b09 - a12 * b06) / det,
+ (a00 * b09 - a01 * b07 + a02 * b06) / det,
+ (a31 * b01 - a30 * b03 - a32 * b00) / det,
+ (a20 * b03 - a21 * b01 + a22 * b00) / det);
+ }
+
+}