Load some initial examples.
authorNot Zed <notzed@gmail.com>
Wed, 11 Dec 2019 05:38:49 +0000 (16:08 +1030)
committerNot Zed <notzed@gmail.com>
Wed, 11 Dec 2019 05:38:49 +0000 (16:08 +1030)
16 files changed:
README [new file with mode: 0644]
config.make.in [new file with mode: 0644]
cube/Makefile [new file with mode: 0644]
cube/cube-data.h [new file with mode: 0644]
cube/cube.c [new file with mode: 0644]
cube/cube.frag [new file with mode: 0644]
cube/cube.vert [new file with mode: 0644]
mandelbrot-cl/Makefile [new file with mode: 0644]
mandelbrot-cl/mandelbrot.c [new file with mode: 0644]
mandelbrot-cl/mandelbrot.cl [new file with mode: 0644]
mandelbrot-cs/Makefile [new file with mode: 0644]
mandelbrot-cs/mandelbrot.c [new file with mode: 0644]
mandelbrot-cs/mandelbrot.comp [new file with mode: 0644]
util/glmaths.h [new file with mode: 0644]
util/pam.h [new file with mode: 0644]
util/timing.h [new file with mode: 0644]

diff --git a/README b/README
new file mode 100644 (file)
index 0000000..ecb8cb9
--- /dev/null
+++ b/README
@@ -0,0 +1,53 @@
+
+INTRODUCTION
+------------
+
+This is a collection of learning experiments with Vulkan in the C
+programming language.
+
+COMPILING
+---------
+
+Apart from the basic GNU tool suite one needs glslangValidator and
+clsvp.  The former is possibly intalled or available as a package but
+see LINKS for further information.
+
+Copy config.make.in to config.make before compiling.
+
+Each example has it's own Makefile and they must be compiled
+from within their respective directories.
+
+EXAMPLES
+---------
+
+cube/
+
+ A basic render-cube demo from https://vulkan.lunarg.com/sdk/home,
+ converted to plain C.
+
+mandebrot-cs/
+mandelbrot-cl/
+
+  One is a mandelbrot generator using GLSL compute shader, the other
+  uses an OpenCL shader compiled via clspv.  Because of the simplicity
+  of the data buffers the code is identical apart from which shader it
+  loads.
+
+  By far most of the execution time is spent on the CPU side,
+  converting the float4 array to uchar3 for output.
+
+LINKS
+-----
+
+ * https://www.lunarg.com/vulkan-sdk - vulkan sdk.
+ * https://github.com/google/clspv - OpenCL C to SPIR-V compiler.
+
+LICENSES
+--------
+
+Various permissive licenses, see individual source files.
+
+Stuff in util/ is (intended to be) public domain, although i'm
+unsure of some of the glmaths.h.
+
+
diff --git a/config.make.in b/config.make.in
new file mode 100644 (file)
index 0000000..cf81135
--- /dev/null
@@ -0,0 +1,42 @@
+
+# command used to compile .frag, .vert, and .comp files.
+GLSPV=glslangValidator
+# command used to compile .cl files.
+CLSPV=clspv
+
+# On linux, clock_gettime can be used for timing
+CPPFLAGS=-DHAVE_CLOCK_GETTIME -I../util
+
+# compile flags for graphics examples
+present_LDLIBS=-lm -lvulkan -lX11
+present_CFLAGS=-DVK_USE_PLATFORM_XLIB_KHR -O2 -march=native
+
+# compile flags for compute-only examples
+compute_LDLIBS=-lm -lvulkan
+compute_CFLAGS=-O2 -march=native
+
+# Some implicit rules that might need changing based on
+# the configuration above.
+
+# compile a glsl .vert file into a C include file
+%.vert.inc: %.vert
+       echo $<x
+       $(GLSPV) -V -x --vn $$(basename $< .vert)_vs -o $@ $<
+
+# compile a glsl .frag file into a C include file
+%.frag.inc: %.frag
+       glslangValidator -V -x --vn $$(basename $< .frag)_fs -o $@ $<
+
+# compile a glsl .comp file into a C include file
+%.comp.inc: %.comp
+       glslangValidator -V -x --vn $$(basename $< .comp)_cs -o $@ $<
+
+# convert a .cl.spv file into a C include file (uint32_t array)
+%.cl.inc: %.cl.spv
+       echo "const uint32_t $$(basename $< .cl.spv)_cl[] = {" > $@
+       od -A none -X < $< | sed 's/\([0-9a-f]\+\)/0x\1,/g' >> $@
+       echo "};" >> $@
+
+# compile a .cl file into SPIR-V
+%.cl.spv %.cl.csv: %.cl
+       $(CLSPV) -o $*.cl.spv --descriptormap $*.cl.csv $<
diff --git a/cube/Makefile b/cube/Makefile
new file mode 100644 (file)
index 0000000..4e6d243
--- /dev/null
@@ -0,0 +1,12 @@
+
+include ../config.make
+
+LDLIBS=$(present_LDLIBS)
+CFLAGS=$(present_CFLAGS)
+
+all: cube
+cube: cube.o
+cube.o: cube.c cube.vert.inc cube.frag.inc
+
+clean:
+       rm -f *.o cube cube.*.inc
diff --git a/cube/cube-data.h b/cube/cube-data.h
new file mode 100644 (file)
index 0000000..a9626cd
--- /dev/null
@@ -0,0 +1,52 @@
+
+typedef struct Vertex Vertex;
+
+struct Vertex {
+    float posX, posY, posZ, posW;  // Position data
+    float r, g, b, a;              // Color
+};
+
+static const Vertex g_vb_solid_face_colours_Data[] = {
+    // red face
+    {-1, -1, 1, 1.f, 1.f, 0.f, 0.f, 1.f},
+    {-1, 1, 1, 1.f, 1.f, 0.f, 0.f, 1.f},
+    {1, -1, 1, 1.f, 1.f, 0.f, 0.f, 1.f},
+    {1, -1, 1, 1.f, 1.f, 0.f, 0.f, 1.f},
+    {-1, 1, 1, 1.f, 1.f, 0.f, 0.f, 1.f},
+    {1, 1, 1, 1.f, 1.f, 0.f, 0.f, 1.f},
+    // green face
+    {-1, -1, -1, 1.f, 0.f, 1.f, 0.f, 1.f},
+    {1, -1, -1, 1.f, 0.f, 1.f, 0.f, 1.f},
+    {-1, 1, -1, 1.f, 0.f, 1.f, 0.f, 1.f},
+    {-1, 1, -1, 1.f, 0.f, 1.f, 0.f, 1.f},
+    {1, -1, -1, 1.f, 0.f, 1.f, 0.f, 1.f},
+    {1, 1, -1, 1.f, 0.f, 1.f, 0.f, 1.f},
+    // blue face
+    {-1, 1, 1, 1.f, 0.f, 0.f, 1.f, 1.f},
+    {-1, -1, 1, 1.f, 0.f, 0.f, 1.f, 1.f},
+    {-1, 1, -1, 1.f, 0.f, 0.f, 1.f, 1.f},
+    {-1, 1, -1, 1.f, 0.f, 0.f, 1.f, 1.f},
+    {-1, -1, 1, 1.f, 0.f, 0.f, 1.f, 1.f},
+    {-1, -1, -1, 1.f, 0.f, 0.f, 1.f, 1.f},
+    // yellow face
+    {1, 1, 1, 1.f, 1.f, 1.f, 0.f, 1.f},
+    {1, 1, -1, 1.f, 1.f, 1.f, 0.f, 1.f},
+    {1, -1, 1, 1.f, 1.f, 1.f, 0.f, 1.f},
+    {1, -1, 1, 1.f, 1.f, 1.f, 0.f, 1.f},
+    {1, 1, -1, 1.f, 1.f, 1.f, 0.f, 1.f},
+    {1, -1, -1, 1.f, 1.f, 1.f, 0.f, 1.f},
+    // magenta face
+    {1, 1, 1, 1.f, 1.f, 0.f, 1.f, 1.f},
+    {-1, 1, 1, 1.f, 1.f, 0.f, 1.f, 1.f},
+    {1, 1, -1, 1.f, 1.f, 0.f, 1.f, 1.f},
+    {1, 1, -1, 1.f, 1.f, 0.f, 1.f, 1.f},
+    {-1, 1, 1, 1.f, 1.f, 0.f, 1.f, 1.f},
+    {-1, 1, -1, 1.f, 1.f, 0.f, 1.f, 1.f},
+    // cyan face
+    {1, -1, 1, 1.f, 0.f, 1.f, 1.f, 1.f},
+    {1, -1, -1, 1.f, 0.f, 1.f, 1.f, 1.f},
+    {-1, -1, 1, 1.f, 0.f, 1.f, 1.f, 1.f},
+    {-1, -1, 1, 1.f, 0.f, 1.f, 1.f, 1.f},
+    {1, -1, -1, 1.f, 0.f, 1.f, 1.f, 1.f},
+    {-1, -1, -1, 1.f, 0.f, 1.f, 1.f, 1.f},
+};
diff --git a/cube/cube.c b/cube/cube.c
new file mode 100644 (file)
index 0000000..0020d9b
--- /dev/null
@@ -0,0 +1,1181 @@
+/*
+ * Vulkan Samples
+ *
+ * Copyright (C) 2015-2016 Valve Corporation
+ * Copyright (C) 2015-2016 LunarG, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+  This based on the tutorial in the vulkansdk:
+
+  https://vulkan.lunarg.com/sdk/home
+
+  But it has been made stand-alone and converted to C.  C struct
+  initialisers do something - but not much - toward cleaning up the
+  mess.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <vulkan/vulkan.h>
+
+#if defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_XCB_KHR)
+#include <X11/Xutil.h>
+#endif
+
+#include <GL/glu.h>
+#include <math.h>
+#include "glmaths.h"
+
+// These can't be changed
+#define NUM_SAMPLES VK_SAMPLE_COUNT_1_BIT
+#define NUM_DESCRIPTOR_SETS 1
+
+#include "cube.vert.inc"
+#include "cube.frag.inc"
+#include "cube-data.h"
+
+typedef struct mat4x4 {
+       float m[16];
+} mat4x4;
+
+struct state {
+       VkInstance vk;
+
+       VkPhysicalDevice physical;
+       VkPhysicalDeviceMemoryProperties memory_properties;
+       VkPhysicalDeviceFeatures device_features;
+
+       VkCommandPool cmd_pool;
+       VkDescriptorPool desc_pool;
+
+       VkDevice device;
+       VkCommandBuffer cmd;
+       VkSwapchainKHR chain;
+       int device_id;
+       int graphics_queue_index;
+       int present_queue_index;
+
+       VkQueue graphics_queue;
+       VkQueue present_queue;
+
+       int shaderCount;
+       VkShaderModule shader[2];
+
+       VkRenderPass render_pass;
+       VkFramebuffer *framebuffers;
+
+       int width, height;
+       VkSurfaceKHR surface;
+
+       float projection[16];
+       float view[16];
+       float model[16];
+       float clip[16];
+       float mvp[16];
+
+       VkBuffer uniformBuffer;
+       VkDeviceMemory uniformMemory;
+       VkDescriptorBufferInfo uniformInfo;
+
+       VkPipeline pipeline[1];
+
+       VkDescriptorSetLayout desc_layout[NUM_DESCRIPTOR_SETS];
+       VkDescriptorSet desc_set[NUM_DESCRIPTOR_SETS];
+       VkPipelineLayout pipeline_layout;
+
+       VkBuffer vertexBuffer;
+       VkDeviceMemory vertexMemory;
+       VkVertexInputBindingDescription vi_binding;
+       VkVertexInputAttributeDescription vi_attribs[2];
+
+       VkFence drawFence;
+
+       VkSemaphore chainSemaphore;
+       uint32_t chainIndex;
+
+       VkFormat chainImageFormat;
+       uint32_t chainImageCount;
+       VkImage *chainImage;
+       VkImageView *chainImageView;
+
+       VkFormat depthFormat;
+       VkImage depthImage;
+       VkImageView depthView;
+       VkDeviceMemory depthMemory;
+
+#if defined(VK_USE_PLATFORM_XLIB_KHR)
+       Display *display;
+       Window window;
+       Atom wm_delete_window;
+#endif
+};
+
+static void checkFatal(VkResult res, const char *cmd)
+{
+       if (res != VK_SUCCESS) {
+               fprintf(stderr, "%s: %d\n", cmd, res);
+               exit(EXIT_FAILURE);
+       }
+}
+
+static int find_memory_type(VkPhysicalDeviceMemoryProperties *memory, uint32_t typeMask, VkMemoryPropertyFlagBits query)
+{
+       for (int i = 0; i < memory->memoryTypeCount; i++) {
+               if (((1 << i) & typeMask) && (memory->memoryTypes[i].propertyFlags & query) == query)
+                       return i;
+       }
+       return -1;
+}
+
+void execute_begin_command_buffer(struct state *g)
+{
+       /* DEPENDS on init_command_buffer() */
+       VkResult res;
+
+       VkCommandBufferBeginInfo cmd_buf_info = {};
+       cmd_buf_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+       cmd_buf_info.pNext = NULL;
+       cmd_buf_info.flags = 0;
+       cmd_buf_info.pInheritanceInfo = NULL;
+
+       res = vkBeginCommandBuffer(g->cmd, &cmd_buf_info);
+}
+
+void execute_end_command_buffer(struct state *g)
+{
+       VkResult res;
+
+       res = vkEndCommandBuffer(g->cmd);
+}
+
+#define FENCE_TIMEOUT 100000000
+
+void execute_queue_command_buffer(struct state *g)
+{
+       VkResult res;
+
+       /* Queue the command buffer for execution */
+       const VkCommandBuffer cmd_bufs[] = {g->cmd};
+       VkFenceCreateInfo fenceInfo;
+       VkFence drawFence;
+       fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+       fenceInfo.pNext = NULL;
+       fenceInfo.flags = 0;
+       vkCreateFence(g->device, &fenceInfo, NULL, &drawFence);
+
+       VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+       VkSubmitInfo submit_info[] = {
+               {
+                       .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+                       .waitSemaphoreCount = 0,
+                       .pWaitSemaphores = NULL,
+                       .pWaitDstStageMask = &pipe_stage_flags,
+                       .commandBufferCount = 1,
+                       .pCommandBuffers = cmd_bufs,
+                       .signalSemaphoreCount = 0,
+                       .pSignalSemaphores = NULL,
+               }
+       };
+       res = vkQueueSubmit(g->graphics_queue, 1, submit_info, drawFence);
+
+       do {
+               res = vkWaitForFences(g->device, 1, &drawFence, VK_TRUE, FENCE_TIMEOUT);
+       } while (res == VK_TIMEOUT);
+
+       vkDestroyFence(g->device, drawFence, NULL);
+}
+
+static void init_instance(struct state *g)
+{
+       VkResult res;
+       VkApplicationInfo app = {
+               .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+               .pApplicationName = "basic",
+               .pEngineName = "none",
+               .apiVersion = VK_API_VERSION_1_0,
+       };
+       const char * const extensions[] = {
+               VK_KHR_SURFACE_EXTENSION_NAME,
+               VK_KHR_XLIB_SURFACE_EXTENSION_NAME
+       };
+       VkInstanceCreateInfo info = {
+               .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+               .pApplicationInfo = &app,
+               .enabledExtensionCount = sizeof(extensions) / sizeof(*extensions),
+               .ppEnabledExtensionNames = extensions,
+       };
+
+       res = vkCreateInstance(&info, NULL, &g->vk);
+       if (res != VK_SUCCESS) {
+               fprintf(stderr, "vkCreateInstance: %d\n", res);
+               exit(EXIT_FAILURE);
+       }
+}
+
+static int clampi(int v, int min, int max)
+{
+       return v < min ? min : v < max ? v : max;
+}
+
+#if defined(VK_USE_PLATFORM_XLIB_KHR)
+static void init_surface(struct state *g)
+{
+       XInitThreads();
+       g->display = XOpenDisplay(NULL);
+       long visualMask = VisualScreenMask;
+       int numberOfVisuals;
+       XVisualInfo vInfoTemplate = {
+               .screen = DefaultScreen(g->display)
+       };
+       XVisualInfo *visualInfo = XGetVisualInfo(g->display, visualMask, &vInfoTemplate, &numberOfVisuals);
+       Colormap colormap = XCreateColormap(g->display, RootWindow(g->display, vInfoTemplate.screen), visualInfo->visual, AllocNone);
+       XSetWindowAttributes windowAttributes = {
+               .colormap = colormap,
+               .background_pixel = 0xFFFFFFFF,
+               .border_pixel = 0,
+               .event_mask = KeyPressMask | KeyReleaseMask | StructureNotifyMask | ExposureMask
+       };
+
+       g->window = XCreateWindow(g->display, RootWindow(g->display, vInfoTemplate.screen),
+               0, 0, g->width, g->height,
+               0, visualInfo->depth, InputOutput, visualInfo->visual,
+               CWBackPixel | CWBorderPixel | CWEventMask | CWColormap, &windowAttributes);
+
+       XSelectInput(g->display, g->window, ExposureMask | KeyPressMask);
+       XMapWindow(g->display, g->window);
+       XFlush(g->display);
+       g->wm_delete_window = XInternAtom(g->display, "WM_DELETE_WINDOW", False);
+
+       //
+       VkXlibSurfaceCreateInfoKHR surfaceinfo = {
+               .sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR,
+               .pNext = NULL,
+               .flags = 0,
+               .dpy = g->display,
+               .window = g->window
+       };
+
+       VkResult res;
+
+       res = vkCreateXlibSurfaceKHR(g->vk, &surfaceinfo, NULL, &g->surface);
+       checkFatal(res, "vkCrateXlibSurfaceKHR");
+}
+#else
+#error "Only XLIB is implemented"
+#endif
+
+static void init_device(struct state *g)
+{
+       VkResult res;
+       uint32_t devcount;
+
+       res = vkEnumeratePhysicalDevices(g->vk, &devcount, NULL);
+       VkPhysicalDevice devs[devcount];
+       res = vkEnumeratePhysicalDevices(g->vk, &devcount, devs);
+
+       // Search for device and queue indices
+       int devid = -1;
+       int present_queue = -1;
+       int graphics_queue = -1;
+       for (int i = 0; i < devcount; i++) {
+               uint32_t family_count;
+               vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, NULL);
+               VkQueueFamilyProperties famprops[family_count];
+               vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, famprops);
+
+               for (uint32_t j = 0; j < family_count; j++) {
+                       VkBool32 present;
+
+                       vkGetPhysicalDeviceSurfaceSupportKHR(devs[i], j, g->surface, &present);
+
+                       if (present && present_queue == -1)
+                               present_queue = j;
+                       if ((famprops[j].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) {
+                               graphics_queue = j;
+                               if (present) {
+                                       present_queue = j;
+                                       break;
+                               }
+                       }
+               }
+               if (present_queue != -1 && graphics_queue != -1) {
+                       devid = i;
+                       break;
+               }
+       }
+
+       if (devid == -1) {
+               fprintf(stderr, "Unable to find suitable device\n");
+               exit(1);
+       }
+       g->device_id = devid;
+       g->present_queue_index = present_queue;
+       g->graphics_queue_index = graphics_queue;
+
+       vkGetPhysicalDeviceMemoryProperties(devs[devid], &g->memory_properties);
+       vkGetPhysicalDeviceFeatures(devs[devid], &g->device_features);
+
+       /* ************************************************************** */
+
+       float qpri[] = {0.0f};
+       VkDeviceQueueCreateInfo qinfo = {
+               .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+               .queueCount = 1,
+               .pQueuePriorities = qpri,
+               .queueFamilyIndex = g->graphics_queue_index,
+       };
+
+       const char * const extensions[] = {
+               VK_KHR_SWAPCHAIN_EXTENSION_NAME,
+       };
+
+       VkPhysicalDeviceFeatures features = {
+               .depthClamp = VK_TRUE,
+       };
+       VkDeviceCreateInfo devinfo = {
+               .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+               .queueCreateInfoCount = 1,
+               .pQueueCreateInfos = &qinfo,
+               .enabledExtensionCount = sizeof(extensions) / sizeof(*extensions),
+               .ppEnabledExtensionNames = extensions,
+               .pEnabledFeatures = &features,
+       };
+
+       res = vkCreateDevice(devs[devid], &devinfo, NULL, &g->device);
+       checkFatal(res, "vkCreateDevice");
+
+       /* ************************************************************** */
+       VkFormat format;
+       uint32_t formatCount;
+       res = vkGetPhysicalDeviceSurfaceFormatsKHR(devs[devid], g->surface, &formatCount, NULL);
+       VkSurfaceFormatKHR surfFormats[formatCount];
+       res = vkGetPhysicalDeviceSurfaceFormatsKHR(devs[devid], g->surface, &formatCount, surfFormats);
+       // If the format list includes just one entry of VK_FORMAT_UNDEFINED,
+       // the surface has no preferred format.  Otherwise, at least one
+       // supported format will be returned.
+       if (formatCount == 1 && surfFormats[0].format == VK_FORMAT_UNDEFINED) {
+               format = VK_FORMAT_B8G8R8A8_UNORM;
+       } else {
+               format = surfFormats[0].format;
+       }
+
+       VkSurfaceCapabilitiesKHR surfCapabilities;
+
+       res = vkGetPhysicalDeviceSurfaceCapabilitiesKHR(devs[devid], g->surface, &surfCapabilities);
+       checkFatal(res, "vkGetPhysicalDeviceSurfaceCapabilitiesKHR");
+
+       uint32_t presentModeCount;
+       res = vkGetPhysicalDeviceSurfacePresentModesKHR(devs[devid], g->surface, &presentModeCount, NULL);
+       VkPresentModeKHR presentModes[presentModeCount];
+       res = vkGetPhysicalDeviceSurfacePresentModesKHR(devs[devid], g->surface, &presentModeCount, presentModes);
+       checkFatal(res, "vkGetPhysicalDeviceSurfacePresentModesKHR");
+
+       VkExtent2D swapchainExtent;
+       // width and height are either both 0xFFFFFFFF, or both not 0xFFFFFFFF.
+       if (surfCapabilities.currentExtent.width == 0xFFFFFFFF) {
+               // If the surface size is undefined, the size is set to
+               // the size of the images requested.
+               swapchainExtent.width = clampi(g->width, surfCapabilities.minImageExtent.width, surfCapabilities.maxImageExtent.width);
+               swapchainExtent.height = clampi(g->height, surfCapabilities.minImageExtent.height, surfCapabilities.maxImageExtent.height);
+       } else {
+               // If the surface size is defined, the swap chain size must match
+               swapchainExtent = surfCapabilities.currentExtent;
+       }
+
+       VkCompositeAlphaFlagBitsKHR compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
+       VkCompositeAlphaFlagBitsKHR compositeAlphaFlags[4] = {
+               VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
+               VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR,
+               VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR,
+               VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR,
+       };
+       for (uint32_t i = 0; i < sizeof(compositeAlphaFlags) / sizeof(compositeAlphaFlags[0]); i++) {
+               if (surfCapabilities.supportedCompositeAlpha & compositeAlphaFlags[i]) {
+                       compositeAlpha = compositeAlphaFlags[i];
+                       break;
+               }
+       }
+
+       VkSwapchainCreateInfoKHR chaininfo = {
+               .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
+               .surface = g->surface,
+               .minImageCount = surfCapabilities.minImageCount,
+               .imageFormat = format,
+               .imageExtent = swapchainExtent,
+               .preTransform = (surfCapabilities.supportedTransforms & VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR)
+               ? VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR : surfCapabilities.currentTransform,
+               .compositeAlpha = compositeAlpha,
+               .imageArrayLayers = 1,
+               .presentMode = VK_PRESENT_MODE_FIFO_KHR,
+               .clipped = VK_TRUE,
+               .imageColorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
+               .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+               .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
+               // assumes queues are same.
+               .queueFamilyIndexCount = 0,
+       };
+
+       res = vkCreateSwapchainKHR(g->device, &chaininfo, NULL, &g->chain);
+       checkFatal(res, "vkCreateSwapchainKHR");
+
+       res = vkGetSwapchainImagesKHR(g->device, g->chain, &g->chainImageCount, NULL);
+       checkFatal(res, "vkGetSwapchainImagesKHR");
+
+       g->chainImage = malloc(sizeof(*g->chainImage) * g->chainImageCount);
+       g->chainImageView = malloc(sizeof(*g->chainImage) * g->chainImageCount);
+
+       res = vkGetSwapchainImagesKHR(g->device, g->chain, &g->chainImageCount, g->chainImage);
+
+       VkImageViewCreateInfo viewinfo = {
+               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+               .viewType = VK_IMAGE_VIEW_TYPE_2D,
+               .format = format,
+               .components.r = VK_COMPONENT_SWIZZLE_R,
+               .components.g = VK_COMPONENT_SWIZZLE_G,
+               .components.b = VK_COMPONENT_SWIZZLE_B,
+               .components.a = VK_COMPONENT_SWIZZLE_A,
+               .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+               .subresourceRange.levelCount = 1,
+               .subresourceRange.layerCount = 1,
+       };
+       for (uint32_t i = 0; i < g->chainImageCount; i++) {
+               viewinfo.image = g->chainImage[i];
+
+               res = vkCreateImageView(g->device, &viewinfo, NULL, &g->chainImageView[i]);
+               checkFatal(res, "vkCreateImageView");
+       }
+
+       g->chainImageFormat = format;
+}
+
+static void init_device_queue(struct state *g)
+{
+       vkGetDeviceQueue(g->device, g->graphics_queue_index, 0, &g->graphics_queue);
+       if (g->graphics_queue_index == g->present_queue_index) {
+               g->present_queue = g->graphics_queue;
+       } else {
+               vkGetDeviceQueue(g->device, g->present_queue_index, 0, &g->present_queue);
+       }
+}
+
+static void init_command(struct state *g)
+{
+       VkResult res;
+       VkCommandPoolCreateInfo poolinfo = {
+               .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+               .queueFamilyIndex = g->graphics_queue_index,
+       };
+
+       res = vkCreateCommandPool(g->device, &poolinfo, NULL, &g->cmd_pool);
+       checkFatal(res, "vkCreateCommandPool");
+
+       VkCommandBufferAllocateInfo cmdinfo = {
+               .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+               .commandPool = g->cmd_pool,
+               .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+               .commandBufferCount = 1,
+       };
+
+       res = vkAllocateCommandBuffers(g->device, &cmdinfo, &g->cmd);
+       checkFatal(res, "vkCAllocateCommandBuffers");
+}
+
+static void init_depth(struct state *g)
+{
+       VkResult res;
+       VkMemoryRequirements req;
+       VkImageCreateInfo imageinfo = {
+               .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+               .pNext = NULL,
+               .imageType = VK_IMAGE_TYPE_2D,
+               .format = VK_FORMAT_D16_UNORM,
+               .extent.width = g->width,
+               .extent.height = g->height,
+               .extent.depth = 1,
+               .mipLevels = 1,
+               .arrayLayers = 1,
+               .samples = NUM_SAMPLES,
+               .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+               .usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+               .queueFamilyIndexCount = 0,
+               .pQueueFamilyIndices = NULL,
+               .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+               .flags = 0,
+       };
+       res = vkCreateImage(g->device, &imageinfo, NULL, &g->depthImage);
+       checkFatal(res, "vkCreateImage");
+
+       vkGetImageMemoryRequirements(g->device, g->depthImage, &req);
+       VkMemoryAllocateInfo alloc = {
+               .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+               .allocationSize = req.size,
+               .memoryTypeIndex = find_memory_type(&g->memory_properties, req.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
+       };
+       res = vkAllocateMemory(g->device, &alloc, NULL, &g->depthMemory);
+       checkFatal(res, "vkAllocateMemory");
+
+       res = vkBindImageMemory(g->device, g->depthImage, g->depthMemory, 0);
+       checkFatal(res, "vkBindImageMemory");
+
+       VkImageViewCreateInfo view_info = {
+               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+               .image = g->depthImage,
+               .viewType = VK_IMAGE_VIEW_TYPE_2D,
+               .format = VK_FORMAT_D16_UNORM,
+               .components =
+               { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A},
+               .subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
+               .subresourceRange.levelCount = 1,
+               .subresourceRange.layerCount = 1,
+       };
+       res = vkCreateImageView(g->device, &view_info, NULL, &g->depthView);
+       checkFatal(res, "vkCreateImageView");
+
+       g->depthFormat = imageinfo.format;
+}
+
+static void init_uniform(struct state *g)
+{
+       VkResult res;
+       VkBufferCreateInfo buf_info = {
+               .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+               .usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+               .size = sizeof(g->mvp),
+               .sharingMode = VK_SHARING_MODE_EXCLUSIVE
+       };
+       res = vkCreateBuffer(g->device, &buf_info, NULL, &g->uniformBuffer);
+       checkFatal(res, "vkCreateBuffer (uniform)");
+
+       VkMemoryRequirements req;
+       vkGetBufferMemoryRequirements(g->device, g->uniformBuffer, &req);
+
+       VkMemoryAllocateInfo alloc = {
+               .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+               .allocationSize = req.size,
+               .memoryTypeIndex = find_memory_type(&g->memory_properties, req.memoryTypeBits,
+               VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
+       };
+       res = vkAllocateMemory(g->device, &alloc, NULL, &g->uniformMemory);
+       checkFatal(res, "vkAllocateMemory (uniform)");
+
+       void *mem;
+       res = vkMapMemory(g->device, g->uniformMemory, 0, req.size, 0, &mem);
+       checkFatal(res, "vkMapMemory (uniform)");
+       memcpy(mem, g->mvp, sizeof(g->mvp));
+       vkUnmapMemory(g->device, g->uniformMemory);
+
+       res = vkBindBufferMemory(g->device, g->uniformBuffer, g->uniformMemory, 0);
+       checkFatal(res, "vkBindBufferMemory (uniform)");
+
+       g->uniformInfo.buffer = g->uniformBuffer;
+       g->uniformInfo.offset = 0;
+       g->uniformInfo.range = sizeof(g->mvp);
+}
+
+static void init_descriptor(struct state *g)
+{
+       VkResult res;
+       VkDescriptorSetLayoutBinding layout_binding = {
+               .binding = 0,
+               .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+               .descriptorCount = 1,
+               .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
+       };
+       VkDescriptorSetLayoutCreateInfo descriptor_layout = {
+               .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+               .bindingCount = 1,
+               .pBindings = &layout_binding,
+       };
+       res = vkCreateDescriptorSetLayout(g->device, &descriptor_layout, NULL, g->desc_layout);
+       checkFatal(res, "vkCreateDescriptorSetLayout");
+
+       VkPipelineLayoutCreateInfo pipeline_info = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+               .pNext = NULL,
+               .pushConstantRangeCount = 0,
+               .pPushConstantRanges = NULL,
+               .setLayoutCount = 1,
+               .pSetLayouts = g->desc_layout,
+       };
+
+       res = vkCreatePipelineLayout(g->device, &pipeline_info, NULL, &g->pipeline_layout);
+       checkFatal(res, "vkCreatePipelineLayout");
+
+       VkDescriptorPoolSize type_count[1];
+       type_count[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+       type_count[0].descriptorCount = 1;
+
+       VkDescriptorPoolCreateInfo descriptor_pool = {
+               .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+               .maxSets = 1,
+               .poolSizeCount = 1,
+               .pPoolSizes = type_count,
+       };
+
+       res = vkCreateDescriptorPool(g->device, &descriptor_pool, NULL, &g->desc_pool);
+       checkFatal(res, "vkCreateDescriptorPool");
+
+       VkDescriptorSetAllocateInfo alloc_info[] = {
+               {
+                       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+                       .descriptorPool = g->desc_pool,
+                       .descriptorSetCount = NUM_DESCRIPTOR_SETS,
+                       .pSetLayouts = g->desc_layout,
+               },
+       };
+       res = vkAllocateDescriptorSets(g->device, alloc_info, g->desc_set);
+       checkFatal(res, "vkAllocateDescriptorSets");
+
+       VkWriteDescriptorSet writes[] = {
+               {
+                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+                       .dstSet = g->desc_set[0],
+                       .descriptorCount = 1,
+                       .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+                       .pBufferInfo = &g->uniformInfo,
+                       .dstArrayElement = 0,
+                       .dstBinding = 0,
+               }
+       };
+
+       vkUpdateDescriptorSets(g->device, 1, writes, 0, NULL);
+
+}
+
+static void init_render(struct state *g)
+{
+       VkResult res;
+       VkAttachmentDescription attachments[] = {
+               {
+                       .format = g->chainImageFormat,
+                       .samples = NUM_SAMPLES,
+                       .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
+                       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+                       .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+                       .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
+                       .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+                       .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
+                       .flags = 0,
+               },
+               {
+                       .format = g->depthFormat,
+                       .samples = NUM_SAMPLES,
+                       .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
+                       .storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
+                       .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+                       .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
+                       .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+                       .finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+                       .flags = 0,
+               },
+       };
+       VkAttachmentReference color_reference = {
+               .attachment = 0,
+               .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+       };
+
+       VkAttachmentReference depth_reference = {
+               .attachment = 1,
+               .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+       };
+       VkSubpassDescription subpass = {
+               .flags = 0,
+               .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+               .colorAttachmentCount = 1,
+               .pColorAttachments = &color_reference,
+               .pDepthStencilAttachment = &depth_reference,
+       };
+       VkRenderPassCreateInfo rp_info = {
+               .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+               .attachmentCount = sizeof(attachments) / sizeof(attachments[0]),
+               .pAttachments = attachments,
+               .subpassCount = 1,
+               .pSubpasses = &subpass,
+       };
+       res = vkCreateRenderPass(g->device, &rp_info, NULL, &g->render_pass);
+       checkFatal(res, "vkCreateRenderPass");
+}
+
+static void init_framebuffer(struct state *g)
+{
+       VkResult res;
+       VkImageView attachments[] = {
+               NULL,
+               g->depthView,
+       };
+
+       VkFramebufferCreateInfo fb_info = {
+               .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+               .renderPass = g->render_pass,
+               .attachmentCount = 2,
+               .pAttachments = attachments,
+               .width = g->width,
+               .height = g->height,
+               .layers = 1,
+       };
+
+       g->framebuffers = malloc(sizeof(*g->framebuffers) * g->chainImageCount);
+       for (int i = 0; i < g->chainImageCount; i++) {
+               attachments[0] = g->chainImageView[i];
+               res = vkCreateFramebuffer(g->device, &fb_info, NULL, &g->framebuffers[i]);
+       }
+}
+
+static void init_vertexbuffer(struct state *g)
+{
+       // mostly same as uniform
+       VkResult res;
+       const void *data = g_vb_solid_face_colours_Data;
+       size_t dataSize = sizeof(g_vb_solid_face_colours_Data);
+       VkBufferCreateInfo buf_info = {
+               .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+               .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
+               .size = dataSize,
+               .sharingMode = VK_SHARING_MODE_EXCLUSIVE
+       };
+       res = vkCreateBuffer(g->device, &buf_info, NULL, &g->vertexBuffer);
+       checkFatal(res, "vkCreateBuffer (vertex)");
+
+       VkMemoryRequirements req;
+       vkGetBufferMemoryRequirements(g->device, g->vertexBuffer, &req);
+
+       VkMemoryAllocateInfo alloc = {
+               .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+               .allocationSize = req.size,
+               .memoryTypeIndex = find_memory_type(&g->memory_properties, req.memoryTypeBits,
+               VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
+       };
+       res = vkAllocateMemory(g->device, &alloc, NULL, &g->vertexMemory);
+       checkFatal(res, "vkAllocateMemory (vertex)");
+
+       void *mem;
+       res = vkMapMemory(g->device, g->vertexMemory, 0, req.size, 0, &mem);
+       checkFatal(res, "vkMapMemory (vertex)");
+       memcpy(mem, data, dataSize);
+       vkUnmapMemory(g->device, g->vertexMemory);
+
+       res = vkBindBufferMemory(g->device, g->vertexBuffer, g->vertexMemory, 0);
+       checkFatal(res, "vkBindBufferMemory (vertex)");
+
+       /* ***************************************** */
+       g->vi_binding.binding = 0;
+       g->vi_binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
+       g->vi_binding.stride = sizeof(Vertex);
+
+       g->vi_attribs[0].binding = 0;
+       g->vi_attribs[0].location = 0;
+       g->vi_attribs[0].format = VK_FORMAT_R32G32B32A32_SFLOAT;
+       g->vi_attribs[0].offset = 0;
+       g->vi_attribs[1].binding = 0;
+       g->vi_attribs[1].location = 1;
+       g->vi_attribs[1].format = VK_FORMAT_R32G32B32A32_SFLOAT;
+       g->vi_attribs[1].offset = 16;
+}
+
+static void init_pipeline(struct state *g)
+{
+       VkResult res;
+       VkDynamicState dynamicStateEnables[VK_DYNAMIC_STATE_RANGE_SIZE] = {};
+
+       VkPipelineDynamicStateCreateInfo dynamicState = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+               .pDynamicStates = dynamicStateEnables,
+               .dynamicStateCount = 0,
+       };
+
+       VkPipelineVertexInputStateCreateInfo vi = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+               .flags = 0,
+               .vertexBindingDescriptionCount = 1,
+               .pVertexBindingDescriptions = &g->vi_binding,
+               .vertexAttributeDescriptionCount = 2,
+               .pVertexAttributeDescriptions = g->vi_attribs
+       };
+
+       VkPipelineInputAssemblyStateCreateInfo ia = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+               .flags = 0,
+               .primitiveRestartEnable = VK_FALSE,
+               .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
+       };
+
+       VkPipelineRasterizationStateCreateInfo rs = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+               .flags = 0,
+               .polygonMode = VK_POLYGON_MODE_FILL,
+               .cullMode = VK_CULL_MODE_BACK_BIT,
+               .frontFace = VK_FRONT_FACE_CLOCKWISE,
+               .depthClampEnable = VK_TRUE,
+               .rasterizerDiscardEnable = VK_FALSE,
+               .depthBiasEnable = VK_FALSE,
+               .depthBiasConstantFactor = 0,
+               .depthBiasClamp = 0,
+               .depthBiasSlopeFactor = 0,
+               .lineWidth = 1.0f,
+       };
+
+       VkPipelineColorBlendAttachmentState att_state[] = {
+               {
+                       .colorWriteMask = 0xf,
+                       .blendEnable = VK_FALSE,
+                       .alphaBlendOp = VK_BLEND_OP_ADD,
+                       .colorBlendOp = VK_BLEND_OP_ADD,
+                       .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+                       .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+                       .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+                       .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+               }
+       };
+
+       VkPipelineColorBlendStateCreateInfo cb = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+               .flags = 0,
+               .attachmentCount = 1,
+               .pAttachments = att_state,
+               .logicOpEnable = VK_FALSE,
+               .logicOp = VK_LOGIC_OP_NO_OP,
+               .blendConstants[0] = 1.0f,
+               .blendConstants[1] = 1.0f,
+               .blendConstants[2] = 1.0f,
+               .blendConstants[3] = 1.0f,
+       };
+
+       VkPipelineViewportStateCreateInfo vp = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+               .flags = 0,
+               .viewportCount = 1,
+               .scissorCount = 1,
+       };
+       dynamicStateEnables[dynamicState.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT;
+       dynamicStateEnables[dynamicState.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR;
+
+       VkPipelineDepthStencilStateCreateInfo ds = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+               .flags = 0,
+               .depthTestEnable = VK_TRUE,
+               .depthWriteEnable = VK_TRUE,
+               .depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL,
+               .depthBoundsTestEnable = VK_FALSE,
+               .minDepthBounds = 0,
+               .maxDepthBounds = 0,
+               .stencilTestEnable = VK_FALSE,
+               .back.failOp = VK_STENCIL_OP_KEEP,
+               .back.passOp = VK_STENCIL_OP_KEEP,
+               .back.compareOp = VK_COMPARE_OP_ALWAYS,
+               .back.compareMask = 0,
+               .back.reference = 0,
+               .back.depthFailOp = VK_STENCIL_OP_KEEP,
+               .back.writeMask = 0,
+               .front.failOp = VK_STENCIL_OP_KEEP,
+               .front.passOp = VK_STENCIL_OP_KEEP,
+               .front.compareOp = VK_COMPARE_OP_ALWAYS,
+               .front.compareMask = 0,
+               .front.reference = 0,
+               .front.depthFailOp = VK_STENCIL_OP_KEEP,
+               .front.writeMask = 0,
+       };
+
+       VkPipelineMultisampleStateCreateInfo ms = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+               .flags = 0,
+               .pSampleMask = NULL,
+               .rasterizationSamples = NUM_SAMPLES,
+               .sampleShadingEnable = VK_FALSE,
+               .alphaToCoverageEnable = VK_FALSE,
+               .alphaToOneEnable = VK_FALSE,
+               .minSampleShading = 0.0,
+       };
+
+       {
+               VkShaderModuleCreateInfo vsInfo = {
+                       .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+                       .codeSize = sizeof(cube_vs),
+                       .pCode = cube_vs,
+               };
+               VkShaderModuleCreateInfo fsInfo = {
+                       .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+                       .codeSize = sizeof(cube_fs),
+                       .pCode = cube_fs,
+               };
+
+               res = vkCreateShaderModule(g->device, &vsInfo, NULL, &g->shader[0]);
+               checkFatal(res, "vkCreateShaderModule (vs)");
+               res = vkCreateShaderModule(g->device, &fsInfo, NULL, &g->shader[1]);
+               checkFatal(res, "vkCreateShaderModule (fs)");
+               g->shaderCount = 2;
+       }
+
+       VkPipelineShaderStageCreateInfo shaderStages[] = {
+               {
+                       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+                       .stage = VK_SHADER_STAGE_VERTEX_BIT,
+                       .pName = "main",
+                       .module = g->shader[0],
+               },
+               {
+                       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+                       .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+                       .pName = "main",
+                       .module = g->shader[1],
+               }
+       };
+
+       VkGraphicsPipelineCreateInfo pipeline = {
+               .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+               .layout = g->pipeline_layout,
+               .basePipelineHandle = VK_NULL_HANDLE,
+               .basePipelineIndex = 0,
+               .flags = 0,
+               .pVertexInputState = &vi,
+               .pInputAssemblyState = &ia,
+               .pRasterizationState = &rs,
+               .pColorBlendState = &cb,
+               .pTessellationState = NULL,
+               .pMultisampleState = &ms,
+               .pDynamicState = &dynamicState,
+               .pViewportState = &vp,
+               .pDepthStencilState = &ds,
+               .pStages = shaderStages,
+               .stageCount = 2,
+               .renderPass = g->render_pass,
+               .subpass = 0,
+       };
+
+       res = vkCreateGraphicsPipelines(g->device, NULL, 1, &pipeline, NULL, g->pipeline);
+       checkFatal(res, "vkCreateGraphicsPipeline");
+
+       VkSemaphoreCreateInfo seminfo = {
+               .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+       };
+       res = vkCreateSemaphore(g->device, &seminfo, NULL, &g->chainSemaphore);
+       checkFatal(res, "vkCreateSemaphore");
+
+       VkFenceCreateInfo fenceInfo = {
+               .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO
+       };
+       res = vkCreateFence(g->device, &fenceInfo, NULL, &g->drawFence);
+
+}
+
+void cmd_viewport(struct state *g)
+{
+       VkViewport viewport = {
+               .x = 0,
+               .y = 0,
+               .width = g->width,
+               .height = g->height,
+               .minDepth = 0.0f,
+               .maxDepth = 1.0f,
+       };
+       vkCmdSetViewport(g->cmd, 0, 1, &viewport);
+}
+
+void cmd_scissors(struct state *g)
+{
+       VkRect2D scissor = {
+               .offset.x = 0,
+               .offset.y = 0,
+               .extent.width = g->width,
+               .extent.height = g->height
+       };
+       vkCmdSetScissor(g->cmd, 0, 1, &scissor);
+}
+
+static void cmd_paint(struct state *g)
+{
+       VkResult res;
+
+       res = vkAcquireNextImageKHR(g->device, g->chain, UINT64_MAX, g->chainSemaphore, VK_NULL_HANDLE, &g->chainIndex);
+       checkFatal(res, "vkAcquireNextImageKHR");
+
+       const VkDeviceSize offsets[1] = {0};
+
+       VkClearValue clear_values[] = {
+          {
+           .color.float32[0] = 0.2f,
+           .color.float32[1] = 0.2f,
+           .color.float32[2] = 0.2f,
+           .color.float32[3] = 0.2f
+          },
+          {
+           .depthStencil.depth = 1.0f,
+           .depthStencil.stencil = 0
+          }
+       };
+       VkRenderPassBeginInfo rp_begin = {
+               .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+               .renderPass = g->render_pass,
+               .framebuffer = g->framebuffers[g->chainIndex],
+               .renderArea.offset.x = 0,
+               .renderArea.offset.y = 0,
+               .renderArea.extent.width = g->width,
+               .renderArea.extent.height = g->height,
+               .clearValueCount = 2,
+               .pClearValues = clear_values,
+       };
+
+       vkCmdBeginRenderPass(g->cmd, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);
+
+       vkCmdBindPipeline(g->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, g->pipeline[0]);
+       vkCmdBindDescriptorSets(g->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, g->pipeline_layout, 0, NUM_DESCRIPTOR_SETS, g->desc_set, 0, NULL);
+       vkCmdBindVertexBuffers(g->cmd, 0, 1, &g->vertexBuffer, offsets);
+
+       cmd_viewport(g);
+       cmd_scissors(g);
+
+       vkCmdDraw(g->cmd, 12 * 3, 1, 0, 0);
+       vkCmdEndRenderPass(g->cmd);
+
+       res = vkEndCommandBuffer(g->cmd);
+       checkFatal(res, "vkEndCommandBuffer");
+
+       const VkCommandBuffer cmd_bufs[] = {g->cmd};
+
+       VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+       VkSubmitInfo submit_info[] = {
+               {
+                       .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+                       .waitSemaphoreCount = 1,
+                       .pWaitSemaphores = &g->chainSemaphore,
+                       .pWaitDstStageMask = &pipe_stage_flags,
+                       .commandBufferCount = 1,
+                       .pCommandBuffers = cmd_bufs,
+                       .signalSemaphoreCount = 0,
+                       .pSignalSemaphores = NULL,
+               }
+       };
+       /* Queue the command buffer for execution */
+       vkResetFences(g->device, 1, &g->drawFence);
+
+       res = vkQueueSubmit(g->graphics_queue, 1, submit_info, g->drawFence);
+
+       // hmm, why is this needed, surely it can handle this itself?
+
+       /* Make sure command buffer is finished before presenting */
+       do {
+               res = vkWaitForFences(g->device, 1, &g->drawFence, VK_TRUE, FENCE_TIMEOUT);
+       } while (res == VK_TIMEOUT);
+
+       /* Now present the image in the window */
+       VkPresentInfoKHR present = {
+               .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
+               .swapchainCount = 1,
+               .pSwapchains = &g->chain,
+               .pImageIndices = &g->chainIndex,
+               .pWaitSemaphores = NULL,
+               .waitSemaphoreCount = 0,
+               .pResults = NULL,
+       };
+
+       res = vkQueuePresentKHR(g->present_queue, &present);
+}
+
+static void shutdown(struct state *g)
+{
+       vkDestroyFence(g->device, g->drawFence, NULL);
+       vkDestroySemaphore(g->device, g->chainSemaphore, NULL);
+
+       vkDestroyPipeline(g->device, g->pipeline[0], NULL);
+       for (int i = 0; i < g->shaderCount; i++)
+               vkDestroyShaderModule(g->device, g->shader[i], NULL);
+
+       vkFreeMemory(g->device, g->vertexMemory, NULL);
+       vkDestroyBuffer(g->device, g->vertexBuffer, NULL);
+
+       for (int i = 0; i < g->chainImageCount; i++)
+               vkDestroyFramebuffer(g->device, g->framebuffers[i], NULL);
+
+       vkDestroyRenderPass(g->device, g->render_pass, NULL);
+
+       vkDestroyDescriptorPool(g->device, g->desc_pool, NULL);
+       vkDestroyPipelineLayout(g->device, g->pipeline_layout, NULL);
+       vkDestroyDescriptorSetLayout(g->device, g->desc_layout[0], NULL);
+
+       vkFreeMemory(g->device, g->uniformMemory, NULL);
+
+       vkDestroyImageView(g->device, g->depthView, NULL);
+       vkFreeMemory(g->device, g->depthMemory, NULL);
+       vkDestroyImage(g->device, g->depthImage, NULL);
+
+       vkDestroyBuffer(g->device, g->uniformBuffer, NULL);
+
+       for (int i = 0; i < g->chainImageCount; i++)
+               vkDestroyImageView(g->device, g->chainImageView[i], NULL);
+
+       vkDestroySwapchainKHR(g->device, g->chain, NULL);
+
+       vkDestroyCommandPool(g->device, g->cmd_pool, NULL);
+       vkDestroyDevice(g->device, NULL);
+
+       vkDestroySurfaceKHR(g->vk, g->surface, NULL);
+
+       vkDestroyInstance(g->vk, NULL);
+}
+
+int main(int argc, char** argv)
+{
+       struct state state = {
+               .width = 800,
+               .height = 800,
+               .clip =
+               {
+                       1.0f, 0.0f, 0.0f, 0.0f,
+                       0.0f, -1.0f, 0.0f, 0.0f,
+                       0.0f, 0.0f, 0.5f, 0.0f,
+                       0.0f, 0.0f, 0.5f, 1.0f
+               }
+       };
+       float eye[3] = {-5, 3, -10};
+       float centre[3] = {0, 0, 0};
+       float up[3] = {0, -1, 0};
+       float t0[16], t1[16];
+
+       perspective(state.projection, M_PI_4, 1.0f, 0.1f, 100.0f);
+       lookAt(state.view, eye, centre, up);
+       identity4f(state.model);
+       mult4x4f(t0, state.clip, state.projection);
+       mult4x4f(t1, t0, state.view);
+       mult4x4f(state.mvp, t1, state.model);
+
+       init_instance(&state);
+       init_surface(&state);
+
+       init_device(&state);
+       init_device_queue(&state);
+       init_command(&state);
+
+       init_depth(&state);
+       init_uniform(&state);
+       init_descriptor(&state);
+       init_render(&state);
+       init_framebuffer(&state);
+       init_vertexbuffer(&state);
+       init_pipeline(&state);
+
+       execute_begin_command_buffer(&state);
+
+       cmd_paint(&state);
+
+       printf("behold the prize!\n");
+       sleep(2);
+
+       shutdown(&state);
+
+       return(EXIT_SUCCESS);
+}
diff --git a/cube/cube.frag b/cube/cube.frag
new file mode 100644 (file)
index 0000000..de24544
--- /dev/null
@@ -0,0 +1,8 @@
+#version 400
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+layout (location = 0) in vec4 color;
+layout (location = 0) out vec4 outColor;
+void main() {
+   outColor = color;
+}
diff --git a/cube/cube.vert b/cube/cube.vert
new file mode 100644 (file)
index 0000000..5d21e1e
--- /dev/null
@@ -0,0 +1,14 @@
+#version 400
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+layout (std140, binding = 0) uniform bufferVals {
+    mat4 mvp;
+} data;
+layout (location = 0) in vec4 pos;
+layout (location = 1) in vec4 inColor;
+layout (location = 0) out vec4 outColor;
+
+void main() {
+   outColor = inColor;
+   gl_Position = data.mvp * pos;
+}
diff --git a/mandelbrot-cl/Makefile b/mandelbrot-cl/Makefile
new file mode 100644 (file)
index 0000000..93559ef
--- /dev/null
@@ -0,0 +1,12 @@
+
+include ../config.make
+
+LDLIBS=$(compute_LDLIBS)
+CFLAGS=$(compute_CFLAGS)
+
+all: mandelbrot
+mandelbrot: mandelbrot.o
+mandelbrot.o: mandelbrot.c mandelbrot.cl.inc
+
+clean:
+       rm -f *.o mandelbrot mandelbrot.cl.inc mandelbrot.cl.csv
diff --git a/mandelbrot-cl/mandelbrot.c b/mandelbrot-cl/mandelbrot.c
new file mode 100644 (file)
index 0000000..9ae7b11
--- /dev/null
@@ -0,0 +1,531 @@
+ /*
+The MIT License (MIT)
+
+Copyright (C) 2017 Eric Arnebäck
+Copyright (C) 2019 Michael Zucchi
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+ */
+
+/*
+ * This is a C conversion of this:
+ * https://github.com/Erkaman/vulkan_minimal_compute
+ * 
+ * In addition it's been made completely stand-alone
+ * and simplified further.  C struct initialisation
+ * syntax is used heavily to reduce clutter.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <vulkan/vulkan.h>
+#include <string.h>
+
+// These 4 defines must match compute shader.  The local work size
+// must be a power of 2.  They can be other values (up to the device
+// limits) but they affect concurrency and efficiency of the gpu.
+#define WIDTH (1920*2)
+#define HEIGHT (1200*2)
+#define LWS_X 8
+#define LWS_Y 8
+
+// The compiled shader (SPIR-V binary)
+#include "mandelbrot.cl.inc"
+#define mandelbrot_entry "mandelbrot"
+
+#define ARRAY_SIZEOF(a) (sizeof(a)/sizeof(*a))
+
+struct Pixel {
+       float r, g, b, a;
+};
+
+struct state {
+       VkInstance instance;
+       VkPhysicalDevice physicalDevice;
+
+       VkDevice device;
+       VkQueue computeQueue;
+
+       VkDeviceSize dstBufferSize;
+       VkBuffer dstBuffer;
+       VkDeviceMemory dstMemory;
+
+       VkDescriptorSetLayout descriptorSetLayout;
+       VkDescriptorPool descriptorPool;
+       VkDescriptorSet descriptorSets[1];
+
+       VkShaderModule mandelbrotShader;
+       VkPipelineLayout pipelineLayout;
+       VkPipeline computePipeline[1];
+
+       VkCommandPool commandPool;
+       VkCommandBuffer commandBuffers[1];
+
+       uint32_t computeQueueIndex;
+       VkPhysicalDeviceMemoryProperties deviceMemoryProperties;
+};
+
+static void checkFatal(VkResult res, const char *cmd)
+{
+       if (res != VK_SUCCESS) {
+               fprintf(stderr, "%s: %d\n", cmd, res);
+               exit(EXIT_FAILURE);
+       }
+}
+
+/**
+ * This finds the memory type index for the memory on a specific device.
+ */
+static int find_memory_type(VkPhysicalDeviceMemoryProperties *memory, uint32_t typeMask, VkMemoryPropertyFlagBits query)
+{
+       for (int i = 0; i < memory->memoryTypeCount; i++) {
+               if (((1 << i) & typeMask) && (memory->memoryTypes[i].propertyFlags & query) == query)
+                       return i;
+       }
+       return -1;
+}
+
+/**
+ * Round up to next nearest value of step size.  Step must be a power of 2.
+ */
+static uint32_t round_up(uint32_t v, uint32_t step)
+{
+       return(v + step - 1)&~(step - 1);
+}
+
+/**
+ * Create vulkan instance.
+ * 
+ * Compute-only requires no extensions so this is simple.
+ * 
+ * To turn on debugging layers:
+ *  export VK_INSTANCE_LAYERS=VK_LAYER_LUNARG_standard_validation
+ */
+static void init_instance(struct state *g)
+{
+       VkResult res;
+       VkApplicationInfo app = {
+               .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+               .pApplicationName = "mandelbrot",
+               .pEngineName = "none",
+               .apiVersion = VK_API_VERSION_1_0,
+       };
+       const char * const extensions[] = {
+       };
+       VkInstanceCreateInfo info = {
+               .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+               .pApplicationInfo = &app,
+               .enabledExtensionCount = sizeof(extensions) / sizeof(*extensions),
+               .ppEnabledExtensionNames = extensions,
+       };
+
+       res = vkCreateInstance(&info, NULL, &g->instance);
+       checkFatal(res, "vkCreateInstance");
+}
+
+/**
+ * This finds a suitable device and queue family.
+ * 
+ * In this case it is a device that supports a compute queue.  It
+ * preferentially looks for a non-graphics compute queue.
+ * 
+ * It could also make use of:
+ *  VkPhysicalDeviceProperties props;
+ *  vkGetPhysicalDeviceProperties(devs[i], &props);
+ */
+static void init_device(struct state *g)
+{
+       VkResult res;
+       uint32_t devcount;
+
+       res = vkEnumeratePhysicalDevices(g->instance, &devcount, NULL);
+       VkPhysicalDevice devs[devcount];
+       res = vkEnumeratePhysicalDevices(g->instance, &devcount, devs);
+
+       int best = 0;
+       int devid = -1;
+       int queueid = -1;
+
+       for (int i = 0; i < devcount; i++) {
+               uint32_t family_count;
+
+               vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, NULL);
+               VkQueueFamilyProperties famprops[family_count];
+               vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, famprops);
+
+               for (uint32_t j = 0; j < family_count; j++) {
+                       int score = 0;
+
+                       if ((famprops[j].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0)
+                               score += 1;
+                       if ((famprops[j].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0)
+                               score += 1;
+
+                       if (score > best) {
+                               score = best;
+                               devid = i;
+                               queueid = j;
+                       }
+               }
+       }
+
+       if (devid == -1)
+               checkFatal(VK_ERROR_FEATURE_NOT_PRESENT, "init_device");
+
+       g->physicalDevice = devs[devid];
+       g->computeQueueIndex = queueid;
+
+       float qpri[] = {0.0f};
+       VkDeviceQueueCreateInfo qinfo = {
+               .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+               .queueCount = 1,
+               .pQueuePriorities = qpri, // Note: cannot be null
+               .queueFamilyIndex = g->computeQueueIndex,
+       };
+       VkDeviceCreateInfo devinfo = {
+               .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+               .queueCreateInfoCount = 1,
+               .pQueueCreateInfos = &qinfo,
+       };
+
+       res = vkCreateDevice(g->physicalDevice, &devinfo, NULL, &g->device);
+       checkFatal(res, "vkCreateDevice");
+
+       /* These values are cached for convenience */
+       vkGetPhysicalDeviceMemoryProperties(g->physicalDevice, &g->deviceMemoryProperties);
+       vkGetDeviceQueue(g->device, g->computeQueueIndex, 0, &g->computeQueue);
+}
+
+/**
+ * Buffers are created in three steps:
+ * 1) create buffer, specifying usage and size
+ * 2) allocate memory based on memory requirements
+ * 3) bind memory
+ * 
+ */
+static void init_buffer(struct state *g, VkDeviceSize dataSize, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer *buffer, VkDeviceMemory *memory)
+{
+       VkResult res;
+       VkMemoryRequirements req;
+       VkBufferCreateInfo buf_info = {
+               .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+               .size = dataSize,
+               .usage = usage,
+               .sharingMode = VK_SHARING_MODE_EXCLUSIVE
+       };
+       res = vkCreateBuffer(g->device, &buf_info, NULL, buffer);
+       checkFatal(res, "vkCreateBuffer");
+
+       vkGetBufferMemoryRequirements(g->device, *buffer, &req);
+
+       VkMemoryAllocateInfo alloc = {
+               .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+               .allocationSize = req.size,
+               .memoryTypeIndex = find_memory_type(&g->deviceMemoryProperties, req.memoryTypeBits, properties)
+       };
+       res = vkAllocateMemory(g->device, &alloc, NULL, memory);
+       checkFatal(res, "vkAllocateMemory");
+
+       res = vkBindBufferMemory(g->device, *buffer, *memory, 0);
+       checkFatal(res, "vkBindBufferMemory");
+}
+
+/**
+ * Descriptors are used to bind and describe memory blocks
+ * to shaders.
+ * 
+ * *Pool is used to allocate descriptors, it is per-device.
+ * *Layout is used to group descriptors for a given pipeline,
+ * The descriptors describe individually-addressable blocks.
+ */
+static void init_descriptor(struct state *g)
+{
+       VkResult res;
+       /* Create descriptorset layout */
+       VkDescriptorSetLayoutBinding layout_binding = {
+               .binding = 0,
+               .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+               .descriptorCount = 1,
+               .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+       };
+       VkDescriptorSetLayoutCreateInfo descriptor_layout = {
+               .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+               .bindingCount = 1,
+               .pBindings = &layout_binding,
+       };
+       res = vkCreateDescriptorSetLayout(g->device, &descriptor_layout, NULL, &g->descriptorSetLayout);
+       checkFatal(res, "vkCreateDescriptorSetLayout");
+
+       /* Create descriptor pool */
+       VkDescriptorPoolSize type_count[] = {
+               {
+                       .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                       .descriptorCount = 1,
+               }
+       };
+       VkDescriptorPoolCreateInfo descriptor_pool = {
+               .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+               .maxSets = 1,
+               .poolSizeCount = ARRAY_SIZEOF(type_count),
+               .pPoolSizes = type_count,
+       };
+
+       res = vkCreateDescriptorPool(g->device, &descriptor_pool, NULL, &g->descriptorPool);
+       checkFatal(res, "vkCreateDescriptorPool");
+
+       /* Allocate from pool */
+       VkDescriptorSetAllocateInfo alloc_info[] = {
+               {
+                       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+                       .descriptorPool = g->descriptorPool,
+                       .descriptorSetCount = 1,
+                       .pSetLayouts = &g->descriptorSetLayout,
+               },
+       };
+       res = vkAllocateDescriptorSets(g->device, alloc_info, g->descriptorSets);
+       checkFatal(res, "vkAllocateDescriptorSets");
+
+       /* Bind a buffer to the descriptor */
+       VkDescriptorBufferInfo bufferInfo = {
+               .buffer = g->dstBuffer,
+               .offset = 0,
+               .range = g->dstBufferSize,
+       };
+       VkWriteDescriptorSet writeSet[] = {
+               {
+                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+                       .dstSet = g->descriptorSets[0],
+                       .dstBinding = 0,
+                       .descriptorCount = 1,
+                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                       .pBufferInfo = &bufferInfo,
+               }
+       };
+
+       vkUpdateDescriptorSets(g->device, ARRAY_SIZEOF(writeSet), writeSet, 0, NULL);
+}
+
+/**
+ * Create the compute pipeline.  This is the shader and data layouts for it.
+ */
+static void init_pipeline(struct state *g)
+{
+       VkResult res;
+       /* Set shader code */
+       VkShaderModuleCreateInfo vsInfo = {
+               .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+               .codeSize = sizeof(mandelbrot_cl),
+               .pCode = mandelbrot_cl,
+       };
+
+       res = vkCreateShaderModule(g->device, &vsInfo, NULL, &g->mandelbrotShader);
+       checkFatal(res, "vkCreateShaderModule");
+
+       /* Link shader to layout */
+       VkPipelineLayoutCreateInfo pipelineinfo = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+               .setLayoutCount = 1,
+               .pSetLayouts = &g->descriptorSetLayout,
+       };
+
+       res = vkCreatePipelineLayout(g->device, &pipelineinfo, NULL, &g->pipelineLayout);
+       checkFatal(res, "vkCreatePipelineLayout");
+
+       /* Create pipeline */
+       VkComputePipelineCreateInfo pipeline = {
+               .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+               .stage =
+               {
+                       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+                       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+                       .module = g->mandelbrotShader,
+                       .pName = mandelbrot_entry,
+               },
+               .layout = g->pipelineLayout
+
+       };
+       res = vkCreateComputePipelines(g->device, NULL, 1, &pipeline, NULL, g->computePipeline);
+       checkFatal(res, "vkCreateComputePipeline");
+}
+
+/**
+ * Create a command buffer, this is somewhat like a display list.
+ */
+static void init_command_buffer(struct state *g)
+{
+       VkResult res;
+       VkCommandPoolCreateInfo poolinfo = {
+               .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+               .queueFamilyIndex = g->computeQueueIndex,
+       };
+
+       res = vkCreateCommandPool(g->device, &poolinfo, NULL, &g->commandPool);
+       checkFatal(res, "vkCreateCommandPool");
+
+       VkCommandBufferAllocateInfo cmdinfo = {
+               .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+               .commandPool = g->commandPool,
+               .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+               .commandBufferCount = 1,
+       };
+
+       res = vkAllocateCommandBuffers(g->device, &cmdinfo, g->commandBuffers);
+       checkFatal(res, "vkAllocateCommandBuffers");
+
+       /* Fill command buffer with commands for later operation */
+       VkCommandBufferBeginInfo beginInfo = {
+               .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+               .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+       };
+       res = vkBeginCommandBuffer(g->commandBuffers[0], &beginInfo);
+       checkFatal(res, "vkBeginCommandBuffer");
+
+       /* Bind the compute operation and data */
+       vkCmdBindPipeline(g->commandBuffers[0], VK_PIPELINE_BIND_POINT_COMPUTE, g->computePipeline[0]);
+       vkCmdBindDescriptorSets(g->commandBuffers[0], VK_PIPELINE_BIND_POINT_COMPUTE, g->pipelineLayout, 0, 1, g->descriptorSets, 0, NULL);
+
+       /* Run it */
+       vkCmdDispatch(g->commandBuffers[0], round_up(WIDTH, LWS_X), round_up(HEIGHT, LWS_Y), 1);
+
+       res = vkEndCommandBuffer(g->commandBuffers[0]);
+       checkFatal(res, "vkEndCommandBuffer");
+}
+
+/**
+ * Execute the pre-created command buffer.
+ * 
+ * A fence is used to wait for completion.
+ */
+static void execute(struct state *g)
+{
+       VkResult res;
+       VkSubmitInfo submitInfo[] = {
+               {
+                       .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+                       .commandBufferCount = 1,
+                       .pCommandBuffers = g->commandBuffers
+               },
+       };
+
+       /* Create fence to mark the task completion */
+       VkFence fence;
+       VkFenceCreateInfo fenceInfo = {
+               .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO
+       };
+       res = vkCreateFence(g->device, &fenceInfo, NULL, &fence);
+       checkFatal(res, "vkCreateFence");
+
+       /* Await completion */
+       res = vkQueueSubmit(g->computeQueue, ARRAY_SIZEOF(submitInfo), submitInfo, fence);
+       checkFatal(res, "vkQueueSubmit");
+
+       do {
+               res = vkWaitForFences(g->device, 1, &fence, VK_TRUE, 1000000);
+       } while (res == VK_TIMEOUT);
+
+       vkDestroyFence(g->device, fence, NULL);
+}
+
+/**
+ * Trivial pnm format image output.
+ */
+static void pam_save(const char *name, unsigned int width, unsigned int height, unsigned int depth, const uint8_t *pixels)
+{
+       FILE *fp = fopen(name, "wb");
+
+       if (fp) {
+               fprintf(fp, "P6\n%d\n%d\n255\n", width, height);
+               fwrite(pixels, depth, width*height, fp);
+               fclose(fp);
+               printf("wrote: %s\n", name);
+       }
+}
+
+/**
+ * Accesses the gpu buffer, converts it to RGB byte, and saves it as a pam file.
+ */
+void save_result(struct state *g)
+{
+       VkResult res;
+       struct Pixel *dst;
+
+       res = vkMapMemory(g->device, g->dstMemory, 0, g->dstBufferSize, 0, (void **)&dst);
+       checkFatal(res, "vkMapMemory");
+
+       uint8_t *pixels = malloc(WIDTH * HEIGHT * 3);
+
+       // this is super-slow!
+       for (int i = 0; i < WIDTH * HEIGHT; i++) {
+               pixels[i * 3 + 0] = (uint8_t)(255.0f * dst[i].r);
+               pixels[i * 3 + 1] = (uint8_t)(255.0f * dst[i].g);
+               pixels[i * 3 + 2] = (uint8_t)(255.0f * dst[i].b);
+       }
+
+       vkUnmapMemory(g->device, g->dstMemory);
+
+       pam_save("mandelbrot.pam", WIDTH, HEIGHT, 3, pixels);
+       free(pixels);
+}
+
+static void shutdown(struct state *g)
+{
+       vkDestroyCommandPool(g->device, g->commandPool, NULL);
+
+       vkDestroyPipeline(g->device, g->computePipeline[0], NULL);
+       vkDestroyPipelineLayout(g->device, g->pipelineLayout, NULL);
+       vkDestroyShaderModule(g->device, g->mandelbrotShader, NULL);
+
+       vkDestroyDescriptorPool(g->device, g->descriptorPool, NULL);
+       vkDestroyDescriptorSetLayout(g->device, g->descriptorSetLayout, NULL);
+       
+       vkFreeMemory(g->device, g->dstMemory, NULL);
+       vkDestroyBuffer(g->device, g->dstBuffer, NULL);
+
+       vkDestroyDevice(g->device, NULL);
+       vkDestroyInstance(g->instance, NULL);
+}
+
+int main(int argc, char** argv)
+{
+       struct state g = {
+               .dstBufferSize = WIDTH * HEIGHT * sizeof(struct Pixel),
+       };
+
+       init_instance(&g);
+       init_device(&g);
+
+       init_buffer(&g, g.dstBufferSize,
+                   VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+                   VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+                   &g.dstBuffer, &g.dstMemory);
+
+       init_descriptor(&g);
+       init_pipeline(&g);
+       init_command_buffer(&g);
+
+       execute(&g);
+
+       save_result(&g);
+
+       shutdown(&g);
+       
+       return(EXIT_SUCCESS);
+}
diff --git a/mandelbrot-cl/mandelbrot.cl b/mandelbrot-cl/mandelbrot.cl
new file mode 100644 (file)
index 0000000..7031cbe
--- /dev/null
@@ -0,0 +1,50 @@
+
+#define WIDTH (1920*2)
+#define HEIGHT (1200*2)
+#define LWS_X 8
+#define LWS_Y 8
+
+kernel void
+__attribute__((reqd_work_group_size(LWS_X, LWS_Y, 1)))
+mandelbrot(global float4 *imageData) {
+
+       /*
+         In order to fit the work into workgroups, some unnecessary threads are launched.
+         We terminate those threads here. 
+       */
+       if(get_global_id(0) >= WIDTH || get_global_id(1) >= HEIGHT)
+               return;
+
+       float x = convert_float(get_global_id(0)) / WIDTH;
+       float y = convert_float(get_global_id(1)) / HEIGHT;
+
+       /*
+         What follows is code for rendering the mandelbrot set. 
+       */
+       float2 uv = (float2)(x, (y - 0.5f) * (12.0f / 19.0f) + 0.5f);
+       float n = 0.0f;
+       float2 c = (float2)(-0.445f, 0.0f) + (uv - 0.5f)*(4.0f);
+       const int M = 1000;
+       
+       for (int i = 0; i<M; i++) {
+               float2 z = (float2)(z.x*z.x - z.y*z.y, 2.0f*z.x*z.y) + c;
+               if (dot(z, z) > 4)
+                       break;
+               n++;
+       }
+          
+       // we use a simple cosine palette to determine color:
+       // http://iquilezles.org/www/articles/palettes/palettes.htm         
+       float t = n * 10.0f / M;
+       float3 d = (float3)(0.5f, 0.5f, 0.5f);
+       float3 e = (float3)(0.5f, 0.5f, 0.5f);
+       float3 f = (float3)(1.0f, 1.0f, 1.0f);
+       float3 g = (float3)(0.00f, 0.33f, 0.67f);
+       float4 color = (float4)( d + e * cos(6.28318f * (f * t + g)), 1.0f);
+
+       if (convert_int(n) == M)
+               color = (float4)(0, 0, 0, 1);
+  
+       // store the rendered mandelbrot set into a storage buffer:
+       imageData[WIDTH * get_global_id(1) + get_global_id(0)] = color;
+}
diff --git a/mandelbrot-cs/Makefile b/mandelbrot-cs/Makefile
new file mode 100644 (file)
index 0000000..cb0d541
--- /dev/null
@@ -0,0 +1,13 @@
+
+include ../config.make
+
+LDLIBS=$(compute_LDLIBS)
+CFLAGS=$(compute_CFLAGS)
+
+all: mandelbrot
+mandelbrot: mandelbrot.o
+mandelbrot.o: mandelbrot.c mandelbrot.comp.inc
+
+clean:
+       rm -f *.o mandelbrot mandelbrot.comp.inc
+
diff --git a/mandelbrot-cs/mandelbrot.c b/mandelbrot-cs/mandelbrot.c
new file mode 100644 (file)
index 0000000..64c14f2
--- /dev/null
@@ -0,0 +1,531 @@
+ /*
+The MIT License (MIT)
+
+Copyright (C) 2017 Eric Arnebäck
+Copyright (C) 2019 Michael Zucchi
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+ */
+
+/*
+ * This is a C conversion of this:
+ * https://github.com/Erkaman/vulkan_minimal_compute
+ * 
+ * In addition it's been made completely stand-alone
+ * and simplified further.  C struct initialisation
+ * syntax is used heavily to reduce clutter.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <vulkan/vulkan.h>
+#include <string.h>
+
+// These 4 defines must match compute shader.  The local work size
+// must be a power of 2.  They can be other values (up to the device
+// limits) but they affect concurrency and efficiency of the gpu.
+#define WIDTH (1920*2)
+#define HEIGHT (1200*2)
+#define LWS_X 8
+#define LWS_Y 8
+
+// The compiled shader (SPIR-V binary)
+#include "mandelbrot.comp.inc"
+#define mandelbrot_entry "main"
+
+#define ARRAY_SIZEOF(a) (sizeof(a)/sizeof(*a))
+
+struct Pixel {
+       float r, g, b, a;
+};
+
+struct state {
+       VkInstance instance;
+       VkPhysicalDevice physicalDevice;
+
+       VkDevice device;
+       VkQueue computeQueue;
+
+       VkDeviceSize dstBufferSize;
+       VkBuffer dstBuffer;
+       VkDeviceMemory dstMemory;
+
+       VkDescriptorSetLayout descriptorSetLayout;
+       VkDescriptorPool descriptorPool;
+       VkDescriptorSet descriptorSets[1];
+
+       VkShaderModule mandelbrotShader;
+       VkPipelineLayout pipelineLayout;
+       VkPipeline computePipeline[1];
+
+       VkCommandPool commandPool;
+       VkCommandBuffer commandBuffers[1];
+
+       uint32_t computeQueueIndex;
+       VkPhysicalDeviceMemoryProperties deviceMemoryProperties;
+};
+
+static void checkFatal(VkResult res, const char *cmd)
+{
+       if (res != VK_SUCCESS) {
+               fprintf(stderr, "%s: %d\n", cmd, res);
+               exit(EXIT_FAILURE);
+       }
+}
+
+/**
+ * This finds the memory type index for the memory on a specific device.
+ */
+static int find_memory_type(VkPhysicalDeviceMemoryProperties *memory, uint32_t typeMask, VkMemoryPropertyFlagBits query)
+{
+       for (int i = 0; i < memory->memoryTypeCount; i++) {
+               if (((1 << i) & typeMask) && (memory->memoryTypes[i].propertyFlags & query) == query)
+                       return i;
+       }
+       return -1;
+}
+
+/**
+ * Round up to next nearest value of step size.  Step must be a power of 2.
+ */
+static uint32_t round_up(uint32_t v, uint32_t step)
+{
+       return(v + step - 1)&~(step - 1);
+}
+
+/**
+ * Create vulkan instance.
+ * 
+ * Compute-only requires no extensions so this is simple.
+ * 
+ * To turn on debugging layers:
+ *  export VK_INSTANCE_LAYERS=VK_LAYER_LUNARG_standard_validation
+ */
+static void init_instance(struct state *g)
+{
+       VkResult res;
+       VkApplicationInfo app = {
+               .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+               .pApplicationName = "mandelbrot",
+               .pEngineName = "none",
+               .apiVersion = VK_API_VERSION_1_0,
+       };
+       const char * const extensions[] = {
+       };
+       VkInstanceCreateInfo info = {
+               .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+               .pApplicationInfo = &app,
+               .enabledExtensionCount = sizeof(extensions) / sizeof(*extensions),
+               .ppEnabledExtensionNames = extensions,
+       };
+
+       res = vkCreateInstance(&info, NULL, &g->instance);
+       checkFatal(res, "vkCreateInstance");
+}
+
+/**
+ * This finds a suitable device and queue family.
+ * 
+ * In this case it is a device that supports a compute queue.  It
+ * preferentially looks for a non-graphics compute queue.
+ * 
+ * It could also make use of:
+ *  VkPhysicalDeviceProperties props;
+ *  vkGetPhysicalDeviceProperties(devs[i], &props);
+ */
+static void init_device(struct state *g)
+{
+       VkResult res;
+       uint32_t devcount;
+
+       res = vkEnumeratePhysicalDevices(g->instance, &devcount, NULL);
+       VkPhysicalDevice devs[devcount];
+       res = vkEnumeratePhysicalDevices(g->instance, &devcount, devs);
+
+       int best = 0;
+       int devid = -1;
+       int queueid = -1;
+
+       for (int i = 0; i < devcount; i++) {
+               uint32_t family_count;
+
+               vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, NULL);
+               VkQueueFamilyProperties famprops[family_count];
+               vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, famprops);
+
+               for (uint32_t j = 0; j < family_count; j++) {
+                       int score = 0;
+
+                       if ((famprops[j].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0)
+                               score += 1;
+                       if ((famprops[j].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0)
+                               score += 1;
+
+                       if (score > best) {
+                               score = best;
+                               devid = i;
+                               queueid = j;
+                       }
+               }
+       }
+
+       if (devid == -1)
+               checkFatal(VK_ERROR_FEATURE_NOT_PRESENT, "init_device");
+
+       g->physicalDevice = devs[devid];
+       g->computeQueueIndex = queueid;
+
+       float qpri[] = {0.0f};
+       VkDeviceQueueCreateInfo qinfo = {
+               .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+               .queueCount = 1,
+               .pQueuePriorities = qpri, // Note: cannot be null
+               .queueFamilyIndex = g->computeQueueIndex,
+       };
+       VkDeviceCreateInfo devinfo = {
+               .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+               .queueCreateInfoCount = 1,
+               .pQueueCreateInfos = &qinfo,
+       };
+
+       res = vkCreateDevice(g->physicalDevice, &devinfo, NULL, &g->device);
+       checkFatal(res, "vkCreateDevice");
+
+       /* These values are cached for convenience */
+       vkGetPhysicalDeviceMemoryProperties(g->physicalDevice, &g->deviceMemoryProperties);
+       vkGetDeviceQueue(g->device, g->computeQueueIndex, 0, &g->computeQueue);
+}
+
+/**
+ * Buffers are created in three steps:
+ * 1) create buffer, specifying usage and size
+ * 2) allocate memory based on memory requirements
+ * 3) bind memory
+ * 
+ */
+static void init_buffer(struct state *g, VkDeviceSize dataSize, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer *buffer, VkDeviceMemory *memory)
+{
+       VkResult res;
+       VkMemoryRequirements req;
+       VkBufferCreateInfo buf_info = {
+               .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+               .size = dataSize,
+               .usage = usage,
+               .sharingMode = VK_SHARING_MODE_EXCLUSIVE
+       };
+       res = vkCreateBuffer(g->device, &buf_info, NULL, buffer);
+       checkFatal(res, "vkCreateBuffer");
+
+       vkGetBufferMemoryRequirements(g->device, *buffer, &req);
+
+       VkMemoryAllocateInfo alloc = {
+               .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+               .allocationSize = req.size,
+               .memoryTypeIndex = find_memory_type(&g->deviceMemoryProperties, req.memoryTypeBits, properties)
+       };
+       res = vkAllocateMemory(g->device, &alloc, NULL, memory);
+       checkFatal(res, "vkAllocateMemory");
+
+       res = vkBindBufferMemory(g->device, *buffer, *memory, 0);
+       checkFatal(res, "vkBindBufferMemory");
+}
+
+/**
+ * Descriptors are used to bind and describe memory blocks
+ * to shaders.
+ * 
+ * *Pool is used to allocate descriptors, it is per-device.
+ * *Layout is used to group descriptors for a given pipeline,
+ * The descriptors describe individually-addressable blocks.
+ */
+static void init_descriptor(struct state *g)
+{
+       VkResult res;
+       /* Create descriptorset layout */
+       VkDescriptorSetLayoutBinding layout_binding = {
+               .binding = 0,
+               .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+               .descriptorCount = 1,
+               .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+       };
+       VkDescriptorSetLayoutCreateInfo descriptor_layout = {
+               .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+               .bindingCount = 1,
+               .pBindings = &layout_binding,
+       };
+       res = vkCreateDescriptorSetLayout(g->device, &descriptor_layout, NULL, &g->descriptorSetLayout);
+       checkFatal(res, "vkCreateDescriptorSetLayout");
+
+       /* Create descriptor pool */
+       VkDescriptorPoolSize type_count[] = {
+               {
+                       .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                       .descriptorCount = 1,
+               }
+       };
+       VkDescriptorPoolCreateInfo descriptor_pool = {
+               .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+               .maxSets = 1,
+               .poolSizeCount = ARRAY_SIZEOF(type_count),
+               .pPoolSizes = type_count,
+       };
+
+       res = vkCreateDescriptorPool(g->device, &descriptor_pool, NULL, &g->descriptorPool);
+       checkFatal(res, "vkCreateDescriptorPool");
+
+       /* Allocate from pool */
+       VkDescriptorSetAllocateInfo alloc_info[] = {
+               {
+                       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+                       .descriptorPool = g->descriptorPool,
+                       .descriptorSetCount = 1,
+                       .pSetLayouts = &g->descriptorSetLayout,
+               },
+       };
+       res = vkAllocateDescriptorSets(g->device, alloc_info, g->descriptorSets);
+       checkFatal(res, "vkAllocateDescriptorSets");
+
+       /* Bind a buffer to the descriptor */
+       VkDescriptorBufferInfo bufferInfo = {
+               .buffer = g->dstBuffer,
+               .offset = 0,
+               .range = g->dstBufferSize,
+       };
+       VkWriteDescriptorSet writeSet[] = {
+               {
+                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+                       .dstSet = g->descriptorSets[0],
+                       .dstBinding = 0,
+                       .descriptorCount = 1,
+                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                       .pBufferInfo = &bufferInfo,
+               }
+       };
+
+       vkUpdateDescriptorSets(g->device, ARRAY_SIZEOF(writeSet), writeSet, 0, NULL);
+}
+
+/**
+ * Create the compute pipeline.  This is the shader and data layouts for it.
+ */
+static void init_pipeline(struct state *g)
+{
+       VkResult res;
+       /* Set shader code */
+       VkShaderModuleCreateInfo vsInfo = {
+               .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+               .codeSize = sizeof(mandelbrot_cs),
+               .pCode = mandelbrot_cs,
+       };
+
+       res = vkCreateShaderModule(g->device, &vsInfo, NULL, &g->mandelbrotShader);
+       checkFatal(res, "vkCreateShaderModule");
+
+       /* Link shader to layout */
+       VkPipelineLayoutCreateInfo pipelineinfo = {
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+               .setLayoutCount = 1,
+               .pSetLayouts = &g->descriptorSetLayout,
+       };
+
+       res = vkCreatePipelineLayout(g->device, &pipelineinfo, NULL, &g->pipelineLayout);
+       checkFatal(res, "vkCreatePipelineLayout");
+
+       /* Create pipeline */
+       VkComputePipelineCreateInfo pipeline = {
+               .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+               .stage =
+               {
+                       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+                       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+                       .module = g->mandelbrotShader,
+                       .pName = mandelbrot_entry,
+               },
+               .layout = g->pipelineLayout
+
+       };
+       res = vkCreateComputePipelines(g->device, NULL, 1, &pipeline, NULL, g->computePipeline);
+       checkFatal(res, "vkCreateComputePipeline");
+}
+
+/**
+ * Create a command buffer, this is somewhat like a display list.
+ */
+static void init_command_buffer(struct state *g)
+{
+       VkResult res;
+       VkCommandPoolCreateInfo poolinfo = {
+               .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+               .queueFamilyIndex = g->computeQueueIndex,
+       };
+
+       res = vkCreateCommandPool(g->device, &poolinfo, NULL, &g->commandPool);
+       checkFatal(res, "vkCreateCommandPool");
+
+       VkCommandBufferAllocateInfo cmdinfo = {
+               .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+               .commandPool = g->commandPool,
+               .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+               .commandBufferCount = 1,
+       };
+
+       res = vkAllocateCommandBuffers(g->device, &cmdinfo, g->commandBuffers);
+       checkFatal(res, "vkAllocateCommandBuffers");
+
+       /* Fill command buffer with commands for later operation */
+       VkCommandBufferBeginInfo beginInfo = {
+               .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+               .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+       };
+       res = vkBeginCommandBuffer(g->commandBuffers[0], &beginInfo);
+       checkFatal(res, "vkBeginCommandBuffer");
+
+       /* Bind the compute operation and data */
+       vkCmdBindPipeline(g->commandBuffers[0], VK_PIPELINE_BIND_POINT_COMPUTE, g->computePipeline[0]);
+       vkCmdBindDescriptorSets(g->commandBuffers[0], VK_PIPELINE_BIND_POINT_COMPUTE, g->pipelineLayout, 0, 1, g->descriptorSets, 0, NULL);
+
+       /* Run it */
+       vkCmdDispatch(g->commandBuffers[0], round_up(WIDTH, LWS_X), round_up(HEIGHT, LWS_Y), 1);
+
+       res = vkEndCommandBuffer(g->commandBuffers[0]);
+       checkFatal(res, "vkEndCommandBuffer");
+}
+
+/**
+ * Execute the pre-created command buffer.
+ * 
+ * A fence is used to wait for completion.
+ */
+static void execute(struct state *g)
+{
+       VkResult res;
+       VkSubmitInfo submitInfo[] = {
+               {
+                       .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+                       .commandBufferCount = 1,
+                       .pCommandBuffers = g->commandBuffers
+               },
+       };
+
+       /* Create fence to mark the task completion */
+       VkFence fence;
+       VkFenceCreateInfo fenceInfo = {
+               .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO
+       };
+       res = vkCreateFence(g->device, &fenceInfo, NULL, &fence);
+       checkFatal(res, "vkCreateFence");
+
+       /* Await completion */
+       res = vkQueueSubmit(g->computeQueue, ARRAY_SIZEOF(submitInfo), submitInfo, fence);
+       checkFatal(res, "vkQueueSubmit");
+
+       do {
+               res = vkWaitForFences(g->device, 1, &fence, VK_TRUE, 1000000);
+       } while (res == VK_TIMEOUT);
+
+       vkDestroyFence(g->device, fence, NULL);
+}
+
+/**
+ * Trivial pnm format image output.
+ */
+static void pam_save(const char *name, unsigned int width, unsigned int height, unsigned int depth, const uint8_t *pixels)
+{
+       FILE *fp = fopen(name, "wb");
+
+       if (fp) {
+               fprintf(fp, "P6\n%d\n%d\n255\n", width, height);
+               fwrite(pixels, depth, width*height, fp);
+               fclose(fp);
+               printf("wrote: %s\n", name);
+       }
+}
+
+/**
+ * Accesses the gpu buffer, converts it to RGB byte, and saves it as a pam file.
+ */
+void save_result(struct state *g)
+{
+       VkResult res;
+       struct Pixel *dst;
+
+       res = vkMapMemory(g->device, g->dstMemory, 0, g->dstBufferSize, 0, (void **)&dst);
+       checkFatal(res, "vkMapMemory");
+
+       uint8_t *pixels = malloc(WIDTH * HEIGHT * 3);
+
+       // this is super-slow!
+       for (int i = 0; i < WIDTH * HEIGHT; i++) {
+               pixels[i * 3 + 0] = (uint8_t)(255.0f * dst[i].r);
+               pixels[i * 3 + 1] = (uint8_t)(255.0f * dst[i].g);
+               pixels[i * 3 + 2] = (uint8_t)(255.0f * dst[i].b);
+       }
+
+       vkUnmapMemory(g->device, g->dstMemory);
+
+       pam_save("mandelbrot.pam", WIDTH, HEIGHT, 3, pixels);
+       free(pixels);
+}
+
+static void shutdown(struct state *g)
+{
+       vkDestroyCommandPool(g->device, g->commandPool, NULL);
+
+       vkDestroyPipeline(g->device, g->computePipeline[0], NULL);
+       vkDestroyPipelineLayout(g->device, g->pipelineLayout, NULL);
+       vkDestroyShaderModule(g->device, g->mandelbrotShader, NULL);
+
+       vkDestroyDescriptorPool(g->device, g->descriptorPool, NULL);
+       vkDestroyDescriptorSetLayout(g->device, g->descriptorSetLayout, NULL);
+       
+       vkFreeMemory(g->device, g->dstMemory, NULL);
+       vkDestroyBuffer(g->device, g->dstBuffer, NULL);
+
+       vkDestroyDevice(g->device, NULL);
+       vkDestroyInstance(g->instance, NULL);
+}
+
+int main(int argc, char** argv)
+{
+       struct state g = {
+               .dstBufferSize = WIDTH * HEIGHT * sizeof(struct Pixel),
+       };
+
+       init_instance(&g);
+       init_device(&g);
+
+       init_buffer(&g, g.dstBufferSize,
+                   VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+                   VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+                   &g.dstBuffer, &g.dstMemory);
+
+       init_descriptor(&g);
+       init_pipeline(&g);
+       init_command_buffer(&g);
+
+       execute(&g);
+
+       save_result(&g);
+
+       shutdown(&g);
+       
+       return(EXIT_SUCCESS);
+}
diff --git a/mandelbrot-cs/mandelbrot.comp b/mandelbrot-cs/mandelbrot.comp
new file mode 100644 (file)
index 0000000..ed13bf2
--- /dev/null
@@ -0,0 +1,63 @@
+#version 450
+
+#define WIDTH (1920*2)
+#define HEIGHT (1200*2)
+#define LWS_X 8
+#define LWS_Y 8
+
+layout (local_size_x = LWS_X, local_size_y = LWS_Y, local_size_z = 1 ) in;
+
+struct Pixel{
+       vec4 value;
+};
+
+layout(std140, binding = 0) buffer buf
+{
+       Pixel imageData[];
+};
+
+void main() {
+
+       /*
+         In order to fit the work into workgroups, some unnecessary threads are launched.
+         We terminate those threads here. 
+       */
+       if(gl_GlobalInvocationID.x >= WIDTH || gl_GlobalInvocationID.y >= HEIGHT)
+               return;
+
+       float x = float(gl_GlobalInvocationID.x) / float(WIDTH);
+       float y = float(gl_GlobalInvocationID.y) / float(HEIGHT);
+
+       /*
+         What follows is code for rendering the mandelbrot set. 
+       */
+       vec2 uv = vec2(x, (y - 0.5) * (12.0 / 19.0) + 0.5);
+       float n = 0.0;
+       vec2 c = vec2(-.445, 0.0) + (uv - 0.5)*(4.0);
+       vec2 z = vec2(0.0);
+       const int M = 1000;
+
+       for (int i = 0; i<M; i++) {
+               z = vec2(z.x*z.x - z.y*z.y, 2.*z.x*z.y) + c;
+               
+               if (dot(z, z) > 4)
+                       break;
+               n++;
+       }
+          
+       // we use a simple cosine palette to determine color:
+       // http://iquilezles.org/www/articles/palettes/palettes.htm         
+       float t = float(n) * 10.0 / float(M);
+       vec3 d = vec3(0.5, 0.5, 0.5);
+       vec3 e = vec3(0.5, 0.5, 0.5);
+       vec3 f = vec3(1.0, 1.0, 1.0);
+       vec3 g = vec3(0.00, 0.33, 0.67);
+  
+       vec4 color = vec4( d + e*cos( 6.28318*(f*t+g) ) ,1.0);
+
+       if (n == M)
+               color = vec4(0, 0, 0, 1);
+  
+       // store the rendered mandelbrot set into a storage buffer:
+       imageData[WIDTH * gl_GlobalInvocationID.y + gl_GlobalInvocationID.x].value = color;
+}
diff --git a/util/glmaths.h b/util/glmaths.h
new file mode 100644 (file)
index 0000000..55bbadf
--- /dev/null
@@ -0,0 +1,137 @@
+
+/*
+  Some basic GL maths routines, in C.
+ */
+
+#ifndef GLMATHS_H
+#define GLMATHS_H
+
+#include <math.h>
+
+static __inline__ void identity4f(float *matrix) {
+       for (int i = 0; i < 16; i++)
+               matrix[i] = 0.0f;
+       for (int i = 0; i < 4; i++)
+               matrix[i * 4 + i] = 1.0f;
+}
+
+static __inline__ float length3f(float * __restrict a) {
+       float sum = 0;
+       for (int i = 0; i < 3; i++)
+               sum += a[i] * a[i];
+       return sqrtf(sum);
+}
+
+static __inline__ void sub3f(float * __restrict c, float * __restrict a, float * __restrict b) {
+       for (int i = 0; i < 3; i++)
+               c[i] = a[i] - b[i];
+}
+
+static __inline__ void norm3f(float * __restrict vec) {
+       float fix = 1.0f / length3f(vec);
+       for (int i = 0; i < 3; i++)
+               vec[i] *= fix;
+}
+
+static __inline__ void cross3f(float * __restrict c, float * __restrict a, float * __restrict b) {
+       c[0] = a[1] * b[2] - a[2] * b[1];
+       c[1] = a[2] * b[0] - a[0] * b[2];
+       c[2] = a[0] * b[1] - a[1] * b[0];
+}
+
+static __inline__ float dot3f(float * __restrict a, float * __restrict b) {
+       return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
+}
+
+static float * mult4x4f(float * __restrict c, float * __restrict b, float * __restrict a) {
+       c[0] = a[0] * b[0] + a[1] * b[4] + a[2] * b[8] + a[3] * b[12];
+       c[1] = a[0] * b[1] + a[1] * b[5] + a[2] * b[9] + a[3] * b[13];
+       c[2] = a[0] * b[2] + a[1] * b[6] + a[2] * b[10] + a[3] * b[14];
+       c[3] = a[0] * b[3] + a[1] * b[7] + a[2] * b[11] + a[3] * b[15];
+
+       c[4] = a[4] * b[0] + a[5] * b[4] + a[6] * b[8] + a[7] * b[12];
+       c[5] = a[4] * b[1] + a[5] * b[5] + a[6] * b[9] + a[7] * b[13];
+       c[6] = a[4] * b[2] + a[5] * b[6] + a[6] * b[10] + a[7] * b[14];
+       c[7] = a[4] * b[3] + a[5] * b[7] + a[6] * b[11] + a[7] * b[15];
+
+       c[8] = a[8] * b[0] + a[9] * b[4] + a[10] * b[8] + a[11] * b[12];
+       c[9] = a[8] * b[1] + a[9] * b[5] + a[10] * b[9] + a[11] * b[13];
+       c[10] = a[8] * b[2] + a[9] * b[6] + a[10] * b[10] + a[11] * b[14];
+       c[11] = a[8] * b[3] + a[9] * b[7] + a[10] * b[11] + a[11] * b[15];
+
+       c[12] = a[12] * b[0] + a[13] * b[4] + a[14] * b[8] + a[15] * b[12];
+       c[13] = a[12] * b[1] + a[13] * b[5] + a[14] * b[9] + a[15] * b[13];
+       c[14] = a[12] * b[2] + a[13] * b[6] + a[14] * b[10] + a[15] * b[14];
+       c[15] = a[12] * b[3] + a[13] * b[7] + a[14] * b[11] + a[15] * b[15];
+
+       return c;
+}
+
+static __inline__ void lookAt(float *mat, float *eye, float *centre, float *up) {
+       float forward[3], side[3], u[3];
+
+       sub3f(forward, centre, eye);
+       norm3f(forward);
+       cross3f(side, forward, up);
+       norm3f(side);
+       cross3f(u, side, forward);
+
+       mat[0] = side[0];
+       mat[4] = side[1];
+       mat[8] = side[2];
+
+       mat[1] = u[0];
+       mat[5] = u[1];
+       mat[9] = u[2];
+
+       mat[2] = -forward[0];
+       mat[6] = -forward[1];
+       mat[10] = -forward[2];
+
+       mat[12] = -dot3f(side, eye);
+       mat[13] = -dot3f(u, eye);
+       mat[14] = dot3f(forward, eye);
+
+       mat[3] = 0.0f;
+       mat[7] = 0.0f;
+       mat[11] = 0.0f;
+
+       mat[15] = 1.0f;
+}
+
+static __inline__ void frustum(float *mat, float left, float right, float bottom, float top, float znear, float zfar) {
+       float temp, temp2, temp3, temp4;
+
+       temp = 2.0 * znear;
+       temp2 = right - left;
+       temp3 = top - bottom;
+       temp4 = zfar - znear;
+       mat[0] = temp / temp2;
+       mat[1] = 0.0;
+       mat[2] = 0.0;
+       mat[3] = 0.0;
+       mat[4] = 0.0;
+       mat[5] = temp / temp3;
+       mat[6] = 0.0;
+       mat[7] = 0.0;
+       mat[8] = (right + left) / temp2;
+       mat[9] = (top + bottom) / temp3;
+       mat[10] = (-zfar - znear) / temp4;
+       mat[11] = -1.0;
+       mat[12] = 0.0;
+       mat[13] = 0.0;
+       mat[14] = (-temp * zfar) / temp4;
+       mat[15] = 0.0;
+}
+
+static __inline void perspective(float *mat, float fovy, float aspect, float znear, float zfar) {
+       float ymax, xmax;
+
+       ymax = znear * tanf(fovy * 0.5f);
+       xmax = ymax * aspect;
+
+       frustum(mat, -xmax, xmax, -ymax, ymax, znear, zfar);
+}
+
+#endif
+
diff --git a/util/pam.h b/util/pam.h
new file mode 100644 (file)
index 0000000..0a7fd6e
--- /dev/null
@@ -0,0 +1,20 @@
+
+#ifndef PAM_H
+#define PAM_H
+
+/**
+ * Trivial pnm format image output.
+ */
+static void pam_save(const char *name, unsigned int width, unsigned int height, unsigned int depth, const uint8_t *pixels)
+{
+       FILE *fp = fopen(name, "wb");
+
+       if (fp) {
+               fprintf(fp, "P6\n%d\n%d\n255\n", width, height);
+               fwrite(pixels, depth, width*height, fp);
+
+               fclose(fp);
+       }
+}
+
+#endif
diff --git a/util/timing.h b/util/timing.h
new file mode 100644 (file)
index 0000000..bddf1d4
--- /dev/null
@@ -0,0 +1,30 @@
+
+/*
+  Very basic timing utilities.
+ */
+
+#ifndef TIMING_H
+
+#ifdef HAVE_CLOCK_GETTIME
+#include <time.h>
+
+static __inline__ uint64_t time_stamp(void)
+{
+       struct timespec ts;
+       clock_gettime(CLOCK_REALTIME, &ts);
+
+       return ts.tv_nsec + ts.tv_sec * 1000000000ULL;
+}
+
+#define TIME_START uint64_t stamp = time_stamp()
+#define TIME_PRINT printf(" %12.9f %s\n", 1E-9 * (time_stamp() - stamp), __FUNCTION__) 
+#define TIME_ITEM(x) printf(" %12.9f %s\n", 1E-9 * (time_stamp() - stamp), x) 
+
+#else
+
+#define TIME_START do { } while(0)
+#define TIME_PRINT do { } while(0)
+
+#endif
+
+#endif