--- /dev/null
+
+INTRODUCTION
+------------
+
+This is a collection of learning experiments with Vulkan in the C
+programming language.
+
+COMPILING
+---------
+
+Apart from the basic GNU tool suite one needs glslangValidator and
+clspv. The former is possibly already installed or available as a
+package, but see LINKS for further information.
+
+Copy config.make.in to config.make before compiling.
+
+Each example has its own Makefile and they must be compiled
+from within their respective directories.
+
+EXAMPLES
+---------
+
+cube/
+
+ A basic render-cube demo from https://vulkan.lunarg.com/sdk/home,
+ converted to plain C.
+
+mandelbrot-cs/
+mandelbrot-cl/
+
+ One is a mandelbrot generator using GLSL compute shader, the other
+ uses an OpenCL shader compiled via clspv. Because of the simplicity
+ of the data buffers the code is identical apart from which shader it
+ loads.
+
+ By far most of the execution time is spent on the CPU side,
+ converting the float4 array to uchar3 for output.
+
+LINKS
+-----
+
+ * https://www.lunarg.com/vulkan-sdk - vulkan sdk.
+ * https://github.com/google/clspv - OpenCL C to SPIR-V compiler.
+
+LICENSES
+--------
+
+Various permissive licenses, see individual source files.
+
+Stuff in util/ is (intended to be) public domain, although I'm
+unsure about some of glmaths.h.
+
+
--- /dev/null
+
+# command used to compile .frag, .vert, and .comp files.
+GLSPV=glslangValidator
+# command used to compile .cl files.
+CLSPV=clspv
+
+# On linux, clock_gettime can be used for timing
+CPPFLAGS=-DHAVE_CLOCK_GETTIME -I../util
+
+# compile flags for graphics examples
+present_LDLIBS=-lm -lvulkan -lX11
+present_CFLAGS=-DVK_USE_PLATFORM_XLIB_KHR -O2 -march=native
+
+# compile flags for compute-only examples
+compute_LDLIBS=-lm -lvulkan
+compute_CFLAGS=-O2 -march=native
+
+# Some implicit rules that might need changing based on
+# the configuration above.
+
# compile a glsl .vert file into a C include file
# (--vn emits the SPIR-V as a named uint32_t C array)
%.vert.inc: %.vert
	$(GLSPV) -V -x --vn $$(basename $< .vert)_vs -o $@ $<
+
# compile a glsl .frag file into a C include file
# (use $(GLSPV) so config.make's setting is honoured)
%.frag.inc: %.frag
	$(GLSPV) -V -x --vn $$(basename $< .frag)_fs -o $@ $<
+
# compile a glsl .comp file into a C include file
# (use $(GLSPV) so config.make's setting is honoured)
%.comp.inc: %.comp
	$(GLSPV) -V -x --vn $$(basename $< .comp)_cs -o $@ $<
+
# convert a .cl.spv file into a C include file (uint32_t array)
# od dumps the binary as hex words; sed wraps each word into a
# "0x....," C initialiser, giving a brace-delimited array body.
%.cl.inc: %.cl.spv
	echo "const uint32_t $$(basename $< .cl.spv)_cl[] = {" > $@
	od -A none -X < $< | sed 's/\([0-9a-f]\+\)/0x\1,/g' >> $@
	echo "};" >> $@
+
# compile a .cl file into SPIR-V
# (a single clspv run produces both the .spv module and the
# .csv descriptor map, hence the multi-target pattern rule)
%.cl.spv %.cl.csv: %.cl
	$(CLSPV) -o $*.cl.spv --descriptormap $*.cl.csv $<
--- /dev/null
+
include ../config.make

LDLIBS=$(present_LDLIBS)
CFLAGS=$(present_CFLAGS)

# all/clean are not real files; mark them phony so stray files
# named "all" or "clean" cannot break the build.
.PHONY: all clean

all: cube
cube: cube.o
cube.o: cube.c cube.vert.inc cube.frag.inc

clean:
	rm -f *.o cube cube.*.inc
--- /dev/null
+
typedef struct Vertex Vertex;

// One vertex: a 4-float position followed by a 4-float colour.
// Layout matches the two vertex attributes set up in
// init_vertexbuffer() (two R32G32B32A32_SFLOAT attributes,
// colour at byte offset 16).
struct Vertex {
    float posX, posY, posZ, posW; // Position data
    float r, g, b, a;             // Color
};
+
// Cube geometry: 6 faces x 2 triangles x 3 vertices = 36 vertices.
// Each face is a single solid colour; positions span [-1, 1] with w = 1.
static const Vertex g_vb_solid_face_colours_Data[] = {
    // red face
    {-1, -1, 1, 1.f, 1.f, 0.f, 0.f, 1.f},
    {-1, 1, 1, 1.f, 1.f, 0.f, 0.f, 1.f},
    {1, -1, 1, 1.f, 1.f, 0.f, 0.f, 1.f},
    {1, -1, 1, 1.f, 1.f, 0.f, 0.f, 1.f},
    {-1, 1, 1, 1.f, 1.f, 0.f, 0.f, 1.f},
    {1, 1, 1, 1.f, 1.f, 0.f, 0.f, 1.f},
    // green face
    {-1, -1, -1, 1.f, 0.f, 1.f, 0.f, 1.f},
    {1, -1, -1, 1.f, 0.f, 1.f, 0.f, 1.f},
    {-1, 1, -1, 1.f, 0.f, 1.f, 0.f, 1.f},
    {-1, 1, -1, 1.f, 0.f, 1.f, 0.f, 1.f},
    {1, -1, -1, 1.f, 0.f, 1.f, 0.f, 1.f},
    {1, 1, -1, 1.f, 0.f, 1.f, 0.f, 1.f},
    // blue face
    {-1, 1, 1, 1.f, 0.f, 0.f, 1.f, 1.f},
    {-1, -1, 1, 1.f, 0.f, 0.f, 1.f, 1.f},
    {-1, 1, -1, 1.f, 0.f, 0.f, 1.f, 1.f},
    {-1, 1, -1, 1.f, 0.f, 0.f, 1.f, 1.f},
    {-1, -1, 1, 1.f, 0.f, 0.f, 1.f, 1.f},
    {-1, -1, -1, 1.f, 0.f, 0.f, 1.f, 1.f},
    // yellow face
    {1, 1, 1, 1.f, 1.f, 1.f, 0.f, 1.f},
    {1, 1, -1, 1.f, 1.f, 1.f, 0.f, 1.f},
    {1, -1, 1, 1.f, 1.f, 1.f, 0.f, 1.f},
    {1, -1, 1, 1.f, 1.f, 1.f, 0.f, 1.f},
    {1, 1, -1, 1.f, 1.f, 1.f, 0.f, 1.f},
    {1, -1, -1, 1.f, 1.f, 1.f, 0.f, 1.f},
    // magenta face
    {1, 1, 1, 1.f, 1.f, 0.f, 1.f, 1.f},
    {-1, 1, 1, 1.f, 1.f, 0.f, 1.f, 1.f},
    {1, 1, -1, 1.f, 1.f, 0.f, 1.f, 1.f},
    {1, 1, -1, 1.f, 1.f, 0.f, 1.f, 1.f},
    {-1, 1, 1, 1.f, 1.f, 0.f, 1.f, 1.f},
    {-1, 1, -1, 1.f, 1.f, 0.f, 1.f, 1.f},
    // cyan face
    {1, -1, 1, 1.f, 0.f, 1.f, 1.f, 1.f},
    {1, -1, -1, 1.f, 0.f, 1.f, 1.f, 1.f},
    {-1, -1, 1, 1.f, 0.f, 1.f, 1.f, 1.f},
    {-1, -1, 1, 1.f, 0.f, 1.f, 1.f, 1.f},
    {1, -1, -1, 1.f, 0.f, 1.f, 1.f, 1.f},
    {-1, -1, -1, 1.f, 0.f, 1.f, 1.f, 1.f},
};
--- /dev/null
+/*
+ * Vulkan Samples
+ *
+ * Copyright (C) 2015-2016 Valve Corporation
+ * Copyright (C) 2015-2016 LunarG, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ This based on the tutorial in the vulkansdk:
+
+ https://vulkan.lunarg.com/sdk/home
+
+ But it has been made stand-alone and converted to C. C struct
+ initialisers do something - but not much - toward cleaning up the
+ mess.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <vulkan/vulkan.h>
+
+#if defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_XCB_KHR)
+#include <X11/Xutil.h>
+#endif
+
+#include <GL/glu.h>
+#include <math.h>
+#include "glmaths.h"
+
+// These can't be changed
+#define NUM_SAMPLES VK_SAMPLE_COUNT_1_BIT
+#define NUM_DESCRIPTOR_SETS 1
+
+#include "cube.vert.inc"
+#include "cube.frag.inc"
+#include "cube-data.h"
+
// A 4x4 matrix stored as 16 contiguous floats.
typedef struct mat4x4 {
	float m[16];
} mat4x4;
+
// All mutable application state, threaded through every init_*()
// and execute_*() function in place of globals.
struct state {
	VkInstance vk;

	// Chosen physical device and its cached capabilities.
	VkPhysicalDevice physical;
	VkPhysicalDeviceMemoryProperties memory_properties;
	VkPhysicalDeviceFeatures device_features;

	VkCommandPool cmd_pool;
	VkDescriptorPool desc_pool;

	VkDevice device;
	VkCommandBuffer cmd;          // the single primary command buffer
	VkSwapchainKHR chain;
	int device_id;                // index into the enumerated device list
	int graphics_queue_index;     // queue family indices found in init_device()
	int present_queue_index;

	VkQueue graphics_queue;
	VkQueue present_queue;

	// Shader modules created in init_pipeline(): [0] = vertex, [1] = fragment.
	int shaderCount;
	VkShaderModule shader[2];

	VkRenderPass render_pass;
	VkFramebuffer *framebuffers;  // one per swapchain image

	int width, height;            // window / swapchain-request dimensions
	VkSurfaceKHR surface;

	// Transform matrices; mvp is what gets uploaded to the uniform buffer.
	float projection[16];
	float view[16];
	float model[16];
	float clip[16];
	float mvp[16];

	VkBuffer uniformBuffer;
	VkDeviceMemory uniformMemory;
	VkDescriptorBufferInfo uniformInfo;

	VkPipeline pipeline[1];

	VkDescriptorSetLayout desc_layout[NUM_DESCRIPTOR_SETS];
	VkDescriptorSet desc_set[NUM_DESCRIPTOR_SETS];
	VkPipelineLayout pipeline_layout;

	// Vertex buffer and its input-layout description.
	VkBuffer vertexBuffer;
	VkDeviceMemory vertexMemory;
	VkVertexInputBindingDescription vi_binding;
	VkVertexInputAttributeDescription vi_attribs[2];

	VkFence drawFence;

	VkSemaphore chainSemaphore;
	uint32_t chainIndex;          // swapchain image index currently acquired

	VkFormat chainImageFormat;
	uint32_t chainImageCount;
	VkImage *chainImage;          // arrays of chainImageCount entries
	VkImageView *chainImageView;

	// Depth buffer (D16) backing the second render-pass attachment.
	VkFormat depthFormat;
	VkImage depthImage;
	VkImageView depthView;
	VkDeviceMemory depthMemory;

#if defined(VK_USE_PLATFORM_XLIB_KHR)
	Display *display;
	Window window;
	Atom wm_delete_window;
#endif
};
+
+static void checkFatal(VkResult res, const char *cmd)
+{
+ if (res != VK_SUCCESS) {
+ fprintf(stderr, "%s: %d\n", cmd, res);
+ exit(EXIT_FAILURE);
+ }
+}
+
+static int find_memory_type(VkPhysicalDeviceMemoryProperties *memory, uint32_t typeMask, VkMemoryPropertyFlagBits query)
+{
+ for (int i = 0; i < memory->memoryTypeCount; i++) {
+ if (((1 << i) & typeMask) && (memory->memoryTypes[i].propertyFlags & query) == query)
+ return i;
+ }
+ return -1;
+}
+
+void execute_begin_command_buffer(struct state *g)
+{
+ /* DEPENDS on init_command_buffer() */
+ VkResult res;
+
+ VkCommandBufferBeginInfo cmd_buf_info = {};
+ cmd_buf_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+ cmd_buf_info.pNext = NULL;
+ cmd_buf_info.flags = 0;
+ cmd_buf_info.pInheritanceInfo = NULL;
+
+ res = vkBeginCommandBuffer(g->cmd, &cmd_buf_info);
+}
+
+void execute_end_command_buffer(struct state *g)
+{
+ VkResult res;
+
+ res = vkEndCommandBuffer(g->cmd);
+}
+
+#define FENCE_TIMEOUT 100000000
+
+void execute_queue_command_buffer(struct state *g)
+{
+ VkResult res;
+
+ /* Queue the command buffer for execution */
+ const VkCommandBuffer cmd_bufs[] = {g->cmd};
+ VkFenceCreateInfo fenceInfo;
+ VkFence drawFence;
+ fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+ fenceInfo.pNext = NULL;
+ fenceInfo.flags = 0;
+ vkCreateFence(g->device, &fenceInfo, NULL, &drawFence);
+
+ VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ VkSubmitInfo submit_info[] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .waitSemaphoreCount = 0,
+ .pWaitSemaphores = NULL,
+ .pWaitDstStageMask = &pipe_stage_flags,
+ .commandBufferCount = 1,
+ .pCommandBuffers = cmd_bufs,
+ .signalSemaphoreCount = 0,
+ .pSignalSemaphores = NULL,
+ }
+ };
+ res = vkQueueSubmit(g->graphics_queue, 1, submit_info, drawFence);
+
+ do {
+ res = vkWaitForFences(g->device, 1, &drawFence, VK_TRUE, FENCE_TIMEOUT);
+ } while (res == VK_TIMEOUT);
+
+ vkDestroyFence(g->device, drawFence, NULL);
+}
+
+static void init_instance(struct state *g)
+{
+ VkResult res;
+ VkApplicationInfo app = {
+ .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+ .pApplicationName = "basic",
+ .pEngineName = "none",
+ .apiVersion = VK_API_VERSION_1_0,
+ };
+ const char * const extensions[] = {
+ VK_KHR_SURFACE_EXTENSION_NAME,
+ VK_KHR_XLIB_SURFACE_EXTENSION_NAME
+ };
+ VkInstanceCreateInfo info = {
+ .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+ .pApplicationInfo = &app,
+ .enabledExtensionCount = sizeof(extensions) / sizeof(*extensions),
+ .ppEnabledExtensionNames = extensions,
+ };
+
+ res = vkCreateInstance(&info, NULL, &g->vk);
+ if (res != VK_SUCCESS) {
+ fprintf(stderr, "vkCreateInstance: %d\n", res);
+ exit(EXIT_FAILURE);
+ }
+}
+
/* Clamp v into [min, max] (max wins when the bounds conflict). */
static int clampi(int v, int min, int max)
{
	if (v < min)
		return min;
	if (v < max)
		return v;
	return max;
}
+
+#if defined(VK_USE_PLATFORM_XLIB_KHR)
+static void init_surface(struct state *g)
+{
+ XInitThreads();
+ g->display = XOpenDisplay(NULL);
+ long visualMask = VisualScreenMask;
+ int numberOfVisuals;
+ XVisualInfo vInfoTemplate = {
+ .screen = DefaultScreen(g->display)
+ };
+ XVisualInfo *visualInfo = XGetVisualInfo(g->display, visualMask, &vInfoTemplate, &numberOfVisuals);
+ Colormap colormap = XCreateColormap(g->display, RootWindow(g->display, vInfoTemplate.screen), visualInfo->visual, AllocNone);
+ XSetWindowAttributes windowAttributes = {
+ .colormap = colormap,
+ .background_pixel = 0xFFFFFFFF,
+ .border_pixel = 0,
+ .event_mask = KeyPressMask | KeyReleaseMask | StructureNotifyMask | ExposureMask
+ };
+
+ g->window = XCreateWindow(g->display, RootWindow(g->display, vInfoTemplate.screen),
+ 0, 0, g->width, g->height,
+ 0, visualInfo->depth, InputOutput, visualInfo->visual,
+ CWBackPixel | CWBorderPixel | CWEventMask | CWColormap, &windowAttributes);
+
+ XSelectInput(g->display, g->window, ExposureMask | KeyPressMask);
+ XMapWindow(g->display, g->window);
+ XFlush(g->display);
+ g->wm_delete_window = XInternAtom(g->display, "WM_DELETE_WINDOW", False);
+
+ //
+ VkXlibSurfaceCreateInfoKHR surfaceinfo = {
+ .sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR,
+ .pNext = NULL,
+ .flags = 0,
+ .dpy = g->display,
+ .window = g->window
+ };
+
+ VkResult res;
+
+ res = vkCreateXlibSurfaceKHR(g->vk, &surfaceinfo, NULL, &g->surface);
+ checkFatal(res, "vkCrateXlibSurfaceKHR");
+}
+#else
+#error "Only XLIB is implemented"
+#endif
+
/*
 * Pick a physical device and queue families, create the logical
 * device, then build the swapchain and one image view per swapchain
 * image.
 *
 * Sets: device_id, graphics_queue_index, present_queue_index,
 * memory_properties, device_features, device, chain, chainImageCount,
 * chainImage[], chainImageView[] and chainImageFormat.
 */
static void init_device(struct state *g)
{
	VkResult res;
	uint32_t devcount;

	/* standard Vulkan two-call pattern: query the count, then fetch */
	res = vkEnumeratePhysicalDevices(g->vk, &devcount, NULL);
	VkPhysicalDevice devs[devcount];
	res = vkEnumeratePhysicalDevices(g->vk, &devcount, devs);

	// Search for device and queue indices
	int devid = -1;
	int present_queue = -1;
	int graphics_queue = -1;
	for (int i = 0; i < devcount; i++) {
		uint32_t family_count;
		vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, NULL);
		VkQueueFamilyProperties famprops[family_count];
		vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, famprops);

		/* Prefer a single family that can do both graphics and
		 * present; otherwise remember the first of each. */
		for (uint32_t j = 0; j < family_count; j++) {
			VkBool32 present;

			vkGetPhysicalDeviceSurfaceSupportKHR(devs[i], j, g->surface, &present);

			if (present && present_queue == -1)
				present_queue = j;
			if ((famprops[j].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) {
				graphics_queue = j;
				if (present) {
					present_queue = j;
					break;
				}
			}
		}
		/* take the first device that satisfies both needs */
		if (present_queue != -1 && graphics_queue != -1) {
			devid = i;
			break;
		}
	}

	if (devid == -1) {
		fprintf(stderr, "Unable to find suitable device\n");
		exit(1);
	}
	g->device_id = devid;
	g->present_queue_index = present_queue;
	g->graphics_queue_index = graphics_queue;

	/* cache properties needed later by find_memory_type() et al. */
	vkGetPhysicalDeviceMemoryProperties(devs[devid], &g->memory_properties);
	vkGetPhysicalDeviceFeatures(devs[devid], &g->device_features);

	/* ************************************************************** */

	/* Create the logical device with one graphics-family queue. */
	float qpri[] = {0.0f};
	VkDeviceQueueCreateInfo qinfo = {
		.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
		.queueCount = 1,
		.pQueuePriorities = qpri,
		.queueFamilyIndex = g->graphics_queue_index,
	};

	const char * const extensions[] = {
		VK_KHR_SWAPCHAIN_EXTENSION_NAME,
	};

	/* depthClamp is required because init_pipeline() enables it */
	VkPhysicalDeviceFeatures features = {
		.depthClamp = VK_TRUE,
	};
	VkDeviceCreateInfo devinfo = {
		.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
		.queueCreateInfoCount = 1,
		.pQueueCreateInfos = &qinfo,
		.enabledExtensionCount = sizeof(extensions) / sizeof(*extensions),
		.ppEnabledExtensionNames = extensions,
		.pEnabledFeatures = &features,
	};

	res = vkCreateDevice(devs[devid], &devinfo, NULL, &g->device);
	checkFatal(res, "vkCreateDevice");

	/* ************************************************************** */
	/* Choose a surface format for the swapchain. */
	VkFormat format;
	uint32_t formatCount;
	res = vkGetPhysicalDeviceSurfaceFormatsKHR(devs[devid], g->surface, &formatCount, NULL);
	VkSurfaceFormatKHR surfFormats[formatCount];
	res = vkGetPhysicalDeviceSurfaceFormatsKHR(devs[devid], g->surface, &formatCount, surfFormats);
	// If the format list includes just one entry of VK_FORMAT_UNDEFINED,
	// the surface has no preferred format. Otherwise, at least one
	// supported format will be returned.
	if (formatCount == 1 && surfFormats[0].format == VK_FORMAT_UNDEFINED) {
		format = VK_FORMAT_B8G8R8A8_UNORM;
	} else {
		format = surfFormats[0].format;
	}

	VkSurfaceCapabilitiesKHR surfCapabilities;

	res = vkGetPhysicalDeviceSurfaceCapabilitiesKHR(devs[devid], g->surface, &surfCapabilities);
	checkFatal(res, "vkGetPhysicalDeviceSurfaceCapabilitiesKHR");

	/* NOTE(review): presentModes is queried but never consulted;
	 * the swapchain below hardcodes FIFO (always available). */
	uint32_t presentModeCount;
	res = vkGetPhysicalDeviceSurfacePresentModesKHR(devs[devid], g->surface, &presentModeCount, NULL);
	VkPresentModeKHR presentModes[presentModeCount];
	res = vkGetPhysicalDeviceSurfacePresentModesKHR(devs[devid], g->surface, &presentModeCount, presentModes);
	checkFatal(res, "vkGetPhysicalDeviceSurfacePresentModesKHR");

	VkExtent2D swapchainExtent;
	// width and height are either both 0xFFFFFFFF, or both not 0xFFFFFFFF.
	if (surfCapabilities.currentExtent.width == 0xFFFFFFFF) {
		// If the surface size is undefined, the size is set to
		// the size of the images requested.
		swapchainExtent.width = clampi(g->width, surfCapabilities.minImageExtent.width, surfCapabilities.maxImageExtent.width);
		swapchainExtent.height = clampi(g->height, surfCapabilities.minImageExtent.height, surfCapabilities.maxImageExtent.height);
	} else {
		// If the surface size is defined, the swap chain size must match
		swapchainExtent = surfCapabilities.currentExtent;
	}

	/* Pick the first composite-alpha mode the surface supports,
	 * preferring opaque. */
	VkCompositeAlphaFlagBitsKHR compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
	VkCompositeAlphaFlagBitsKHR compositeAlphaFlags[4] = {
		VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
		VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR,
		VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR,
		VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR,
	};
	for (uint32_t i = 0; i < sizeof(compositeAlphaFlags) / sizeof(compositeAlphaFlags[0]); i++) {
		if (surfCapabilities.supportedCompositeAlpha & compositeAlphaFlags[i]) {
			compositeAlpha = compositeAlphaFlags[i];
			break;
		}
	}

	VkSwapchainCreateInfoKHR chaininfo = {
		.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
		.surface = g->surface,
		.minImageCount = surfCapabilities.minImageCount,
		.imageFormat = format,
		.imageExtent = swapchainExtent,
		.preTransform = (surfCapabilities.supportedTransforms & VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR)
		? VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR : surfCapabilities.currentTransform,
		.compositeAlpha = compositeAlpha,
		.imageArrayLayers = 1,
		.presentMode = VK_PRESENT_MODE_FIFO_KHR,
		.clipped = VK_TRUE,
		.imageColorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
		.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
		.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
		// assumes queues are same.
		.queueFamilyIndexCount = 0,
	};

	res = vkCreateSwapchainKHR(g->device, &chaininfo, NULL, &g->chain);
	checkFatal(res, "vkCreateSwapchainKHR");

	res = vkGetSwapchainImagesKHR(g->device, g->chain, &g->chainImageCount, NULL);
	checkFatal(res, "vkGetSwapchainImagesKHR");

	/* NOTE(review): second malloc sizes by *g->chainImage rather than
	 * *g->chainImageView — harmless only because both are handles of
	 * equal size; should be sizeof(*g->chainImageView). */
	g->chainImage = malloc(sizeof(*g->chainImage) * g->chainImageCount);
	g->chainImageView = malloc(sizeof(*g->chainImage) * g->chainImageCount);

	res = vkGetSwapchainImagesKHR(g->device, g->chain, &g->chainImageCount, g->chainImage);

	/* One identity-swizzled 2D colour view per swapchain image. */
	VkImageViewCreateInfo viewinfo = {
		.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
		.viewType = VK_IMAGE_VIEW_TYPE_2D,
		.format = format,
		.components.r = VK_COMPONENT_SWIZZLE_R,
		.components.g = VK_COMPONENT_SWIZZLE_G,
		.components.b = VK_COMPONENT_SWIZZLE_B,
		.components.a = VK_COMPONENT_SWIZZLE_A,
		.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
		.subresourceRange.levelCount = 1,
		.subresourceRange.layerCount = 1,
	};
	for (uint32_t i = 0; i < g->chainImageCount; i++) {
		viewinfo.image = g->chainImage[i];

		res = vkCreateImageView(g->device, &viewinfo, NULL, &g->chainImageView[i]);
		checkFatal(res, "vkCreateImageView");
	}

	g->chainImageFormat = format;
}
+
+static void init_device_queue(struct state *g)
+{
+ vkGetDeviceQueue(g->device, g->graphics_queue_index, 0, &g->graphics_queue);
+ if (g->graphics_queue_index == g->present_queue_index) {
+ g->present_queue = g->graphics_queue;
+ } else {
+ vkGetDeviceQueue(g->device, g->present_queue_index, 0, &g->present_queue);
+ }
+}
+
+static void init_command(struct state *g)
+{
+ VkResult res;
+ VkCommandPoolCreateInfo poolinfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .queueFamilyIndex = g->graphics_queue_index,
+ };
+
+ res = vkCreateCommandPool(g->device, &poolinfo, NULL, &g->cmd_pool);
+ checkFatal(res, "vkCreateCommandPool");
+
+ VkCommandBufferAllocateInfo cmdinfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+ .commandPool = g->cmd_pool,
+ .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+ .commandBufferCount = 1,
+ };
+
+ res = vkAllocateCommandBuffers(g->device, &cmdinfo, &g->cmd);
+ checkFatal(res, "vkCAllocateCommandBuffers");
+}
+
+static void init_depth(struct state *g)
+{
+ VkResult res;
+ VkMemoryRequirements req;
+ VkImageCreateInfo imageinfo = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .pNext = NULL,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .format = VK_FORMAT_D16_UNORM,
+ .extent.width = g->width,
+ .extent.height = g->height,
+ .extent.depth = 1,
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = NUM_SAMPLES,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ .usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = NULL,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .flags = 0,
+ };
+ res = vkCreateImage(g->device, &imageinfo, NULL, &g->depthImage);
+ checkFatal(res, "vkCreateImage");
+
+ vkGetImageMemoryRequirements(g->device, g->depthImage, &req);
+ VkMemoryAllocateInfo alloc = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .allocationSize = req.size,
+ .memoryTypeIndex = find_memory_type(&g->memory_properties, req.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
+ };
+ res = vkAllocateMemory(g->device, &alloc, NULL, &g->depthMemory);
+ checkFatal(res, "vkAllocateMemory");
+
+ res = vkBindImageMemory(g->device, g->depthImage, g->depthMemory, 0);
+ checkFatal(res, "vkBindImageMemory");
+
+ VkImageViewCreateInfo view_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = g->depthImage,
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = VK_FORMAT_D16_UNORM,
+ .components =
+ { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A},
+ .subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
+ .subresourceRange.levelCount = 1,
+ .subresourceRange.layerCount = 1,
+ };
+ res = vkCreateImageView(g->device, &view_info, NULL, &g->depthView);
+ checkFatal(res, "vkCreateImageView");
+
+ g->depthFormat = imageinfo.format;
+}
+
+static void init_uniform(struct state *g)
+{
+ VkResult res;
+ VkBufferCreateInfo buf_info = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+ .size = sizeof(g->mvp),
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE
+ };
+ res = vkCreateBuffer(g->device, &buf_info, NULL, &g->uniformBuffer);
+ checkFatal(res, "vkCreateBuffer (uniform)");
+
+ VkMemoryRequirements req;
+ vkGetBufferMemoryRequirements(g->device, g->uniformBuffer, &req);
+
+ VkMemoryAllocateInfo alloc = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .allocationSize = req.size,
+ .memoryTypeIndex = find_memory_type(&g->memory_properties, req.memoryTypeBits,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
+ };
+ res = vkAllocateMemory(g->device, &alloc, NULL, &g->uniformMemory);
+ checkFatal(res, "vkAllocateMemory (uniform)");
+
+ void *mem;
+ res = vkMapMemory(g->device, g->uniformMemory, 0, req.size, 0, &mem);
+ checkFatal(res, "vkMapMemory (uniform)");
+ memcpy(mem, g->mvp, sizeof(g->mvp));
+ vkUnmapMemory(g->device, g->uniformMemory);
+
+ res = vkBindBufferMemory(g->device, g->uniformBuffer, g->uniformMemory, 0);
+ checkFatal(res, "vkBindBufferMemory (uniform)");
+
+ g->uniformInfo.buffer = g->uniformBuffer;
+ g->uniformInfo.offset = 0;
+ g->uniformInfo.range = sizeof(g->mvp);
+}
+
/*
 * Build the descriptor machinery for the single uniform buffer:
 * set layout -> pipeline layout -> descriptor pool -> descriptor set,
 * then point binding 0 of the set at g->uniformInfo.
 */
static void init_descriptor(struct state *g)
{
	VkResult res;
	/* one uniform buffer visible to the vertex stage, at binding 0 */
	VkDescriptorSetLayoutBinding layout_binding = {
		.binding = 0,
		.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
		.descriptorCount = 1,
		.stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
	};
	VkDescriptorSetLayoutCreateInfo descriptor_layout = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.bindingCount = 1,
		.pBindings = &layout_binding,
	};
	res = vkCreateDescriptorSetLayout(g->device, &descriptor_layout, NULL, g->desc_layout);
	checkFatal(res, "vkCreateDescriptorSetLayout");

	/* pipeline layout: just the one set layout, no push constants */
	VkPipelineLayoutCreateInfo pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.pNext = NULL,
		.pushConstantRangeCount = 0,
		.pPushConstantRanges = NULL,
		.setLayoutCount = 1,
		.pSetLayouts = g->desc_layout,
	};

	res = vkCreatePipelineLayout(g->device, &pipeline_info, NULL, &g->pipeline_layout);
	checkFatal(res, "vkCreatePipelineLayout");

	/* pool sized for exactly one uniform-buffer descriptor / one set */
	VkDescriptorPoolSize type_count[1];
	type_count[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
	type_count[0].descriptorCount = 1;

	VkDescriptorPoolCreateInfo descriptor_pool = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
		.maxSets = 1,
		.poolSizeCount = 1,
		.pPoolSizes = type_count,
	};

	res = vkCreateDescriptorPool(g->device, &descriptor_pool, NULL, &g->desc_pool);
	checkFatal(res, "vkCreateDescriptorPool");

	VkDescriptorSetAllocateInfo alloc_info[] = {
		{
			.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
			.descriptorPool = g->desc_pool,
			.descriptorSetCount = NUM_DESCRIPTOR_SETS,
			.pSetLayouts = g->desc_layout,
		},
	};
	res = vkAllocateDescriptorSets(g->device, alloc_info, g->desc_set);
	checkFatal(res, "vkAllocateDescriptorSets");

	/* attach the uniform buffer to binding 0 of the new set */
	VkWriteDescriptorSet writes[] = {
		{
			.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
			.dstSet = g->desc_set[0],
			.descriptorCount = 1,
			.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
			.pBufferInfo = &g->uniformInfo,
			.dstArrayElement = 0,
			.dstBinding = 0,
		}
	};

	vkUpdateDescriptorSets(g->device, 1, writes, 0, NULL);

}
+
/*
 * Create the render pass: one subpass over two attachments —
 * attachment 0 is the swapchain colour image (cleared, then kept and
 * transitioned for presentation), attachment 1 is the depth buffer
 * (cleared, contents discarded after the pass).
 */
static void init_render(struct state *g)
{
	VkResult res;
	VkAttachmentDescription attachments[] = {
		/* [0] colour: matches the swapchain image format */
		{
			.format = g->chainImageFormat,
			.samples = NUM_SAMPLES,
			.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
			.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
			.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
			.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
			.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
			.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
			.flags = 0,
		},
		/* [1] depth: only needed during the pass, so not stored */
		{
			.format = g->depthFormat,
			.samples = NUM_SAMPLES,
			.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
			.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
			.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
			.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
			.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
			.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
			.flags = 0,
		},
	};
	VkAttachmentReference color_reference = {
		.attachment = 0,
		.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
	};

	VkAttachmentReference depth_reference = {
		.attachment = 1,
		.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
	};
	VkSubpassDescription subpass = {
		.flags = 0,
		.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
		.colorAttachmentCount = 1,
		.pColorAttachments = &color_reference,
		.pDepthStencilAttachment = &depth_reference,
	};
	VkRenderPassCreateInfo rp_info = {
		.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
		.attachmentCount = sizeof(attachments) / sizeof(attachments[0]),
		.pAttachments = attachments,
		.subpassCount = 1,
		.pSubpasses = &subpass,
	};
	res = vkCreateRenderPass(g->device, &rp_info, NULL, &g->render_pass);
	checkFatal(res, "vkCreateRenderPass");
}
+
+static void init_framebuffer(struct state *g)
+{
+ VkResult res;
+ VkImageView attachments[] = {
+ NULL,
+ g->depthView,
+ };
+
+ VkFramebufferCreateInfo fb_info = {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .renderPass = g->render_pass,
+ .attachmentCount = 2,
+ .pAttachments = attachments,
+ .width = g->width,
+ .height = g->height,
+ .layers = 1,
+ };
+
+ g->framebuffers = malloc(sizeof(*g->framebuffers) * g->chainImageCount);
+ for (int i = 0; i < g->chainImageCount; i++) {
+ attachments[0] = g->chainImageView[i];
+ res = vkCreateFramebuffer(g->device, &fb_info, NULL, &g->framebuffers[i]);
+ }
+}
+
+static void init_vertexbuffer(struct state *g)
+{
+ // mostly same as uniform
+ VkResult res;
+ const void *data = g_vb_solid_face_colours_Data;
+ size_t dataSize = sizeof(g_vb_solid_face_colours_Data);
+ VkBufferCreateInfo buf_info = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
+ .size = dataSize,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE
+ };
+ res = vkCreateBuffer(g->device, &buf_info, NULL, &g->vertexBuffer);
+ checkFatal(res, "vkCreateBuffer (vertex)");
+
+ VkMemoryRequirements req;
+ vkGetBufferMemoryRequirements(g->device, g->vertexBuffer, &req);
+
+ VkMemoryAllocateInfo alloc = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .allocationSize = req.size,
+ .memoryTypeIndex = find_memory_type(&g->memory_properties, req.memoryTypeBits,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
+ };
+ res = vkAllocateMemory(g->device, &alloc, NULL, &g->vertexMemory);
+ checkFatal(res, "vkAllocateMemory (vertex)");
+
+ void *mem;
+ res = vkMapMemory(g->device, g->vertexMemory, 0, req.size, 0, &mem);
+ checkFatal(res, "vkMapMemory (vertex)");
+ memcpy(mem, data, dataSize);
+ vkUnmapMemory(g->device, g->vertexMemory);
+
+ res = vkBindBufferMemory(g->device, g->vertexBuffer, g->vertexMemory, 0);
+ checkFatal(res, "vkBindBufferMemory (vertex)");
+
+ /* ***************************************** */
+ g->vi_binding.binding = 0;
+ g->vi_binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
+ g->vi_binding.stride = sizeof(Vertex);
+
+ g->vi_attribs[0].binding = 0;
+ g->vi_attribs[0].location = 0;
+ g->vi_attribs[0].format = VK_FORMAT_R32G32B32A32_SFLOAT;
+ g->vi_attribs[0].offset = 0;
+ g->vi_attribs[1].binding = 0;
+ g->vi_attribs[1].location = 1;
+ g->vi_attribs[1].format = VK_FORMAT_R32G32B32A32_SFLOAT;
+ g->vi_attribs[1].offset = 16;
+}
+
+static void init_pipeline(struct state *g)
+{
+ VkResult res;
+ VkDynamicState dynamicStateEnables[VK_DYNAMIC_STATE_RANGE_SIZE] = {};
+
+ VkPipelineDynamicStateCreateInfo dynamicState = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .pDynamicStates = dynamicStateEnables,
+ .dynamicStateCount = 0,
+ };
+
+ VkPipelineVertexInputStateCreateInfo vi = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .flags = 0,
+ .vertexBindingDescriptionCount = 1,
+ .pVertexBindingDescriptions = &g->vi_binding,
+ .vertexAttributeDescriptionCount = 2,
+ .pVertexAttributeDescriptions = g->vi_attribs
+ };
+
+ VkPipelineInputAssemblyStateCreateInfo ia = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .flags = 0,
+ .primitiveRestartEnable = VK_FALSE,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
+ };
+
+ VkPipelineRasterizationStateCreateInfo rs = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .flags = 0,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_BACK_BIT,
+ .frontFace = VK_FRONT_FACE_CLOCKWISE,
+ .depthClampEnable = VK_TRUE,
+ .rasterizerDiscardEnable = VK_FALSE,
+ .depthBiasEnable = VK_FALSE,
+ .depthBiasConstantFactor = 0,
+ .depthBiasClamp = 0,
+ .depthBiasSlopeFactor = 0,
+ .lineWidth = 1.0f,
+ };
+
+ VkPipelineColorBlendAttachmentState att_state[] = {
+ {
+ .colorWriteMask = 0xf,
+ .blendEnable = VK_FALSE,
+ .alphaBlendOp = VK_BLEND_OP_ADD,
+ .colorBlendOp = VK_BLEND_OP_ADD,
+ .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+ }
+ };
+
+ VkPipelineColorBlendStateCreateInfo cb = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .flags = 0,
+ .attachmentCount = 1,
+ .pAttachments = att_state,
+ .logicOpEnable = VK_FALSE,
+ .logicOp = VK_LOGIC_OP_NO_OP,
+ .blendConstants[0] = 1.0f,
+ .blendConstants[1] = 1.0f,
+ .blendConstants[2] = 1.0f,
+ .blendConstants[3] = 1.0f,
+ };
+
+ VkPipelineViewportStateCreateInfo vp = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .flags = 0,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ };
+ dynamicStateEnables[dynamicState.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT;
+ dynamicStateEnables[dynamicState.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR;
+
+ VkPipelineDepthStencilStateCreateInfo ds = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .flags = 0,
+ .depthTestEnable = VK_TRUE,
+ .depthWriteEnable = VK_TRUE,
+ .depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL,
+ .depthBoundsTestEnable = VK_FALSE,
+ .minDepthBounds = 0,
+ .maxDepthBounds = 0,
+ .stencilTestEnable = VK_FALSE,
+ .back.failOp = VK_STENCIL_OP_KEEP,
+ .back.passOp = VK_STENCIL_OP_KEEP,
+ .back.compareOp = VK_COMPARE_OP_ALWAYS,
+ .back.compareMask = 0,
+ .back.reference = 0,
+ .back.depthFailOp = VK_STENCIL_OP_KEEP,
+ .back.writeMask = 0,
+ .front.failOp = VK_STENCIL_OP_KEEP,
+ .front.passOp = VK_STENCIL_OP_KEEP,
+ .front.compareOp = VK_COMPARE_OP_ALWAYS,
+ .front.compareMask = 0,
+ .front.reference = 0,
+ .front.depthFailOp = VK_STENCIL_OP_KEEP,
+ .front.writeMask = 0,
+ };
+
+ VkPipelineMultisampleStateCreateInfo ms = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .flags = 0,
+ .pSampleMask = NULL,
+ .rasterizationSamples = NUM_SAMPLES,
+ .sampleShadingEnable = VK_FALSE,
+ .alphaToCoverageEnable = VK_FALSE,
+ .alphaToOneEnable = VK_FALSE,
+ .minSampleShading = 0.0,
+ };
+
+ {
+ VkShaderModuleCreateInfo vsInfo = {
+ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+ .codeSize = sizeof(cube_vs),
+ .pCode = cube_vs,
+ };
+ VkShaderModuleCreateInfo fsInfo = {
+ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+ .codeSize = sizeof(cube_fs),
+ .pCode = cube_fs,
+ };
+
+ res = vkCreateShaderModule(g->device, &vsInfo, NULL, &g->shader[0]);
+ checkFatal(res, "vkCreateShaderModule (vs)");
+ res = vkCreateShaderModule(g->device, &fsInfo, NULL, &g->shader[1]);
+ checkFatal(res, "vkCreateShaderModule (fs)");
+ g->shaderCount = 2;
+ }
+
+ VkPipelineShaderStageCreateInfo shaderStages[] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .pName = "main",
+ .module = g->shader[0],
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pName = "main",
+ .module = g->shader[1],
+ }
+ };
+
+ VkGraphicsPipelineCreateInfo pipeline = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .layout = g->pipeline_layout,
+ .basePipelineHandle = VK_NULL_HANDLE,
+ .basePipelineIndex = 0,
+ .flags = 0,
+ .pVertexInputState = &vi,
+ .pInputAssemblyState = &ia,
+ .pRasterizationState = &rs,
+ .pColorBlendState = &cb,
+ .pTessellationState = NULL,
+ .pMultisampleState = &ms,
+ .pDynamicState = &dynamicState,
+ .pViewportState = &vp,
+ .pDepthStencilState = &ds,
+ .pStages = shaderStages,
+ .stageCount = 2,
+ .renderPass = g->render_pass,
+ .subpass = 0,
+ };
+
+ res = vkCreateGraphicsPipelines(g->device, NULL, 1, &pipeline, NULL, g->pipeline);
+ checkFatal(res, "vkCreateGraphicsPipeline");
+
+ VkSemaphoreCreateInfo seminfo = {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ };
+ res = vkCreateSemaphore(g->device, &seminfo, NULL, &g->chainSemaphore);
+ checkFatal(res, "vkCreateSemaphore");
+
+ VkFenceCreateInfo fenceInfo = {
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO
+ };
+ res = vkCreateFence(g->device, &fenceInfo, NULL, &g->drawFence);
+
+}
+
+void cmd_viewport(struct state *g)
+{
+ VkViewport viewport = {
+ .x = 0,
+ .y = 0,
+ .width = g->width,
+ .height = g->height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f,
+ };
+ vkCmdSetViewport(g->cmd, 0, 1, &viewport);
+}
+
+void cmd_scissors(struct state *g)
+{
+ VkRect2D scissor = {
+ .offset.x = 0,
+ .offset.y = 0,
+ .extent.width = g->width,
+ .extent.height = g->height
+ };
+ vkCmdSetScissor(g->cmd, 0, 1, &scissor);
+}
+
+static void cmd_paint(struct state *g)
+{
+ VkResult res;
+
+ res = vkAcquireNextImageKHR(g->device, g->chain, UINT64_MAX, g->chainSemaphore, VK_NULL_HANDLE, &g->chainIndex);
+ checkFatal(res, "vkAcquireNextImageKHR");
+
+ const VkDeviceSize offsets[1] = {0};
+
+ VkClearValue clear_values[] = {
+ {
+ .color.float32[0] = 0.2f,
+ .color.float32[1] = 0.2f,
+ .color.float32[2] = 0.2f,
+ .color.float32[3] = 0.2f
+ },
+ {
+ .depthStencil.depth = 1.0f,
+ .depthStencil.stencil = 0
+ }
+ };
+ VkRenderPassBeginInfo rp_begin = {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = g->render_pass,
+ .framebuffer = g->framebuffers[g->chainIndex],
+ .renderArea.offset.x = 0,
+ .renderArea.offset.y = 0,
+ .renderArea.extent.width = g->width,
+ .renderArea.extent.height = g->height,
+ .clearValueCount = 2,
+ .pClearValues = clear_values,
+ };
+
+ vkCmdBeginRenderPass(g->cmd, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);
+
+ vkCmdBindPipeline(g->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, g->pipeline[0]);
+ vkCmdBindDescriptorSets(g->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, g->pipeline_layout, 0, NUM_DESCRIPTOR_SETS, g->desc_set, 0, NULL);
+ vkCmdBindVertexBuffers(g->cmd, 0, 1, &g->vertexBuffer, offsets);
+
+ cmd_viewport(g);
+ cmd_scissors(g);
+
+ vkCmdDraw(g->cmd, 12 * 3, 1, 0, 0);
+ vkCmdEndRenderPass(g->cmd);
+
+ res = vkEndCommandBuffer(g->cmd);
+ checkFatal(res, "vkEndCommandBuffer");
+
+ const VkCommandBuffer cmd_bufs[] = {g->cmd};
+
+ VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ VkSubmitInfo submit_info[] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .waitSemaphoreCount = 1,
+ .pWaitSemaphores = &g->chainSemaphore,
+ .pWaitDstStageMask = &pipe_stage_flags,
+ .commandBufferCount = 1,
+ .pCommandBuffers = cmd_bufs,
+ .signalSemaphoreCount = 0,
+ .pSignalSemaphores = NULL,
+ }
+ };
+ /* Queue the command buffer for execution */
+ vkResetFences(g->device, 1, &g->drawFence);
+
+ res = vkQueueSubmit(g->graphics_queue, 1, submit_info, g->drawFence);
+
+ // hmm, why is this needed, surely it can handle this itself?
+
+ /* Make sure command buffer is finished before presenting */
+ do {
+ res = vkWaitForFences(g->device, 1, &g->drawFence, VK_TRUE, FENCE_TIMEOUT);
+ } while (res == VK_TIMEOUT);
+
+ /* Now present the image in the window */
+ VkPresentInfoKHR present = {
+ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
+ .swapchainCount = 1,
+ .pSwapchains = &g->chain,
+ .pImageIndices = &g->chainIndex,
+ .pWaitSemaphores = NULL,
+ .waitSemaphoreCount = 0,
+ .pResults = NULL,
+ };
+
+ res = vkQueuePresentKHR(g->present_queue, &present);
+}
+
/**
 * Destroy all Vulkan objects in roughly reverse order of creation.
 * Assumes rendering has completed (cmd_paint waits on the draw fence).
 */
static void shutdown(struct state *g)
{
	/* Synchronisation objects */
	vkDestroyFence(g->device, g->drawFence, NULL);
	vkDestroySemaphore(g->device, g->chainSemaphore, NULL);

	/* Pipeline and its shader modules */
	vkDestroyPipeline(g->device, g->pipeline[0], NULL);
	for (int i = 0; i < g->shaderCount; i++)
		vkDestroyShaderModule(g->device, g->shader[i], NULL);

	/* Vertex data */
	vkFreeMemory(g->device, g->vertexMemory, NULL);
	vkDestroyBuffer(g->device, g->vertexBuffer, NULL);

	/* Framebuffers (one per swapchain image) and the render pass */
	for (int i = 0; i < g->chainImageCount; i++)
		vkDestroyFramebuffer(g->device, g->framebuffers[i], NULL);

	vkDestroyRenderPass(g->device, g->render_pass, NULL);

	/* Descriptor machinery and layouts */
	vkDestroyDescriptorPool(g->device, g->desc_pool, NULL);
	vkDestroyPipelineLayout(g->device, g->pipeline_layout, NULL);
	vkDestroyDescriptorSetLayout(g->device, g->desc_layout[0], NULL);

	vkFreeMemory(g->device, g->uniformMemory, NULL);

	/* Depth attachment: view, memory, image */
	vkDestroyImageView(g->device, g->depthView, NULL);
	vkFreeMemory(g->device, g->depthMemory, NULL);
	vkDestroyImage(g->device, g->depthImage, NULL);

	vkDestroyBuffer(g->device, g->uniformBuffer, NULL);

	/* Swapchain image views, then the swapchain itself */
	for (int i = 0; i < g->chainImageCount; i++)
		vkDestroyImageView(g->device, g->chainImageView[i], NULL);

	vkDestroySwapchainKHR(g->device, g->chain, NULL);

	vkDestroyCommandPool(g->device, g->cmd_pool, NULL);
	vkDestroyDevice(g->device, NULL);

	/* Instance-level objects last */
	vkDestroySurfaceKHR(g->vk, g->surface, NULL);

	vkDestroyInstance(g->vk, NULL);
}
+
/**
 * Entry point: build the MVP matrix, bring up the whole Vulkan stack,
 * draw one frame, show it briefly, then tear everything down.
 */
int main(int argc, char** argv)
{
	/* The clip matrix flips Y (row 2 has -1) and remaps Z with a 0.5
	 * scale and 0.5 offset, i.e. [-1,1] depth to [0,1]. */
	struct state state = {
		.width = 800,
		.height = 800,
		.clip =
		{
			1.0f, 0.0f, 0.0f, 0.0f,
			0.0f, -1.0f, 0.0f, 0.0f,
			0.0f, 0.0f, 0.5f, 0.0f,
			0.0f, 0.0f, 0.5f, 1.0f
		}
	};
	/* Camera position, look-at target, and up vector */
	float eye[3] = {-5, 3, -10};
	float centre[3] = {0, 0, 0};
	float up[3] = {0, -1, 0};
	float t0[16], t1[16];

	/* mvp = clip * projection * view * model */
	perspective(state.projection, M_PI_4, 1.0f, 0.1f, 100.0f);
	lookAt(state.view, eye, centre, up);
	identity4f(state.model);
	mult4x4f(t0, state.clip, state.projection);
	mult4x4f(t1, t0, state.view);
	mult4x4f(state.mvp, t1, state.model);

	/* Instance, surface, device and queues */
	init_instance(&state);
	init_surface(&state);

	init_device(&state);
	init_device_queue(&state);
	init_command(&state);

	/* Per-frame resources and the graphics pipeline */
	init_depth(&state);
	init_uniform(&state);
	init_descriptor(&state);
	init_render(&state);
	init_framebuffer(&state);
	init_vertexbuffer(&state);
	init_pipeline(&state);

	execute_begin_command_buffer(&state);

	cmd_paint(&state);

	printf("behold the prize!\n");
	sleep(2);

	shutdown(&state);

	return(EXIT_SUCCESS);
}
--- /dev/null
#version 400
#extension GL_ARB_separate_shader_objects : enable
#extension GL_ARB_shading_language_420pack : enable
// Pass-through fragment shader: output the interpolated vertex colour.
layout (location = 0) in vec4 color;
layout (location = 0) out vec4 outColor;
void main() {
   outColor = color;
}
--- /dev/null
#version 400
#extension GL_ARB_separate_shader_objects : enable
#extension GL_ARB_shading_language_420pack : enable
// Model-view-projection matrix, supplied via a uniform buffer at binding 0.
layout (std140, binding = 0) uniform bufferVals {
    mat4 mvp;
} data;
layout (location = 0) in vec4 pos;
layout (location = 1) in vec4 inColor;
layout (location = 0) out vec4 outColor;

// Transform each vertex by the MVP matrix and forward its colour.
void main() {
   outColor = inColor;
   gl_Position = data.mvp * pos;
}
--- /dev/null

include ../config.make

# Compute-only example: no window-system libraries needed.
LDLIBS=$(compute_LDLIBS)
CFLAGS=$(compute_CFLAGS)

all: mandelbrot
mandelbrot: mandelbrot.o
# The .cl.inc header is generated from the OpenCL source (rule in config.make).
mandelbrot.o: mandelbrot.c mandelbrot.cl.inc

clean:
	rm -f *.o mandelbrot mandelbrot.cl.inc mandelbrot.cl.csv
--- /dev/null
+ /*
+The MIT License (MIT)
+
+Copyright (C) 2017 Eric Arnebäck
+Copyright (C) 2019 Michael Zucchi
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+ */
+
+/*
+ * This is a C conversion of this:
+ * https://github.com/Erkaman/vulkan_minimal_compute
+ *
+ * In addition it's been made completely stand-alone
+ * and simplified further. C struct initialisation
+ * syntax is used heavily to reduce clutter.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <vulkan/vulkan.h>
+#include <string.h>
+
+// These 4 defines must match compute shader. The local work size
+// must be a power of 2. They can be other values (up to the device
+// limits) but they affect concurrency and efficiency of the gpu.
+#define WIDTH (1920*2)
+#define HEIGHT (1200*2)
+#define LWS_X 8
+#define LWS_Y 8
+
+// The compiled shader (SPIR-V binary)
+#include "mandelbrot.cl.inc"
+#define mandelbrot_entry "mandelbrot"
+
+#define ARRAY_SIZEOF(a) (sizeof(a)/sizeof(*a))
+
/* One output pixel as written by the compute shader: RGBA floats. */
struct Pixel {
	float r, g, b, a;
};

/* All demo state, passed explicitly to every function. */
struct state {
	VkInstance instance;
	VkPhysicalDevice physicalDevice;

	VkDevice device;
	VkQueue computeQueue;

	/* Destination buffer the shader renders into, and its backing memory */
	VkDeviceSize dstBufferSize;
	VkBuffer dstBuffer;
	VkDeviceMemory dstMemory;

	/* Descriptor objects binding dstBuffer to the shader */
	VkDescriptorSetLayout descriptorSetLayout;
	VkDescriptorPool descriptorPool;
	VkDescriptorSet descriptorSets[1];

	/* Compute pipeline */
	VkShaderModule mandelbrotShader;
	VkPipelineLayout pipelineLayout;
	VkPipeline computePipeline[1];

	/* Command recording */
	VkCommandPool commandPool;
	VkCommandBuffer commandBuffers[1];

	/* Cached device properties, filled in by init_device */
	uint32_t computeQueueIndex;
	VkPhysicalDeviceMemoryProperties deviceMemoryProperties;
};
+
+static void checkFatal(VkResult res, const char *cmd)
+{
+ if (res != VK_SUCCESS) {
+ fprintf(stderr, "%s: %d\n", cmd, res);
+ exit(EXIT_FAILURE);
+ }
+}
+
+/**
+ * This finds the memory type index for the memory on a specific device.
+ */
+static int find_memory_type(VkPhysicalDeviceMemoryProperties *memory, uint32_t typeMask, VkMemoryPropertyFlagBits query)
+{
+ for (int i = 0; i < memory->memoryTypeCount; i++) {
+ if (((1 << i) & typeMask) && (memory->memoryTypes[i].propertyFlags & query) == query)
+ return i;
+ }
+ return -1;
+}
+
+/**
+ * Round up to next nearest value of step size. Step must be a power of 2.
+ */
/**
 * Round v up to the next multiple of step.  step must be a power of 2.
 */
static uint32_t round_up(uint32_t v, uint32_t step)
{
	uint32_t mask = step - 1;

	return (v + mask) & ~mask;
}
+
+/**
+ * Create vulkan instance.
+ *
+ * Compute-only requires no extensions so this is simple.
+ *
+ * To turn on debugging layers:
+ * export VK_INSTANCE_LAYERS=VK_LAYER_LUNARG_standard_validation
+ */
+static void init_instance(struct state *g)
+{
+ VkResult res;
+ VkApplicationInfo app = {
+ .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+ .pApplicationName = "mandelbrot",
+ .pEngineName = "none",
+ .apiVersion = VK_API_VERSION_1_0,
+ };
+ const char * const extensions[] = {
+ };
+ VkInstanceCreateInfo info = {
+ .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+ .pApplicationInfo = &app,
+ .enabledExtensionCount = sizeof(extensions) / sizeof(*extensions),
+ .ppEnabledExtensionNames = extensions,
+ };
+
+ res = vkCreateInstance(&info, NULL, &g->instance);
+ checkFatal(res, "vkCreateInstance");
+}
+
+/**
+ * This finds a suitable device and queue family.
+ *
+ * In this case it is a device that supports a compute queue. It
+ * preferentially looks for a non-graphics compute queue.
+ *
+ * It could also make use of:
+ * VkPhysicalDeviceProperties props;
+ * vkGetPhysicalDeviceProperties(devs[i], &props);
+ */
+static void init_device(struct state *g)
+{
+ VkResult res;
+ uint32_t devcount;
+
+ res = vkEnumeratePhysicalDevices(g->instance, &devcount, NULL);
+ VkPhysicalDevice devs[devcount];
+ res = vkEnumeratePhysicalDevices(g->instance, &devcount, devs);
+
+ int best = 0;
+ int devid = -1;
+ int queueid = -1;
+
+ for (int i = 0; i < devcount; i++) {
+ uint32_t family_count;
+
+ vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, NULL);
+ VkQueueFamilyProperties famprops[family_count];
+ vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, famprops);
+
+ for (uint32_t j = 0; j < family_count; j++) {
+ int score = 0;
+
+ if ((famprops[j].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0)
+ score += 1;
+ if ((famprops[j].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0)
+ score += 1;
+
+ if (score > best) {
+ score = best;
+ devid = i;
+ queueid = j;
+ }
+ }
+ }
+
+ if (devid == -1)
+ checkFatal(VK_ERROR_FEATURE_NOT_PRESENT, "init_device");
+
+ g->physicalDevice = devs[devid];
+ g->computeQueueIndex = queueid;
+
+ float qpri[] = {0.0f};
+ VkDeviceQueueCreateInfo qinfo = {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+ .queueCount = 1,
+ .pQueuePriorities = qpri, // Note: cannot be null
+ .queueFamilyIndex = g->computeQueueIndex,
+ };
+ VkDeviceCreateInfo devinfo = {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+ .queueCreateInfoCount = 1,
+ .pQueueCreateInfos = &qinfo,
+ };
+
+ res = vkCreateDevice(g->physicalDevice, &devinfo, NULL, &g->device);
+ checkFatal(res, "vkCreateDevice");
+
+ /* These values are cached for convenience */
+ vkGetPhysicalDeviceMemoryProperties(g->physicalDevice, &g->deviceMemoryProperties);
+ vkGetDeviceQueue(g->device, g->computeQueueIndex, 0, &g->computeQueue);
+}
+
+/**
+ * Buffers are created in three steps:
+ * 1) create buffer, specifying usage and size
+ * 2) allocate memory based on memory requirements
+ * 3) bind memory
+ *
+ */
+static void init_buffer(struct state *g, VkDeviceSize dataSize, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer *buffer, VkDeviceMemory *memory)
+{
+ VkResult res;
+ VkMemoryRequirements req;
+ VkBufferCreateInfo buf_info = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .size = dataSize,
+ .usage = usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE
+ };
+ res = vkCreateBuffer(g->device, &buf_info, NULL, buffer);
+ checkFatal(res, "vkCreateBuffer");
+
+ vkGetBufferMemoryRequirements(g->device, *buffer, &req);
+
+ VkMemoryAllocateInfo alloc = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .allocationSize = req.size,
+ .memoryTypeIndex = find_memory_type(&g->deviceMemoryProperties, req.memoryTypeBits, properties)
+ };
+ res = vkAllocateMemory(g->device, &alloc, NULL, memory);
+ checkFatal(res, "vkAllocateMemory");
+
+ res = vkBindBufferMemory(g->device, *buffer, *memory, 0);
+ checkFatal(res, "vkBindBufferMemory");
+}
+
+/**
+ * Descriptors are used to bind and describe memory blocks
+ * to shaders.
+ *
+ * *Pool is used to allocate descriptors, it is per-device.
+ * *Layout is used to group descriptors for a given pipeline,
+ * The descriptors describe individually-addressable blocks.
+ */
/**
 * Descriptors are used to bind and describe memory blocks to shaders.
 *
 * This creates a layout with a single storage-buffer binding, a pool to
 * allocate from, allocates one set, and points binding 0 at dstBuffer.
 * Must run after init_buffer has created dstBuffer.
 */
static void init_descriptor(struct state *g)
{
	VkResult res;
	/* Create descriptorset layout: one storage buffer at binding 0,
	 * visible to the compute stage */
	VkDescriptorSetLayoutBinding layout_binding = {
		.binding = 0,
		.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
		.descriptorCount = 1,
		.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
	};
	VkDescriptorSetLayoutCreateInfo descriptor_layout = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.bindingCount = 1,
		.pBindings = &layout_binding,
	};
	res = vkCreateDescriptorSetLayout(g->device, &descriptor_layout, NULL, &g->descriptorSetLayout);
	checkFatal(res, "vkCreateDescriptorSetLayout");

	/* Create descriptor pool sized for exactly one set with one
	 * storage-buffer descriptor */
	VkDescriptorPoolSize type_count[] = {
		{
			.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
			.descriptorCount = 1,
		}
	};
	VkDescriptorPoolCreateInfo descriptor_pool = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
		.maxSets = 1,
		.poolSizeCount = ARRAY_SIZEOF(type_count),
		.pPoolSizes = type_count,
	};

	res = vkCreateDescriptorPool(g->device, &descriptor_pool, NULL, &g->descriptorPool);
	checkFatal(res, "vkCreateDescriptorPool");

	/* Allocate from pool */
	VkDescriptorSetAllocateInfo alloc_info[] = {
		{
			.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
			.descriptorPool = g->descriptorPool,
			.descriptorSetCount = 1,
			.pSetLayouts = &g->descriptorSetLayout,
		},
	};
	res = vkAllocateDescriptorSets(g->device, alloc_info, g->descriptorSets);
	checkFatal(res, "vkAllocateDescriptorSets");

	/* Bind the whole of dstBuffer to binding 0 of the set */
	VkDescriptorBufferInfo bufferInfo = {
		.buffer = g->dstBuffer,
		.offset = 0,
		.range = g->dstBufferSize,
	};
	VkWriteDescriptorSet writeSet[] = {
		{
			.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
			.dstSet = g->descriptorSets[0],
			.dstBinding = 0,
			.descriptorCount = 1,
			.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
			.pBufferInfo = &bufferInfo,
		}
	};

	vkUpdateDescriptorSets(g->device, ARRAY_SIZEOF(writeSet), writeSet, 0, NULL);
}
+
+/**
+ * Create the compute pipeline. This is the shader and data layouts for it.
+ */
/**
 * Create the compute pipeline: the shader module, the pipeline layout
 * (which references the descriptor set layout), and the pipeline itself.
 */
static void init_pipeline(struct state *g)
{
	VkResult res;
	/* Wrap the embedded SPIR-V binary in a shader module */
	VkShaderModuleCreateInfo vsInfo = {
		.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
		.codeSize = sizeof(mandelbrot_cl),
		.pCode = mandelbrot_cl,
	};

	res = vkCreateShaderModule(g->device, &vsInfo, NULL, &g->mandelbrotShader);
	checkFatal(res, "vkCreateShaderModule");

	/* Link shader to the descriptor layout created by init_descriptor */
	VkPipelineLayoutCreateInfo pipelineinfo = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &g->descriptorSetLayout,
	};

	res = vkCreatePipelineLayout(g->device, &pipelineinfo, NULL, &g->pipelineLayout);
	checkFatal(res, "vkCreatePipelineLayout");

	/* Create pipeline; mandelbrot_entry names the kernel entry point */
	VkComputePipelineCreateInfo pipeline = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage =
		{
			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
			.stage = VK_SHADER_STAGE_COMPUTE_BIT,
			.module = g->mandelbrotShader,
			.pName = mandelbrot_entry,
		},
		.layout = g->pipelineLayout

	};
	res = vkCreateComputePipelines(g->device, NULL, 1, &pipeline, NULL, g->computePipeline);
	checkFatal(res, "vkCreateComputePipeline");
}
+
+/**
+ * Create a command buffer, this is somewhat like a display list.
+ */
+static void init_command_buffer(struct state *g)
+{
+ VkResult res;
+ VkCommandPoolCreateInfo poolinfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .queueFamilyIndex = g->computeQueueIndex,
+ };
+
+ res = vkCreateCommandPool(g->device, &poolinfo, NULL, &g->commandPool);
+ checkFatal(res, "vkCreateCommandPool");
+
+ VkCommandBufferAllocateInfo cmdinfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+ .commandPool = g->commandPool,
+ .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+ .commandBufferCount = 1,
+ };
+
+ res = vkAllocateCommandBuffers(g->device, &cmdinfo, g->commandBuffers);
+ checkFatal(res, "vkAllocateCommandBuffers");
+
+ /* Fill command buffer with commands for later operation */
+ VkCommandBufferBeginInfo beginInfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ };
+ res = vkBeginCommandBuffer(g->commandBuffers[0], &beginInfo);
+ checkFatal(res, "vkBeginCommandBuffer");
+
+ /* Bind the compute operation and data */
+ vkCmdBindPipeline(g->commandBuffers[0], VK_PIPELINE_BIND_POINT_COMPUTE, g->computePipeline[0]);
+ vkCmdBindDescriptorSets(g->commandBuffers[0], VK_PIPELINE_BIND_POINT_COMPUTE, g->pipelineLayout, 0, 1, g->descriptorSets, 0, NULL);
+
+ /* Run it */
+ vkCmdDispatch(g->commandBuffers[0], round_up(WIDTH, LWS_X), round_up(HEIGHT, LWS_Y), 1);
+
+ res = vkEndCommandBuffer(g->commandBuffers[0]);
+ checkFatal(res, "vkEndCommandBuffer");
+}
+
+/**
+ * Execute the pre-created command buffer.
+ *
+ * A fence is used to wait for completion.
+ */
/**
 * Execute the pre-created command buffer and block until it completes.
 *
 * A fence is used to wait for completion; it is created unsignalled,
 * signalled by the queue submission, and polled in 1ms slices (the
 * vkWaitForFences timeout is in nanoseconds).
 */
static void execute(struct state *g)
{
	VkResult res;
	VkSubmitInfo submitInfo[] = {
		{
			.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
			.commandBufferCount = 1,
			.pCommandBuffers = g->commandBuffers
		},
	};

	/* Create fence to mark the task completion */
	VkFence fence;
	VkFenceCreateInfo fenceInfo = {
		.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO
	};
	res = vkCreateFence(g->device, &fenceInfo, NULL, &fence);
	checkFatal(res, "vkCreateFence");

	/* Submit, then await completion */
	res = vkQueueSubmit(g->computeQueue, ARRAY_SIZEOF(submitInfo), submitInfo, fence);
	checkFatal(res, "vkQueueSubmit");

	do {
		res = vkWaitForFences(g->device, 1, &fence, VK_TRUE, 1000000);
	} while (res == VK_TIMEOUT);

	vkDestroyFence(g->device, fence, NULL);
}
+
+/**
+ * Trivial pnm format image output.
+ */
/**
 * Write pixel data as a binary PPM ("P6") image.
 *
 * Fixes: report fopen failure instead of silently writing nothing,
 * check the fwrite/fclose results, and use %u for the unsigned header
 * fields (%d with unsigned arguments is a format mismatch).
 *
 * @param name   output file name.
 * @param width  image width in pixels.
 * @param height image height in pixels.
 * @param depth  bytes per pixel; must be 3 (RGB) for a valid P6 file.
 * @param pixels packed pixel data, width*height*depth bytes.
 */
static void pam_save(const char *name, unsigned int width, unsigned int height, unsigned int depth, const uint8_t *pixels)
{
	FILE *fp = fopen(name, "wb");

	if (!fp) {
		fprintf(stderr, "unable to open %s for writing\n", name);
		return;
	}

	fprintf(fp, "P6\n%u\n%u\n255\n", width, height);
	if (fwrite(pixels, depth, (size_t)width * height, fp) != (size_t)width * height)
		fprintf(stderr, "short write: %s\n", name);
	if (fclose(fp) == 0)
		printf("wrote: %s\n", name);
}
+
+/**
+ * Accesses the gpu buffer, converts it to RGB byte, and saves it as a pam file.
+ */
+void save_result(struct state *g)
+{
+ VkResult res;
+ struct Pixel *dst;
+
+ res = vkMapMemory(g->device, g->dstMemory, 0, g->dstBufferSize, 0, (void **)&dst);
+ checkFatal(res, "vkMapMemory");
+
+ uint8_t *pixels = malloc(WIDTH * HEIGHT * 3);
+
+ // this is super-slow!
+ for (int i = 0; i < WIDTH * HEIGHT; i++) {
+ pixels[i * 3 + 0] = (uint8_t)(255.0f * dst[i].r);
+ pixels[i * 3 + 1] = (uint8_t)(255.0f * dst[i].g);
+ pixels[i * 3 + 2] = (uint8_t)(255.0f * dst[i].b);
+ }
+
+ vkUnmapMemory(g->device, g->dstMemory);
+
+ pam_save("mandelbrot.pam", WIDTH, HEIGHT, 3, pixels);
+ free(pixels);
+}
+
/**
 * Destroy all Vulkan objects in reverse order of creation.  Assumes the
 * device is idle (execute() waited on its fence).
 */
static void shutdown(struct state *g)
{
	vkDestroyCommandPool(g->device, g->commandPool, NULL);

	/* Pipeline, its layout, and the shader module */
	vkDestroyPipeline(g->device, g->computePipeline[0], NULL);
	vkDestroyPipelineLayout(g->device, g->pipelineLayout, NULL);
	vkDestroyShaderModule(g->device, g->mandelbrotShader, NULL);

	/* Destroying the pool frees the sets allocated from it */
	vkDestroyDescriptorPool(g->device, g->descriptorPool, NULL);
	vkDestroyDescriptorSetLayout(g->device, g->descriptorSetLayout, NULL);

	/* Output buffer and its memory */
	vkFreeMemory(g->device, g->dstMemory, NULL);
	vkDestroyBuffer(g->device, g->dstBuffer, NULL);

	vkDestroyDevice(g->device, NULL);
	vkDestroyInstance(g->instance, NULL);
}
+
/**
 * Entry point: set up the Vulkan compute stack, run the mandelbrot
 * kernel once, write the result to mandelbrot.pam, and tear down.
 */
int main(int argc, char** argv)
{
	/* One RGBA-float pixel per image position */
	struct state g = {
		.dstBufferSize = WIDTH * HEIGHT * sizeof(struct Pixel),
	};

	init_instance(&g);
	init_device(&g);

	/* Host-visible+coherent so save_result can map it directly */
	init_buffer(&g, g.dstBufferSize,
		VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
		VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
		&g.dstBuffer, &g.dstMemory);

	init_descriptor(&g);
	init_pipeline(&g);
	init_command_buffer(&g);

	execute(&g);

	save_result(&g);

	shutdown(&g);

	return(EXIT_SUCCESS);
}
--- /dev/null
+
+#define WIDTH (1920*2)
+#define HEIGHT (1200*2)
+#define LWS_X 8
+#define LWS_Y 8
+
/*
 * Render the mandelbrot set into imageData (one RGBA float4 per pixel).
 *
 * Bug fix: z must be initialised to (0,0) *before* the loop and persist
 * across iterations.  The original declared a fresh z inside the loop
 * and read it uninitialised in its own initialiser, which is undefined.
 */
kernel void
__attribute__((reqd_work_group_size(LWS_X, LWS_Y, 1)))
mandelbrot(global float4 *imageData) {

	/*
	  In order to fit the work into workgroups, some unnecessary threads are launched.
	  We terminate those threads here.
	*/
	if(get_global_id(0) >= WIDTH || get_global_id(1) >= HEIGHT)
		return;

	float x = convert_float(get_global_id(0)) / WIDTH;
	float y = convert_float(get_global_id(1)) / HEIGHT;

	/*
	  What follows is code for rendering the mandelbrot set.
	*/
	float2 uv = (float2)(x, (y - 0.5f) * (12.0f / 19.0f) + 0.5f);
	float n = 0.0f;
	float2 c = (float2)(-0.445f, 0.0f) + (uv - 0.5f)*(4.0f);
	float2 z = (float2)(0.0f, 0.0f);
	const int M = 1000;

	/* Iterate z = z^2 + c, counting iterations until escape */
	for (int i = 0; i < M; i++) {
		z = (float2)(z.x*z.x - z.y*z.y, 2.0f*z.x*z.y) + c;
		if (dot(z, z) > 4.0f)
			break;
		n += 1.0f;
	}

	// we use a simple cosine palette to determine color:
	// http://iquilezles.org/www/articles/palettes/palettes.htm
	float t = n * 10.0f / M;
	float3 d = (float3)(0.5f, 0.5f, 0.5f);
	float3 e = (float3)(0.5f, 0.5f, 0.5f);
	float3 f = (float3)(1.0f, 1.0f, 1.0f);
	float3 g = (float3)(0.00f, 0.33f, 0.67f);
	float4 color = (float4)( d + e * cos(6.28318f * (f * t + g)), 1.0f);

	// points that never escaped are inside the set: paint them black
	if (convert_int(n) == M)
		color = (float4)(0, 0, 0, 1);

	// store the rendered mandelbrot set into a storage buffer:
	imageData[WIDTH * get_global_id(1) + get_global_id(0)] = color;
}
--- /dev/null

include ../config.make

# Compute-only example: no window-system libraries needed.
LDLIBS=$(compute_LDLIBS)
CFLAGS=$(compute_CFLAGS)

all: mandelbrot
mandelbrot: mandelbrot.o
# The .comp.inc header is generated from the GLSL compute shader (rule in config.make).
mandelbrot.o: mandelbrot.c mandelbrot.comp.inc

clean:
	rm -f *.o mandelbrot mandelbrot.comp.inc

--- /dev/null
+ /*
+The MIT License (MIT)
+
+Copyright (C) 2017 Eric Arnebäck
+Copyright (C) 2019 Michael Zucchi
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+ */
+
+/*
+ * This is a C conversion of this:
+ * https://github.com/Erkaman/vulkan_minimal_compute
+ *
+ * In addition it's been made completely stand-alone
+ * and simplified further. C struct initialisation
+ * syntax is used heavily to reduce clutter.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <vulkan/vulkan.h>
+#include <string.h>
+
+// These 4 defines must match compute shader. The local work size
+// must be a power of 2. They can be other values (up to the device
+// limits) but they affect concurrency and efficiency of the gpu.
+#define WIDTH (1920*2)
+#define HEIGHT (1200*2)
+#define LWS_X 8
+#define LWS_Y 8
+
+// The compiled shader (SPIR-V binary)
+#include "mandelbrot.comp.inc"
+#define mandelbrot_entry "main"
+
+#define ARRAY_SIZEOF(a) (sizeof(a)/sizeof(*a))
+
/* One output pixel as written by the compute shader: RGBA floats. */
struct Pixel {
	float r, g, b, a;
};

/* All demo state, passed explicitly to every function. */
struct state {
	VkInstance instance;
	VkPhysicalDevice physicalDevice;

	VkDevice device;
	VkQueue computeQueue;

	/* Destination buffer the shader renders into, and its backing memory */
	VkDeviceSize dstBufferSize;
	VkBuffer dstBuffer;
	VkDeviceMemory dstMemory;

	/* Descriptor objects binding dstBuffer to the shader */
	VkDescriptorSetLayout descriptorSetLayout;
	VkDescriptorPool descriptorPool;
	VkDescriptorSet descriptorSets[1];

	/* Compute pipeline */
	VkShaderModule mandelbrotShader;
	VkPipelineLayout pipelineLayout;
	VkPipeline computePipeline[1];

	/* Command recording */
	VkCommandPool commandPool;
	VkCommandBuffer commandBuffers[1];

	/* Cached device properties, filled in by init_device */
	uint32_t computeQueueIndex;
	VkPhysicalDeviceMemoryProperties deviceMemoryProperties;
};
+
/* Print the failing call and its Vulkan result code, then exit. */
static void checkFatal(VkResult res, const char *cmd)
{
	if (res != VK_SUCCESS) {
		fprintf(stderr, "%s: %d\n", cmd, res);
		exit(EXIT_FAILURE);
	}
}
+
+/**
+ * This finds the memory type index for the memory on a specific device.
+ */
+static int find_memory_type(VkPhysicalDeviceMemoryProperties *memory, uint32_t typeMask, VkMemoryPropertyFlagBits query)
+{
+ for (int i = 0; i < memory->memoryTypeCount; i++) {
+ if (((1 << i) & typeMask) && (memory->memoryTypes[i].propertyFlags & query) == query)
+ return i;
+ }
+ return -1;
+}
+
+/**
+ * Round up to next nearest value of step size. Step must be a power of 2.
+ */
/**
 * Round v up to the next multiple of step.  step must be a power of 2.
 */
static uint32_t round_up(uint32_t v, uint32_t step)
{
	uint32_t mask = step - 1;

	return (v + mask) & ~mask;
}
+
+/**
+ * Create vulkan instance.
+ *
+ * Compute-only requires no extensions so this is simple.
+ *
+ * To turn on debugging layers:
+ * export VK_INSTANCE_LAYERS=VK_LAYER_LUNARG_standard_validation
+ */
+static void init_instance(struct state *g)
+{
+ VkResult res;
+ VkApplicationInfo app = {
+ .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+ .pApplicationName = "mandelbrot",
+ .pEngineName = "none",
+ .apiVersion = VK_API_VERSION_1_0,
+ };
+ const char * const extensions[] = {
+ };
+ VkInstanceCreateInfo info = {
+ .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+ .pApplicationInfo = &app,
+ .enabledExtensionCount = sizeof(extensions) / sizeof(*extensions),
+ .ppEnabledExtensionNames = extensions,
+ };
+
+ res = vkCreateInstance(&info, NULL, &g->instance);
+ checkFatal(res, "vkCreateInstance");
+}
+
+/**
+ * This finds a suitable device and queue family.
+ *
+ * In this case it is a device that supports a compute queue. It
+ * preferentially looks for a non-graphics compute queue.
+ *
+ * It could also make use of:
+ * VkPhysicalDeviceProperties props;
+ * vkGetPhysicalDeviceProperties(devs[i], &props);
+ */
+static void init_device(struct state *g)
+{
+ VkResult res;
+ uint32_t devcount;
+
+ res = vkEnumeratePhysicalDevices(g->instance, &devcount, NULL);
+ VkPhysicalDevice devs[devcount];
+ res = vkEnumeratePhysicalDevices(g->instance, &devcount, devs);
+
+ int best = 0;
+ int devid = -1;
+ int queueid = -1;
+
+ for (int i = 0; i < devcount; i++) {
+ uint32_t family_count;
+
+ vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, NULL);
+ VkQueueFamilyProperties famprops[family_count];
+ vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, famprops);
+
+ for (uint32_t j = 0; j < family_count; j++) {
+ int score = 0;
+
+ if ((famprops[j].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0)
+ score += 1;
+ if ((famprops[j].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0)
+ score += 1;
+
+ if (score > best) {
+ score = best;
+ devid = i;
+ queueid = j;
+ }
+ }
+ }
+
+ if (devid == -1)
+ checkFatal(VK_ERROR_FEATURE_NOT_PRESENT, "init_device");
+
+ g->physicalDevice = devs[devid];
+ g->computeQueueIndex = queueid;
+
+ float qpri[] = {0.0f};
+ VkDeviceQueueCreateInfo qinfo = {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+ .queueCount = 1,
+ .pQueuePriorities = qpri, // Note: cannot be null
+ .queueFamilyIndex = g->computeQueueIndex,
+ };
+ VkDeviceCreateInfo devinfo = {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+ .queueCreateInfoCount = 1,
+ .pQueueCreateInfos = &qinfo,
+ };
+
+ res = vkCreateDevice(g->physicalDevice, &devinfo, NULL, &g->device);
+ checkFatal(res, "vkCreateDevice");
+
+ /* These values are cached for convenience */
+ vkGetPhysicalDeviceMemoryProperties(g->physicalDevice, &g->deviceMemoryProperties);
+ vkGetDeviceQueue(g->device, g->computeQueueIndex, 0, &g->computeQueue);
+}
+
+/**
+ * Buffers are created in three steps:
+ * 1) create buffer, specifying usage and size
+ * 2) allocate memory based on memory requirements
+ * 3) bind memory
+ *
+ */
+static void init_buffer(struct state *g, VkDeviceSize dataSize, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer *buffer, VkDeviceMemory *memory)
+{
+ VkResult res;
+ VkMemoryRequirements req;
+ VkBufferCreateInfo buf_info = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .size = dataSize,
+ .usage = usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE
+ };
+ res = vkCreateBuffer(g->device, &buf_info, NULL, buffer);
+ checkFatal(res, "vkCreateBuffer");
+
+ vkGetBufferMemoryRequirements(g->device, *buffer, &req);
+
+ VkMemoryAllocateInfo alloc = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .allocationSize = req.size,
+ .memoryTypeIndex = find_memory_type(&g->deviceMemoryProperties, req.memoryTypeBits, properties)
+ };
+ res = vkAllocateMemory(g->device, &alloc, NULL, memory);
+ checkFatal(res, "vkAllocateMemory");
+
+ res = vkBindBufferMemory(g->device, *buffer, *memory, 0);
+ checkFatal(res, "vkBindBufferMemory");
+}
+
+/**
+ * Descriptors are used to bind and describe memory blocks
+ * to shaders.
+ *
+ * *Pool is used to allocate descriptors, it is per-device.
+ * *Layout is used to group descriptors for a given pipeline,
+ * The descriptors describe individually-addressable blocks.
+ */
+static void init_descriptor(struct state *g)
+{
+ VkResult res;
+ /* Create descriptorset layout */
+ VkDescriptorSetLayoutBinding layout_binding = {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ };
+ VkDescriptorSetLayoutCreateInfo descriptor_layout = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .bindingCount = 1,
+ .pBindings = &layout_binding,
+ };
+ res = vkCreateDescriptorSetLayout(g->device, &descriptor_layout, NULL, &g->descriptorSetLayout);
+ checkFatal(res, "vkCreateDescriptorSetLayout");
+
+ /* Create descriptor pool */
+ VkDescriptorPoolSize type_count[] = {
+ {
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ }
+ };
+ VkDescriptorPoolCreateInfo descriptor_pool = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ .maxSets = 1,
+ .poolSizeCount = ARRAY_SIZEOF(type_count),
+ .pPoolSizes = type_count,
+ };
+
+ res = vkCreateDescriptorPool(g->device, &descriptor_pool, NULL, &g->descriptorPool);
+ checkFatal(res, "vkCreateDescriptorPool");
+
+ /* Allocate from pool */
+ VkDescriptorSetAllocateInfo alloc_info[] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .descriptorPool = g->descriptorPool,
+ .descriptorSetCount = 1,
+ .pSetLayouts = &g->descriptorSetLayout,
+ },
+ };
+ res = vkAllocateDescriptorSets(g->device, alloc_info, g->descriptorSets);
+ checkFatal(res, "vkAllocateDescriptorSets");
+
+ /* Bind a buffer to the descriptor */
+ VkDescriptorBufferInfo bufferInfo = {
+ .buffer = g->dstBuffer,
+ .offset = 0,
+ .range = g->dstBufferSize,
+ };
+ VkWriteDescriptorSet writeSet[] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = g->descriptorSets[0],
+ .dstBinding = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &bufferInfo,
+ }
+ };
+
+ vkUpdateDescriptorSets(g->device, ARRAY_SIZEOF(writeSet), writeSet, 0, NULL);
+}
+
+/**
+ * Create the compute pipeline. This is the shader and data layouts for it.
+ */
+static void init_pipeline(struct state *g)
+{
+ VkResult res;
+ /* Set shader code */
+ VkShaderModuleCreateInfo vsInfo = {
+ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+ .codeSize = sizeof(mandelbrot_cs),
+ .pCode = mandelbrot_cs,
+ };
+
+ res = vkCreateShaderModule(g->device, &vsInfo, NULL, &g->mandelbrotShader);
+ checkFatal(res, "vkCreateShaderModule");
+
+ /* Link shader to layout */
+ VkPipelineLayoutCreateInfo pipelineinfo = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &g->descriptorSetLayout,
+ };
+
+ res = vkCreatePipelineLayout(g->device, &pipelineinfo, NULL, &g->pipelineLayout);
+ checkFatal(res, "vkCreatePipelineLayout");
+
+ /* Create pipeline */
+ VkComputePipelineCreateInfo pipeline = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage =
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = g->mandelbrotShader,
+ .pName = mandelbrot_entry,
+ },
+ .layout = g->pipelineLayout
+
+ };
+ res = vkCreateComputePipelines(g->device, NULL, 1, &pipeline, NULL, g->computePipeline);
+ checkFatal(res, "vkCreateComputePipeline");
+}
+
+/**
+ * Create a command buffer, this is somewhat like a display list.
+ */
+static void init_command_buffer(struct state *g)
+{
+ VkResult res;
+ VkCommandPoolCreateInfo poolinfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .queueFamilyIndex = g->computeQueueIndex,
+ };
+
+ res = vkCreateCommandPool(g->device, &poolinfo, NULL, &g->commandPool);
+ checkFatal(res, "vkCreateCommandPool");
+
+ VkCommandBufferAllocateInfo cmdinfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+ .commandPool = g->commandPool,
+ .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+ .commandBufferCount = 1,
+ };
+
+ res = vkAllocateCommandBuffers(g->device, &cmdinfo, g->commandBuffers);
+ checkFatal(res, "vkAllocateCommandBuffers");
+
+ /* Fill command buffer with commands for later operation */
+ VkCommandBufferBeginInfo beginInfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ };
+ res = vkBeginCommandBuffer(g->commandBuffers[0], &beginInfo);
+ checkFatal(res, "vkBeginCommandBuffer");
+
+ /* Bind the compute operation and data */
+ vkCmdBindPipeline(g->commandBuffers[0], VK_PIPELINE_BIND_POINT_COMPUTE, g->computePipeline[0]);
+ vkCmdBindDescriptorSets(g->commandBuffers[0], VK_PIPELINE_BIND_POINT_COMPUTE, g->pipelineLayout, 0, 1, g->descriptorSets, 0, NULL);
+
+ /* Run it */
+ vkCmdDispatch(g->commandBuffers[0], round_up(WIDTH, LWS_X), round_up(HEIGHT, LWS_Y), 1);
+
+ res = vkEndCommandBuffer(g->commandBuffers[0]);
+ checkFatal(res, "vkEndCommandBuffer");
+}
+
+/**
+ * Execute the pre-created command buffer.
+ *
+ * A fence is used to wait for completion.
+ */
+static void execute(struct state *g)
+{
+ VkResult res;
+ VkSubmitInfo submitInfo[] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = g->commandBuffers
+ },
+ };
+
+ /* Create fence to mark the task completion */
+ VkFence fence;
+ VkFenceCreateInfo fenceInfo = {
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO
+ };
+ res = vkCreateFence(g->device, &fenceInfo, NULL, &fence);
+ checkFatal(res, "vkCreateFence");
+
+ /* Await completion */
+ res = vkQueueSubmit(g->computeQueue, ARRAY_SIZEOF(submitInfo), submitInfo, fence);
+ checkFatal(res, "vkQueueSubmit");
+
+ do {
+ res = vkWaitForFences(g->device, 1, &fence, VK_TRUE, 1000000);
+ } while (res == VK_TIMEOUT);
+
+ vkDestroyFence(g->device, fence, NULL);
+}
+
/**
 * Trivial pnm (P6) format image output.
 *
 * @param name    output file path
 * @param width   image width in pixels
 * @param height  image height in pixels
 * @param depth   bytes per pixel (must be 3 for a valid P6 file)
 * @param pixels  width*height*depth bytes of packed pixel data
 */
static void pam_save(const char *name, unsigned int width, unsigned int height, unsigned int depth, const uint8_t *pixels)
{
	FILE *fp = fopen(name, "wb");

	if (fp) {
		/* Fixed: width/height are unsigned int, so the conversion
		 * specifier must be %u, not %d (mismatched specifier is
		 * undefined behaviour). */
		fprintf(fp, "P6\n%u\n%u\n255\n", width, height);
		fwrite(pixels, depth, width*height, fp);
		fclose(fp);
		printf("wrote: %s\n", name);
	} else {
		/* Previously a failed open was silently ignored */
		perror(name);
	}
}
+
+/**
+ * Accesses the gpu buffer, converts it to RGB byte, and saves it as a pam file.
+ */
+void save_result(struct state *g)
+{
+ VkResult res;
+ struct Pixel *dst;
+
+ res = vkMapMemory(g->device, g->dstMemory, 0, g->dstBufferSize, 0, (void **)&dst);
+ checkFatal(res, "vkMapMemory");
+
+ uint8_t *pixels = malloc(WIDTH * HEIGHT * 3);
+
+ // this is super-slow!
+ for (int i = 0; i < WIDTH * HEIGHT; i++) {
+ pixels[i * 3 + 0] = (uint8_t)(255.0f * dst[i].r);
+ pixels[i * 3 + 1] = (uint8_t)(255.0f * dst[i].g);
+ pixels[i * 3 + 2] = (uint8_t)(255.0f * dst[i].b);
+ }
+
+ vkUnmapMemory(g->device, g->dstMemory);
+
+ pam_save("mandelbrot.pam", WIDTH, HEIGHT, 3, pixels);
+ free(pixels);
+}
+
+static void shutdown(struct state *g)
+{
+ vkDestroyCommandPool(g->device, g->commandPool, NULL);
+
+ vkDestroyPipeline(g->device, g->computePipeline[0], NULL);
+ vkDestroyPipelineLayout(g->device, g->pipelineLayout, NULL);
+ vkDestroyShaderModule(g->device, g->mandelbrotShader, NULL);
+
+ vkDestroyDescriptorPool(g->device, g->descriptorPool, NULL);
+ vkDestroyDescriptorSetLayout(g->device, g->descriptorSetLayout, NULL);
+
+ vkFreeMemory(g->device, g->dstMemory, NULL);
+ vkDestroyBuffer(g->device, g->dstBuffer, NULL);
+
+ vkDestroyDevice(g->device, NULL);
+ vkDestroyInstance(g->instance, NULL);
+}
+
+int main(int argc, char** argv)
+{
+ struct state g = {
+ .dstBufferSize = WIDTH * HEIGHT * sizeof(struct Pixel),
+ };
+
+ init_instance(&g);
+ init_device(&g);
+
+ init_buffer(&g, g.dstBufferSize,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+ &g.dstBuffer, &g.dstMemory);
+
+ init_descriptor(&g);
+ init_pipeline(&g);
+ init_command_buffer(&g);
+
+ execute(&g);
+
+ save_result(&g);
+
+ shutdown(&g);
+
+ return(EXIT_SUCCESS);
+}
--- /dev/null
#version 450

#define WIDTH (1920*2)
#define HEIGHT (1200*2)
#define LWS_X 8
#define LWS_Y 8

layout (local_size_x = LWS_X, local_size_y = LWS_Y, local_size_z = 1 ) in;

struct Pixel{
	vec4 value;
};

layout(std140, binding = 0) buffer buf
{
	Pixel imageData[];
};

/*
 * One invocation per output pixel: iterate z = z*z + c and colour by
 * escape time using a cosine palette.
 */
void main() {
	/* The dispatch is rounded up to whole workgroups, so invocations
	 * falling outside the image simply exit. */
	if (gl_GlobalInvocationID.x >= WIDTH || gl_GlobalInvocationID.y >= HEIGHT)
		return;

	float x = float(gl_GlobalInvocationID.x) / float(WIDTH);
	float y = float(gl_GlobalInvocationID.y) / float(HEIGHT);

	/* Map to the complex plane, correcting the vertical scale for
	 * the non-square aspect ratio. */
	vec2 uv = vec2(x, (y - 0.5) * (12.0 / 19.0) + 0.5);
	vec2 c = vec2(-.445, 0.0) + (uv - 0.5)*(4.0);
	vec2 z = vec2(0.0);
	const int M = 1000;
	float n = 0.0;

	/* Count iterations until |z|^2 exceeds 4 (escape) or we give up */
	for (int i = 0; i < M; i++) {
		z = vec2(z.x*z.x - z.y*z.y, 2.*z.x*z.y) + c;
		if (dot(z, z) > 4)
			break;
		n++;
	}

	// we use a simple cosine palette to determine color:
	// http://iquilezles.org/www/articles/palettes/palettes.htm
	float t = float(n) * 10.0 / float(M);
	vec3 d = vec3(0.5, 0.5, 0.5);
	vec3 e = vec3(0.5, 0.5, 0.5);
	vec3 f = vec3(1.0, 1.0, 1.0);
	vec3 g = vec3(0.00, 0.33, 0.67);

	vec4 color = vec4( d + e*cos( 6.28318*(f*t+g) ) ,1.0);

	/* Points that never escaped are inside the set: paint them black */
	if (n == M)
		color = vec4(0, 0, 0, 1);

	/* Store the result into the storage buffer, row-major */
	imageData[WIDTH * gl_GlobalInvocationID.y + gl_GlobalInvocationID.x].value = color;
}
--- /dev/null
+
+/*
+ Some basic GL maths routines, in C.
+ */
+
+#ifndef GLMATHS_H
+#define GLMATHS_H
+
+#include <math.h>
+
/* Loads the 4x4 identity matrix into the 16-float array `matrix`. */
static __inline__ void identity4f(float *matrix) {
	/* indices 0, 5, 10, 15 are the diagonal */
	for (int i = 0; i < 16; i++)
		matrix[i] = (i % 5 == 0) ? 1.0f : 0.0f;
}
+
+static __inline__ float length3f(float * __restrict a) {
+ float sum = 0;
+ for (int i = 0; i < 3; i++)
+ sum += a[i] * a[i];
+ return sqrtf(sum);
+}
+
/* Componentwise 3-vector subtraction: c = a - b. */
static __inline__ void sub3f(float * __restrict c, float * __restrict a, float * __restrict b) {
	c[0] = a[0] - b[0];
	c[1] = a[1] - b[1];
	c[2] = a[2] - b[2];
}
+
+static __inline__ void norm3f(float * __restrict vec) {
+ float fix = 1.0f / length3f(vec);
+ for (int i = 0; i < 3; i++)
+ vec[i] *= fix;
+}
+
/* 3-vector cross product: c = a x b (c must not alias a or b). */
static __inline__ void cross3f(float * __restrict c, float * __restrict a, float * __restrict b) {
	float cx = a[1] * b[2] - a[2] * b[1];
	float cy = a[2] * b[0] - a[0] * b[2];
	float cz = a[0] * b[1] - a[1] * b[0];

	c[0] = cx;
	c[1] = cy;
	c[2] = cz;
}
+
/* 3-vector dot product. */
static __inline__ float dot3f(float * __restrict a, float * __restrict b) {
	float sum = 0.0f;
	for (int i = 0; i < 3; i++)
		sum += a[i] * b[i];
	return sum;
}
+
/**
 * 4x4 matrix multiply over 16-float arrays.
 *
 * Note the argument order: the result is c, and with elements indexed
 * as m[i*4 + k] the product computed is c = a x b — i.e. the THIRD
 * parameter is the left operand, matching the original call sites.
 *
 * c must not alias a or b (all three are restrict-qualified).
 *
 * @return c, for call chaining.
 *
 * Changes: added __inline__ for consistency with every other function
 * in this header, and replaced the 16 unrolled statements with loops
 * (same element formula, same result).
 */
static __inline__ float * mult4x4f(float * __restrict c, float * __restrict b, float * __restrict a) {
	for (int i = 0; i < 4; i++) {
		for (int j = 0; j < 4; j++) {
			float sum = 0.0f;
			for (int k = 0; k < 4; k++)
				sum += a[i * 4 + k] * b[k * 4 + j];
			c[i * 4 + j] = sum;
		}
	}
	return c;
}
+
/*
 * Builds a view matrix from an eye position, a target point and an
 * up vector, writing the 16 floats directly into mat.
 * NOTE(review): appears to follow the gluLookAt construction — confirm
 * against callers before relying on handedness.
 */
static __inline__ void lookAt(float *mat, float *eye, float *centre, float *up) {
	float f[3], s[3], u[3];

	/* Orthonormal camera basis: f towards the target, s sideways,
	 * u the recomputed (exactly orthogonal) up vector. */
	sub3f(f, centre, eye);
	norm3f(f);
	cross3f(s, f, up);
	norm3f(s);
	cross3f(u, s, f);

	/* Rotation part */
	mat[0] = s[0];
	mat[4] = s[1];
	mat[8] = s[2];

	mat[1] = u[0];
	mat[5] = u[1];
	mat[9] = u[2];

	mat[2] = -f[0];
	mat[6] = -f[1];
	mat[10] = -f[2];

	/* Translation part: eye projected onto the camera basis */
	mat[12] = -dot3f(s, eye);
	mat[13] = -dot3f(u, eye);
	mat[14] = dot3f(f, eye);

	/* Homogeneous row/column */
	mat[3] = 0.0f;
	mat[7] = 0.0f;
	mat[11] = 0.0f;
	mat[15] = 1.0f;
}
+
/*
 * Fills mat with a perspective projection matrix for the given
 * clipping planes (same parameters as glFrustum).
 */
static __inline__ void frustum(float *mat, float left, float right, float bottom, float top, float znear, float zfar) {
	float near2 = 2.0 * znear;
	float width = right - left;
	float height = top - bottom;
	float depth = zfar - znear;

	/* column 0 */
	mat[0] = near2 / width;
	mat[1] = 0.0;
	mat[2] = 0.0;
	mat[3] = 0.0;

	/* column 1 */
	mat[4] = 0.0;
	mat[5] = near2 / height;
	mat[6] = 0.0;
	mat[7] = 0.0;

	/* column 2 */
	mat[8] = (right + left) / width;
	mat[9] = (top + bottom) / height;
	mat[10] = (-zfar - znear) / depth;
	mat[11] = -1.0;

	/* column 3 */
	mat[12] = 0.0;
	mat[13] = 0.0;
	mat[14] = (-near2 * zfar) / depth;
	mat[15] = 0.0;
}
+
/**
 * Fills mat with a symmetric perspective projection (via frustum()).
 *
 * @param fovy    full vertical field of view, in radians (the angle is
 *                halved before tanf, with no degree conversion)
 * @param aspect  width / height ratio
 * @param znear   near clipping plane distance (> 0)
 * @param zfar    far clipping plane distance
 *
 * Fixed: was declared `__inline`, the only spelling in this header
 * differing from the `__inline__` used everywhere else.
 */
static __inline__ void perspective(float *mat, float fovy, float aspect, float znear, float zfar) {
	float ymax = znear * tanf(fovy * 0.5f);
	float xmax = ymax * aspect;

	frustum(mat, -xmax, xmax, -ymax, ymax, znear, zfar);
}
+
+#endif
+
--- /dev/null
+
+#ifndef PAM_H
+#define PAM_H
+
/**
 * Trivial pnm (P6) format image output.
 *
 * @param name    output file path
 * @param width   image width in pixels
 * @param height  image height in pixels
 * @param depth   bytes per pixel (must be 3 for a valid P6 file)
 * @param pixels  width*height*depth bytes of packed pixel data
 *
 * Silently does nothing if the file cannot be opened.
 */
static void pam_save(const char *name, unsigned int width, unsigned int height, unsigned int depth, const uint8_t *pixels)
{
	FILE *fp = fopen(name, "wb");

	if (fp) {
		/* Fixed: width/height are unsigned int, so the conversion
		 * specifier must be %u, not %d (mismatched specifier is
		 * undefined behaviour). */
		fprintf(fp, "P6\n%u\n%u\n255\n", width, height);
		fwrite(pixels, depth, width*height, fp);

		fclose(fp);
	}
}
+
+#endif
--- /dev/null
+
/*
  Very basic timing utilities.
 */

#ifndef TIMING_H
/* Fixed: the guard macro was never defined, so including this header
 * twice redefined everything below. */
#define TIMING_H

#ifdef HAVE_CLOCK_GETTIME
#include <time.h>
#include <stdint.h> /* for uint64_t, previously relied on the includer */

/* Wall-clock timestamp in nanoseconds (CLOCK_REALTIME). */
static __inline__ uint64_t time_stamp(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_REALTIME, &ts);

	return ts.tv_nsec + ts.tv_sec * 1000000000ULL;
}

/* TIME_START declares the reference stamp; TIME_PRINT/TIME_ITEM print
 * the seconds elapsed since it, tagged with the function name or a
 * caller-supplied label respectively. */
#define TIME_START uint64_t stamp = time_stamp()
#define TIME_PRINT printf(" %12.9f %s\n", 1E-9 * (time_stamp() - stamp), __FUNCTION__)
#define TIME_ITEM(x) printf(" %12.9f %s\n", 1E-9 * (time_stamp() - stamp), x)

#else

/* No high-resolution clock: all timing operations compile to nothing. */
#define TIME_START do { } while(0)
#define TIME_PRINT do { } while(0)
/* Fixed: TIME_ITEM was missing from this branch, breaking any user of
 * it when HAVE_CLOCK_GETTIME is not defined. */
#define TIME_ITEM(x) do { } while(0)

#endif

#endif