From 90456dc2c32931bc27a5ce6a7b89ba775af61d83 Mon Sep 17 00:00:00 2001
From: Not Zed
Date: Wed, 11 Dec 2019 16:08:49 +1030
Subject: [PATCH] Load some initial examples.

---
 README                        |   53 ++
 config.make.in                |   42 ++
 cube/Makefile                 |   12 +
 cube/cube-data.h              |   52 ++
 cube/cube.c                   | 1181 +++++++++++++++++++++++++++++++++
 cube/cube.frag                |    8 +
 cube/cube.vert                |   14 +
 mandelbrot-cl/Makefile        |   12 +
 mandelbrot-cl/mandelbrot.c    |  531 +++++++++++++++
 mandelbrot-cl/mandelbrot.cl   |   50 ++
 mandelbrot-cs/Makefile        |   13 +
 mandelbrot-cs/mandelbrot.c    |  531 +++++++++++++++
 mandelbrot-cs/mandelbrot.comp |   63 ++
 util/glmaths.h                |  137 ++++
 util/pam.h                    |   20 +
 util/timing.h                 |   30 +
 16 files changed, 2749 insertions(+)
 create mode 100644 README
 create mode 100644 config.make.in
 create mode 100644 cube/Makefile
 create mode 100644 cube/cube-data.h
 create mode 100644 cube/cube.c
 create mode 100644 cube/cube.frag
 create mode 100644 cube/cube.vert
 create mode 100644 mandelbrot-cl/Makefile
 create mode 100644 mandelbrot-cl/mandelbrot.c
 create mode 100644 mandelbrot-cl/mandelbrot.cl
 create mode 100644 mandelbrot-cs/Makefile
 create mode 100644 mandelbrot-cs/mandelbrot.c
 create mode 100644 mandelbrot-cs/mandelbrot.comp
 create mode 100644 util/glmaths.h
 create mode 100644 util/pam.h
 create mode 100644 util/timing.h

diff --git a/README b/README
new file mode 100644
index 0000000..ecb8cb9
--- /dev/null
+++ b/README
@@ -0,0 +1,53 @@
+
+INTRODUCTION
+------------
+
+This is a collection of learning experiments with Vulkan in the C
+programming language.
+
+COMPILING
+---------
+
+Apart from the basic GNU tool suite one needs glslangValidator and
+clspv.  The former may already be installed or available as a package,
+but see LINKS for further information.
+
+Copy config.make.in to config.make before compiling.
+
+Each example has its own Makefile and must be compiled from within
+its own directory.
+
+EXAMPLES
+--------
+
+cube/
+
+  A basic render-cube demo from https://vulkan.lunarg.com/sdk/home,
+  converted to plain C.
+
+mandelbrot-cs/
+mandelbrot-cl/
+
+  One is a mandelbrot generator using a GLSL compute shader, the other
+  uses an OpenCL kernel compiled via clspv.  Because the data buffers
+  are so simple, the code is identical apart from which shader it
+  loads.
+
+  By far the largest share of the execution time is spent on the CPU
+  side, converting the float4 array to uchar3 for output.
+
+LINKS
+-----
+
+ * https://www.lunarg.com/vulkan-sdk - Vulkan SDK.
+ * https://github.com/google/clspv - OpenCL C to SPIR-V compiler.
+
+LICENSES
+--------
+
+Various permissive licenses, see individual source files.
+
+Stuff in util/ is (intended to be) public domain, although I'm
+unsure about some of glmaths.h.
+
diff --git a/config.make.in b/config.make.in
new file mode 100644
index 0000000..cf81135
--- /dev/null
+++ b/config.make.in
@@ -0,0 +1,42 @@
+
+# command used to compile .frag, .vert, and .comp files.
+GLSPV=glslangValidator
+# command used to compile .cl files.
+CLSPV=clspv
+
+# On Linux, clock_gettime can be used for timing
+CPPFLAGS=-DHAVE_CLOCK_GETTIME -I../util
+
+# compile flags for graphics examples
+present_LDLIBS=-lm -lvulkan -lX11
+present_CFLAGS=-DVK_USE_PLATFORM_XLIB_KHR -O2 -march=native
+
+# compile flags for compute-only examples
+compute_LDLIBS=-lm -lvulkan
+compute_CFLAGS=-O2 -march=native
+
+# Some implicit rules that might need changing based on
+# the configuration above.
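+#
+# For reference, the shader-to-include rules below follow this general
+# shape (a sketch only; the -V/-o glslangValidator options and the
+# intermediate .spv file name are assumptions, adjust as needed):
+#
+#   %.vert.inc: %.vert
+#   	$(GLSPV) -V -o $*.vert.spv $<
+#   	echo "static const uint32_t $*_vs[] = {" > $@
+#   	od -A none -X < $*.vert.spv | sed 's/\([0-9a-f]\+\)/0x\1,/g' >> $@
+#   	echo "};" >> $@
+#
+# The stem-derived array name ($*_vs, giving cube_vs here) is what
+# cube.c expects to #include.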
+ +# compile a glsl .vert file into a C include file +%.vert.inc: %.vert + echo $ $@ + od -A none -X < $< | sed 's/\([0-9a-f]\+\)/0x\1,/g' >> $@ + echo "};" >> $@ + +# compile a .cl file into SPIR-V +%.cl.spv %.cl.csv: %.cl + $(CLSPV) -o $*.cl.spv --descriptormap $*.cl.csv $< diff --git a/cube/Makefile b/cube/Makefile new file mode 100644 index 0000000..4e6d243 --- /dev/null +++ b/cube/Makefile @@ -0,0 +1,12 @@ + +include ../config.make + +LDLIBS=$(present_LDLIBS) +CFLAGS=$(present_CFLAGS) + +all: cube +cube: cube.o +cube.o: cube.c cube.vert.inc cube.frag.inc + +clean: + rm -f *.o cube cube.*.inc diff --git a/cube/cube-data.h b/cube/cube-data.h new file mode 100644 index 0000000..a9626cd --- /dev/null +++ b/cube/cube-data.h @@ -0,0 +1,52 @@ + +typedef struct Vertex Vertex; + +struct Vertex { + float posX, posY, posZ, posW; // Position data + float r, g, b, a; // Color +}; + +static const Vertex g_vb_solid_face_colours_Data[] = { + // red face + {-1, -1, 1, 1.f, 1.f, 0.f, 0.f, 1.f}, + {-1, 1, 1, 1.f, 1.f, 0.f, 0.f, 1.f}, + {1, -1, 1, 1.f, 1.f, 0.f, 0.f, 1.f}, + {1, -1, 1, 1.f, 1.f, 0.f, 0.f, 1.f}, + {-1, 1, 1, 1.f, 1.f, 0.f, 0.f, 1.f}, + {1, 1, 1, 1.f, 1.f, 0.f, 0.f, 1.f}, + // green face + {-1, -1, -1, 1.f, 0.f, 1.f, 0.f, 1.f}, + {1, -1, -1, 1.f, 0.f, 1.f, 0.f, 1.f}, + {-1, 1, -1, 1.f, 0.f, 1.f, 0.f, 1.f}, + {-1, 1, -1, 1.f, 0.f, 1.f, 0.f, 1.f}, + {1, -1, -1, 1.f, 0.f, 1.f, 0.f, 1.f}, + {1, 1, -1, 1.f, 0.f, 1.f, 0.f, 1.f}, + // blue face + {-1, 1, 1, 1.f, 0.f, 0.f, 1.f, 1.f}, + {-1, -1, 1, 1.f, 0.f, 0.f, 1.f, 1.f}, + {-1, 1, -1, 1.f, 0.f, 0.f, 1.f, 1.f}, + {-1, 1, -1, 1.f, 0.f, 0.f, 1.f, 1.f}, + {-1, -1, 1, 1.f, 0.f, 0.f, 1.f, 1.f}, + {-1, -1, -1, 1.f, 0.f, 0.f, 1.f, 1.f}, + // yellow face + {1, 1, 1, 1.f, 1.f, 1.f, 0.f, 1.f}, + {1, 1, -1, 1.f, 1.f, 1.f, 0.f, 1.f}, + {1, -1, 1, 1.f, 1.f, 1.f, 0.f, 1.f}, + {1, -1, 1, 1.f, 1.f, 1.f, 0.f, 1.f}, + {1, 1, -1, 1.f, 1.f, 1.f, 0.f, 1.f}, + {1, -1, -1, 1.f, 1.f, 1.f, 0.f, 1.f}, + // magenta face + {1, 1, 1, 1.f, 1.f, 0.f, 1.f, 1.f}, + {-1, 1, 1, 1.f, 1.f, 0.f, 1.f, 1.f}, + {1, 1, -1, 1.f, 1.f, 0.f, 1.f, 1.f}, + {1, 1, -1, 1.f, 1.f, 0.f, 1.f, 1.f}, + {-1, 1, 1, 1.f, 1.f, 0.f, 1.f, 1.f}, + {-1, 1, -1, 1.f, 1.f, 0.f, 1.f, 1.f}, + // cyan face + {1, -1, 1, 1.f, 0.f, 1.f, 1.f, 1.f}, + {1, -1, -1, 1.f, 0.f, 1.f, 1.f, 1.f}, + {-1, -1, 1, 1.f, 0.f, 1.f, 1.f, 1.f}, + {-1, -1, 1, 1.f, 0.f, 1.f, 1.f, 1.f}, + {1, -1, -1, 1.f, 0.f, 1.f, 1.f, 1.f}, + {-1, -1, -1, 1.f, 0.f, 1.f, 1.f, 1.f}, +}; diff --git a/cube/cube.c b/cube/cube.c new file mode 100644 index 0000000..0020d9b --- /dev/null +++ b/cube/cube.c @@ -0,0 +1,1181 @@ +/* + * Vulkan Samples + * + * Copyright (C) 2015-2016 Valve Corporation + * Copyright (C) 2015-2016 LunarG, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + This based on the tutorial in the vulkansdk: + + https://vulkan.lunarg.com/sdk/home + + But it has been made stand-alone and converted to C. C struct + initialisers do something - but not much - toward cleaning up the + mess. 
+*/ + +#include +#include +#include +#include + +#include + +#if defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_XCB_KHR) +#include +#endif + +#include +#include +#include "glmaths.h" + +// These can't be changed +#define NUM_SAMPLES VK_SAMPLE_COUNT_1_BIT +#define NUM_DESCRIPTOR_SETS 1 + +#include "cube.vert.inc" +#include "cube.frag.inc" +#include "cube-data.h" + +typedef struct mat4x4 { + float m[16]; +} mat4x4; + +struct state { + VkInstance vk; + + VkPhysicalDevice physical; + VkPhysicalDeviceMemoryProperties memory_properties; + VkPhysicalDeviceFeatures device_features; + + VkCommandPool cmd_pool; + VkDescriptorPool desc_pool; + + VkDevice device; + VkCommandBuffer cmd; + VkSwapchainKHR chain; + int device_id; + int graphics_queue_index; + int present_queue_index; + + VkQueue graphics_queue; + VkQueue present_queue; + + int shaderCount; + VkShaderModule shader[2]; + + VkRenderPass render_pass; + VkFramebuffer *framebuffers; + + int width, height; + VkSurfaceKHR surface; + + float projection[16]; + float view[16]; + float model[16]; + float clip[16]; + float mvp[16]; + + VkBuffer uniformBuffer; + VkDeviceMemory uniformMemory; + VkDescriptorBufferInfo uniformInfo; + + VkPipeline pipeline[1]; + + VkDescriptorSetLayout desc_layout[NUM_DESCRIPTOR_SETS]; + VkDescriptorSet desc_set[NUM_DESCRIPTOR_SETS]; + VkPipelineLayout pipeline_layout; + + VkBuffer vertexBuffer; + VkDeviceMemory vertexMemory; + VkVertexInputBindingDescription vi_binding; + VkVertexInputAttributeDescription vi_attribs[2]; + + VkFence drawFence; + + VkSemaphore chainSemaphore; + uint32_t chainIndex; + + VkFormat chainImageFormat; + uint32_t chainImageCount; + VkImage *chainImage; + VkImageView *chainImageView; + + VkFormat depthFormat; + VkImage depthImage; + VkImageView depthView; + VkDeviceMemory depthMemory; + +#if defined(VK_USE_PLATFORM_XLIB_KHR) + Display *display; + Window window; + Atom wm_delete_window; +#endif +}; + +static void checkFatal(VkResult res, const char *cmd) +{ + if (res != VK_SUCCESS) { + fprintf(stderr, "%s: %d\n", cmd, res); + exit(EXIT_FAILURE); + } +} + +static int find_memory_type(VkPhysicalDeviceMemoryProperties *memory, uint32_t typeMask, VkMemoryPropertyFlagBits query) +{ + for (int i = 0; i < memory->memoryTypeCount; i++) { + if (((1 << i) & typeMask) && (memory->memoryTypes[i].propertyFlags & query) == query) + return i; + } + return -1; +} + +void execute_begin_command_buffer(struct state *g) +{ + /* DEPENDS on init_command_buffer() */ + VkResult res; + + VkCommandBufferBeginInfo cmd_buf_info = {}; + cmd_buf_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cmd_buf_info.pNext = NULL; + cmd_buf_info.flags = 0; + cmd_buf_info.pInheritanceInfo = NULL; + + res = vkBeginCommandBuffer(g->cmd, &cmd_buf_info); +} + +void execute_end_command_buffer(struct state *g) +{ + VkResult res; + + res = vkEndCommandBuffer(g->cmd); +} + +#define FENCE_TIMEOUT 100000000 + +void execute_queue_command_buffer(struct state *g) +{ + VkResult res; + + /* Queue the command buffer for execution */ + const VkCommandBuffer cmd_bufs[] = {g->cmd}; + VkFenceCreateInfo fenceInfo; + VkFence drawFence; + fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fenceInfo.pNext = NULL; + fenceInfo.flags = 0; + vkCreateFence(g->device, &fenceInfo, NULL, &drawFence); + + VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + VkSubmitInfo submit_info[] = { + { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .waitSemaphoreCount = 0, + .pWaitSemaphores = NULL, + .pWaitDstStageMask 
= &pipe_stage_flags, + .commandBufferCount = 1, + .pCommandBuffers = cmd_bufs, + .signalSemaphoreCount = 0, + .pSignalSemaphores = NULL, + } + }; + res = vkQueueSubmit(g->graphics_queue, 1, submit_info, drawFence); + + do { + res = vkWaitForFences(g->device, 1, &drawFence, VK_TRUE, FENCE_TIMEOUT); + } while (res == VK_TIMEOUT); + + vkDestroyFence(g->device, drawFence, NULL); +} + +static void init_instance(struct state *g) +{ + VkResult res; + VkApplicationInfo app = { + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pApplicationName = "basic", + .pEngineName = "none", + .apiVersion = VK_API_VERSION_1_0, + }; + const char * const extensions[] = { + VK_KHR_SURFACE_EXTENSION_NAME, + VK_KHR_XLIB_SURFACE_EXTENSION_NAME + }; + VkInstanceCreateInfo info = { + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pApplicationInfo = &app, + .enabledExtensionCount = sizeof(extensions) / sizeof(*extensions), + .ppEnabledExtensionNames = extensions, + }; + + res = vkCreateInstance(&info, NULL, &g->vk); + if (res != VK_SUCCESS) { + fprintf(stderr, "vkCreateInstance: %d\n", res); + exit(EXIT_FAILURE); + } +} + +static int clampi(int v, int min, int max) +{ + return v < min ? min : v < max ? v : max; +} + +#if defined(VK_USE_PLATFORM_XLIB_KHR) +static void init_surface(struct state *g) +{ + XInitThreads(); + g->display = XOpenDisplay(NULL); + long visualMask = VisualScreenMask; + int numberOfVisuals; + XVisualInfo vInfoTemplate = { + .screen = DefaultScreen(g->display) + }; + XVisualInfo *visualInfo = XGetVisualInfo(g->display, visualMask, &vInfoTemplate, &numberOfVisuals); + Colormap colormap = XCreateColormap(g->display, RootWindow(g->display, vInfoTemplate.screen), visualInfo->visual, AllocNone); + XSetWindowAttributes windowAttributes = { + .colormap = colormap, + .background_pixel = 0xFFFFFFFF, + .border_pixel = 0, + .event_mask = KeyPressMask | KeyReleaseMask | StructureNotifyMask | ExposureMask + }; + + g->window = XCreateWindow(g->display, RootWindow(g->display, vInfoTemplate.screen), + 0, 0, g->width, g->height, + 0, visualInfo->depth, InputOutput, visualInfo->visual, + CWBackPixel | CWBorderPixel | CWEventMask | CWColormap, &windowAttributes); + + XSelectInput(g->display, g->window, ExposureMask | KeyPressMask); + XMapWindow(g->display, g->window); + XFlush(g->display); + g->wm_delete_window = XInternAtom(g->display, "WM_DELETE_WINDOW", False); + + // + VkXlibSurfaceCreateInfoKHR surfaceinfo = { + .sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, + .pNext = NULL, + .flags = 0, + .dpy = g->display, + .window = g->window + }; + + VkResult res; + + res = vkCreateXlibSurfaceKHR(g->vk, &surfaceinfo, NULL, &g->surface); + checkFatal(res, "vkCrateXlibSurfaceKHR"); +} +#else +#error "Only XLIB is implemented" +#endif + +static void init_device(struct state *g) +{ + VkResult res; + uint32_t devcount; + + res = vkEnumeratePhysicalDevices(g->vk, &devcount, NULL); + VkPhysicalDevice devs[devcount]; + res = vkEnumeratePhysicalDevices(g->vk, &devcount, devs); + + // Search for device and queue indices + int devid = -1; + int present_queue = -1; + int graphics_queue = -1; + for (int i = 0; i < devcount; i++) { + uint32_t family_count; + vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, NULL); + VkQueueFamilyProperties famprops[family_count]; + vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, famprops); + + for (uint32_t j = 0; j < family_count; j++) { + VkBool32 present; + + vkGetPhysicalDeviceSurfaceSupportKHR(devs[i], j, g->surface, &present); + + if (present && 
present_queue == -1) + present_queue = j; + if ((famprops[j].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) { + graphics_queue = j; + if (present) { + present_queue = j; + break; + } + } + } + if (present_queue != -1 && graphics_queue != -1) { + devid = i; + break; + } + } + + if (devid == -1) { + fprintf(stderr, "Unable to find suitable device\n"); + exit(1); + } + g->device_id = devid; + g->present_queue_index = present_queue; + g->graphics_queue_index = graphics_queue; + + vkGetPhysicalDeviceMemoryProperties(devs[devid], &g->memory_properties); + vkGetPhysicalDeviceFeatures(devs[devid], &g->device_features); + + /* ************************************************************** */ + + float qpri[] = {0.0f}; + VkDeviceQueueCreateInfo qinfo = { + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .queueCount = 1, + .pQueuePriorities = qpri, + .queueFamilyIndex = g->graphics_queue_index, + }; + + const char * const extensions[] = { + VK_KHR_SWAPCHAIN_EXTENSION_NAME, + }; + + VkPhysicalDeviceFeatures features = { + .depthClamp = VK_TRUE, + }; + VkDeviceCreateInfo devinfo = { + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .queueCreateInfoCount = 1, + .pQueueCreateInfos = &qinfo, + .enabledExtensionCount = sizeof(extensions) / sizeof(*extensions), + .ppEnabledExtensionNames = extensions, + .pEnabledFeatures = &features, + }; + + res = vkCreateDevice(devs[devid], &devinfo, NULL, &g->device); + checkFatal(res, "vkCreateDevice"); + + /* ************************************************************** */ + VkFormat format; + uint32_t formatCount; + res = vkGetPhysicalDeviceSurfaceFormatsKHR(devs[devid], g->surface, &formatCount, NULL); + VkSurfaceFormatKHR surfFormats[formatCount]; + res = vkGetPhysicalDeviceSurfaceFormatsKHR(devs[devid], g->surface, &formatCount, surfFormats); + // If the format list includes just one entry of VK_FORMAT_UNDEFINED, + // the surface has no preferred format. Otherwise, at least one + // supported format will be returned. + if (formatCount == 1 && surfFormats[0].format == VK_FORMAT_UNDEFINED) { + format = VK_FORMAT_B8G8R8A8_UNORM; + } else { + format = surfFormats[0].format; + } + + VkSurfaceCapabilitiesKHR surfCapabilities; + + res = vkGetPhysicalDeviceSurfaceCapabilitiesKHR(devs[devid], g->surface, &surfCapabilities); + checkFatal(res, "vkGetPhysicalDeviceSurfaceCapabilitiesKHR"); + + uint32_t presentModeCount; + res = vkGetPhysicalDeviceSurfacePresentModesKHR(devs[devid], g->surface, &presentModeCount, NULL); + VkPresentModeKHR presentModes[presentModeCount]; + res = vkGetPhysicalDeviceSurfacePresentModesKHR(devs[devid], g->surface, &presentModeCount, presentModes); + checkFatal(res, "vkGetPhysicalDeviceSurfacePresentModesKHR"); + + VkExtent2D swapchainExtent; + // width and height are either both 0xFFFFFFFF, or both not 0xFFFFFFFF. + if (surfCapabilities.currentExtent.width == 0xFFFFFFFF) { + // If the surface size is undefined, the size is set to + // the size of the images requested. 
+ swapchainExtent.width = clampi(g->width, surfCapabilities.minImageExtent.width, surfCapabilities.maxImageExtent.width); + swapchainExtent.height = clampi(g->height, surfCapabilities.minImageExtent.height, surfCapabilities.maxImageExtent.height); + } else { + // If the surface size is defined, the swap chain size must match + swapchainExtent = surfCapabilities.currentExtent; + } + + VkCompositeAlphaFlagBitsKHR compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + VkCompositeAlphaFlagBitsKHR compositeAlphaFlags[4] = { + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, + VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR, + VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR, + VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR, + }; + for (uint32_t i = 0; i < sizeof(compositeAlphaFlags) / sizeof(compositeAlphaFlags[0]); i++) { + if (surfCapabilities.supportedCompositeAlpha & compositeAlphaFlags[i]) { + compositeAlpha = compositeAlphaFlags[i]; + break; + } + } + + VkSwapchainCreateInfoKHR chaininfo = { + .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, + .surface = g->surface, + .minImageCount = surfCapabilities.minImageCount, + .imageFormat = format, + .imageExtent = swapchainExtent, + .preTransform = (surfCapabilities.supportedTransforms & VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR) + ? VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR : surfCapabilities.currentTransform, + .compositeAlpha = compositeAlpha, + .imageArrayLayers = 1, + .presentMode = VK_PRESENT_MODE_FIFO_KHR, + .clipped = VK_TRUE, + .imageColorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR, + .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, + // assumes queues are same. + .queueFamilyIndexCount = 0, + }; + + res = vkCreateSwapchainKHR(g->device, &chaininfo, NULL, &g->chain); + checkFatal(res, "vkCreateSwapchainKHR"); + + res = vkGetSwapchainImagesKHR(g->device, g->chain, &g->chainImageCount, NULL); + checkFatal(res, "vkGetSwapchainImagesKHR"); + + g->chainImage = malloc(sizeof(*g->chainImage) * g->chainImageCount); + g->chainImageView = malloc(sizeof(*g->chainImage) * g->chainImageCount); + + res = vkGetSwapchainImagesKHR(g->device, g->chain, &g->chainImageCount, g->chainImage); + + VkImageViewCreateInfo viewinfo = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = format, + .components.r = VK_COMPONENT_SWIZZLE_R, + .components.g = VK_COMPONENT_SWIZZLE_G, + .components.b = VK_COMPONENT_SWIZZLE_B, + .components.a = VK_COMPONENT_SWIZZLE_A, + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }; + for (uint32_t i = 0; i < g->chainImageCount; i++) { + viewinfo.image = g->chainImage[i]; + + res = vkCreateImageView(g->device, &viewinfo, NULL, &g->chainImageView[i]); + checkFatal(res, "vkCreateImageView"); + } + + g->chainImageFormat = format; +} + +static void init_device_queue(struct state *g) +{ + vkGetDeviceQueue(g->device, g->graphics_queue_index, 0, &g->graphics_queue); + if (g->graphics_queue_index == g->present_queue_index) { + g->present_queue = g->graphics_queue; + } else { + vkGetDeviceQueue(g->device, g->present_queue_index, 0, &g->present_queue); + } +} + +static void init_command(struct state *g) +{ + VkResult res; + VkCommandPoolCreateInfo poolinfo = { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .queueFamilyIndex = g->graphics_queue_index, + }; + + res = vkCreateCommandPool(g->device, &poolinfo, NULL, &g->cmd_pool); + checkFatal(res, "vkCreateCommandPool"); + + 
VkCommandBufferAllocateInfo cmdinfo = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .commandPool = g->cmd_pool, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = 1, + }; + + res = vkAllocateCommandBuffers(g->device, &cmdinfo, &g->cmd); + checkFatal(res, "vkCAllocateCommandBuffers"); +} + +static void init_depth(struct state *g) +{ + VkResult res; + VkMemoryRequirements req; + VkImageCreateInfo imageinfo = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = NULL, + .imageType = VK_IMAGE_TYPE_2D, + .format = VK_FORMAT_D16_UNORM, + .extent.width = g->width, + .extent.height = g->height, + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .samples = NUM_SAMPLES, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = NULL, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .flags = 0, + }; + res = vkCreateImage(g->device, &imageinfo, NULL, &g->depthImage); + checkFatal(res, "vkCreateImage"); + + vkGetImageMemoryRequirements(g->device, g->depthImage, &req); + VkMemoryAllocateInfo alloc = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = req.size, + .memoryTypeIndex = find_memory_type(&g->memory_properties, req.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT), + }; + res = vkAllocateMemory(g->device, &alloc, NULL, &g->depthMemory); + checkFatal(res, "vkAllocateMemory"); + + res = vkBindImageMemory(g->device, g->depthImage, g->depthMemory, 0); + checkFatal(res, "vkBindImageMemory"); + + VkImageViewCreateInfo view_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = g->depthImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = VK_FORMAT_D16_UNORM, + .components = + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A}, + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }; + res = vkCreateImageView(g->device, &view_info, NULL, &g->depthView); + checkFatal(res, "vkCreateImageView"); + + g->depthFormat = imageinfo.format; +} + +static void init_uniform(struct state *g) +{ + VkResult res; + VkBufferCreateInfo buf_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + .size = sizeof(g->mvp), + .sharingMode = VK_SHARING_MODE_EXCLUSIVE + }; + res = vkCreateBuffer(g->device, &buf_info, NULL, &g->uniformBuffer); + checkFatal(res, "vkCreateBuffer (uniform)"); + + VkMemoryRequirements req; + vkGetBufferMemoryRequirements(g->device, g->uniformBuffer, &req); + + VkMemoryAllocateInfo alloc = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = req.size, + .memoryTypeIndex = find_memory_type(&g->memory_properties, req.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT), + }; + res = vkAllocateMemory(g->device, &alloc, NULL, &g->uniformMemory); + checkFatal(res, "vkAllocateMemory (uniform)"); + + void *mem; + res = vkMapMemory(g->device, g->uniformMemory, 0, req.size, 0, &mem); + checkFatal(res, "vkMapMemory (uniform)"); + memcpy(mem, g->mvp, sizeof(g->mvp)); + vkUnmapMemory(g->device, g->uniformMemory); + + res = vkBindBufferMemory(g->device, g->uniformBuffer, g->uniformMemory, 0); + checkFatal(res, "vkBindBufferMemory (uniform)"); + + g->uniformInfo.buffer = g->uniformBuffer; + g->uniformInfo.offset = 0; + g->uniformInfo.range = sizeof(g->mvp); +} + +static void 
init_descriptor(struct state *g) +{ + VkResult res; + VkDescriptorSetLayoutBinding layout_binding = { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + }; + VkDescriptorSetLayoutCreateInfo descriptor_layout = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = &layout_binding, + }; + res = vkCreateDescriptorSetLayout(g->device, &descriptor_layout, NULL, g->desc_layout); + checkFatal(res, "vkCreateDescriptorSetLayout"); + + VkPipelineLayoutCreateInfo pipeline_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = NULL, + .pushConstantRangeCount = 0, + .pPushConstantRanges = NULL, + .setLayoutCount = 1, + .pSetLayouts = g->desc_layout, + }; + + res = vkCreatePipelineLayout(g->device, &pipeline_info, NULL, &g->pipeline_layout); + checkFatal(res, "vkCreatePipelineLayout"); + + VkDescriptorPoolSize type_count[1]; + type_count[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + type_count[0].descriptorCount = 1; + + VkDescriptorPoolCreateInfo descriptor_pool = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .maxSets = 1, + .poolSizeCount = 1, + .pPoolSizes = type_count, + }; + + res = vkCreateDescriptorPool(g->device, &descriptor_pool, NULL, &g->desc_pool); + checkFatal(res, "vkCreateDescriptorPool"); + + VkDescriptorSetAllocateInfo alloc_info[] = { + { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = g->desc_pool, + .descriptorSetCount = NUM_DESCRIPTOR_SETS, + .pSetLayouts = g->desc_layout, + }, + }; + res = vkAllocateDescriptorSets(g->device, alloc_info, g->desc_set); + checkFatal(res, "vkAllocateDescriptorSets"); + + VkWriteDescriptorSet writes[] = { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = g->desc_set[0], + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .pBufferInfo = &g->uniformInfo, + .dstArrayElement = 0, + .dstBinding = 0, + } + }; + + vkUpdateDescriptorSets(g->device, 1, writes, 0, NULL); + +} + +static void init_render(struct state *g) +{ + VkResult res; + VkAttachmentDescription attachments[] = { + { + .format = g->chainImageFormat, + .samples = NUM_SAMPLES, + .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + .flags = 0, + }, + { + .format = g->depthFormat, + .samples = NUM_SAMPLES, + .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, + .storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + .flags = 0, + }, + }; + VkAttachmentReference color_reference = { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + }; + + VkAttachmentReference depth_reference = { + .attachment = 1, + .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + }; + VkSubpassDescription subpass = { + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .colorAttachmentCount = 1, + .pColorAttachments = &color_reference, + .pDepthStencilAttachment = &depth_reference, + }; + VkRenderPassCreateInfo rp_info = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 
sizeof(attachments) / sizeof(attachments[0]), + .pAttachments = attachments, + .subpassCount = 1, + .pSubpasses = &subpass, + }; + res = vkCreateRenderPass(g->device, &rp_info, NULL, &g->render_pass); + checkFatal(res, "vkCreateRenderPass"); +} + +static void init_framebuffer(struct state *g) +{ + VkResult res; + VkImageView attachments[] = { + NULL, + g->depthView, + }; + + VkFramebufferCreateInfo fb_info = { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .renderPass = g->render_pass, + .attachmentCount = 2, + .pAttachments = attachments, + .width = g->width, + .height = g->height, + .layers = 1, + }; + + g->framebuffers = malloc(sizeof(*g->framebuffers) * g->chainImageCount); + for (int i = 0; i < g->chainImageCount; i++) { + attachments[0] = g->chainImageView[i]; + res = vkCreateFramebuffer(g->device, &fb_info, NULL, &g->framebuffers[i]); + } +} + +static void init_vertexbuffer(struct state *g) +{ + // mostly same as uniform + VkResult res; + const void *data = g_vb_solid_face_colours_Data; + size_t dataSize = sizeof(g_vb_solid_face_colours_Data); + VkBufferCreateInfo buf_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .size = dataSize, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE + }; + res = vkCreateBuffer(g->device, &buf_info, NULL, &g->vertexBuffer); + checkFatal(res, "vkCreateBuffer (vertex)"); + + VkMemoryRequirements req; + vkGetBufferMemoryRequirements(g->device, g->vertexBuffer, &req); + + VkMemoryAllocateInfo alloc = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = req.size, + .memoryTypeIndex = find_memory_type(&g->memory_properties, req.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT), + }; + res = vkAllocateMemory(g->device, &alloc, NULL, &g->vertexMemory); + checkFatal(res, "vkAllocateMemory (vertex)"); + + void *mem; + res = vkMapMemory(g->device, g->vertexMemory, 0, req.size, 0, &mem); + checkFatal(res, "vkMapMemory (vertex)"); + memcpy(mem, data, dataSize); + vkUnmapMemory(g->device, g->vertexMemory); + + res = vkBindBufferMemory(g->device, g->vertexBuffer, g->vertexMemory, 0); + checkFatal(res, "vkBindBufferMemory (vertex)"); + + /* ***************************************** */ + g->vi_binding.binding = 0; + g->vi_binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; + g->vi_binding.stride = sizeof(Vertex); + + g->vi_attribs[0].binding = 0; + g->vi_attribs[0].location = 0; + g->vi_attribs[0].format = VK_FORMAT_R32G32B32A32_SFLOAT; + g->vi_attribs[0].offset = 0; + g->vi_attribs[1].binding = 0; + g->vi_attribs[1].location = 1; + g->vi_attribs[1].format = VK_FORMAT_R32G32B32A32_SFLOAT; + g->vi_attribs[1].offset = 16; +} + +static void init_pipeline(struct state *g) +{ + VkResult res; + VkDynamicState dynamicStateEnables[VK_DYNAMIC_STATE_RANGE_SIZE] = {}; + + VkPipelineDynamicStateCreateInfo dynamicState = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pDynamicStates = dynamicStateEnables, + .dynamicStateCount = 0, + }; + + VkPipelineVertexInputStateCreateInfo vi = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .flags = 0, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = &g->vi_binding, + .vertexAttributeDescriptionCount = 2, + .pVertexAttributeDescriptions = g->vi_attribs + }; + + VkPipelineInputAssemblyStateCreateInfo ia = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .flags = 0, + .primitiveRestartEnable = VK_FALSE, + .topology = 
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, + }; + + VkPipelineRasterizationStateCreateInfo rs = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .flags = 0, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_BACK_BIT, + .frontFace = VK_FRONT_FACE_CLOCKWISE, + .depthClampEnable = VK_TRUE, + .rasterizerDiscardEnable = VK_FALSE, + .depthBiasEnable = VK_FALSE, + .depthBiasConstantFactor = 0, + .depthBiasClamp = 0, + .depthBiasSlopeFactor = 0, + .lineWidth = 1.0f, + }; + + VkPipelineColorBlendAttachmentState att_state[] = { + { + .colorWriteMask = 0xf, + .blendEnable = VK_FALSE, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + } + }; + + VkPipelineColorBlendStateCreateInfo cb = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .flags = 0, + .attachmentCount = 1, + .pAttachments = att_state, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_NO_OP, + .blendConstants[0] = 1.0f, + .blendConstants[1] = 1.0f, + .blendConstants[2] = 1.0f, + .blendConstants[3] = 1.0f, + }; + + VkPipelineViewportStateCreateInfo vp = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .flags = 0, + .viewportCount = 1, + .scissorCount = 1, + }; + dynamicStateEnables[dynamicState.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT; + dynamicStateEnables[dynamicState.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR; + + VkPipelineDepthStencilStateCreateInfo ds = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .flags = 0, + .depthTestEnable = VK_TRUE, + .depthWriteEnable = VK_TRUE, + .depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL, + .depthBoundsTestEnable = VK_FALSE, + .minDepthBounds = 0, + .maxDepthBounds = 0, + .stencilTestEnable = VK_FALSE, + .back.failOp = VK_STENCIL_OP_KEEP, + .back.passOp = VK_STENCIL_OP_KEEP, + .back.compareOp = VK_COMPARE_OP_ALWAYS, + .back.compareMask = 0, + .back.reference = 0, + .back.depthFailOp = VK_STENCIL_OP_KEEP, + .back.writeMask = 0, + .front.failOp = VK_STENCIL_OP_KEEP, + .front.passOp = VK_STENCIL_OP_KEEP, + .front.compareOp = VK_COMPARE_OP_ALWAYS, + .front.compareMask = 0, + .front.reference = 0, + .front.depthFailOp = VK_STENCIL_OP_KEEP, + .front.writeMask = 0, + }; + + VkPipelineMultisampleStateCreateInfo ms = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .flags = 0, + .pSampleMask = NULL, + .rasterizationSamples = NUM_SAMPLES, + .sampleShadingEnable = VK_FALSE, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, + .minSampleShading = 0.0, + }; + + { + VkShaderModuleCreateInfo vsInfo = { + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .codeSize = sizeof(cube_vs), + .pCode = cube_vs, + }; + VkShaderModuleCreateInfo fsInfo = { + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .codeSize = sizeof(cube_fs), + .pCode = cube_fs, + }; + + res = vkCreateShaderModule(g->device, &vsInfo, NULL, &g->shader[0]); + checkFatal(res, "vkCreateShaderModule (vs)"); + res = vkCreateShaderModule(g->device, &fsInfo, NULL, &g->shader[1]); + checkFatal(res, "vkCreateShaderModule (fs)"); + g->shaderCount = 2; + } + + VkPipelineShaderStageCreateInfo shaderStages[] = { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .pName = "main", + .module = g->shader[0], + }, + { + .sType = 
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .pName = "main", + .module = g->shader[1], + } + }; + + VkGraphicsPipelineCreateInfo pipeline = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .layout = g->pipeline_layout, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + .flags = 0, + .pVertexInputState = &vi, + .pInputAssemblyState = &ia, + .pRasterizationState = &rs, + .pColorBlendState = &cb, + .pTessellationState = NULL, + .pMultisampleState = &ms, + .pDynamicState = &dynamicState, + .pViewportState = &vp, + .pDepthStencilState = &ds, + .pStages = shaderStages, + .stageCount = 2, + .renderPass = g->render_pass, + .subpass = 0, + }; + + res = vkCreateGraphicsPipelines(g->device, NULL, 1, &pipeline, NULL, g->pipeline); + checkFatal(res, "vkCreateGraphicsPipeline"); + + VkSemaphoreCreateInfo seminfo = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + }; + res = vkCreateSemaphore(g->device, &seminfo, NULL, &g->chainSemaphore); + checkFatal(res, "vkCreateSemaphore"); + + VkFenceCreateInfo fenceInfo = { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO + }; + res = vkCreateFence(g->device, &fenceInfo, NULL, &g->drawFence); + +} + +void cmd_viewport(struct state *g) +{ + VkViewport viewport = { + .x = 0, + .y = 0, + .width = g->width, + .height = g->height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + vkCmdSetViewport(g->cmd, 0, 1, &viewport); +} + +void cmd_scissors(struct state *g) +{ + VkRect2D scissor = { + .offset.x = 0, + .offset.y = 0, + .extent.width = g->width, + .extent.height = g->height + }; + vkCmdSetScissor(g->cmd, 0, 1, &scissor); +} + +static void cmd_paint(struct state *g) +{ + VkResult res; + + res = vkAcquireNextImageKHR(g->device, g->chain, UINT64_MAX, g->chainSemaphore, VK_NULL_HANDLE, &g->chainIndex); + checkFatal(res, "vkAcquireNextImageKHR"); + + const VkDeviceSize offsets[1] = {0}; + + VkClearValue clear_values[] = { + { + .color.float32[0] = 0.2f, + .color.float32[1] = 0.2f, + .color.float32[2] = 0.2f, + .color.float32[3] = 0.2f + }, + { + .depthStencil.depth = 1.0f, + .depthStencil.stencil = 0 + } + }; + VkRenderPassBeginInfo rp_begin = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = g->render_pass, + .framebuffer = g->framebuffers[g->chainIndex], + .renderArea.offset.x = 0, + .renderArea.offset.y = 0, + .renderArea.extent.width = g->width, + .renderArea.extent.height = g->height, + .clearValueCount = 2, + .pClearValues = clear_values, + }; + + vkCmdBeginRenderPass(g->cmd, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); + + vkCmdBindPipeline(g->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, g->pipeline[0]); + vkCmdBindDescriptorSets(g->cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, g->pipeline_layout, 0, NUM_DESCRIPTOR_SETS, g->desc_set, 0, NULL); + vkCmdBindVertexBuffers(g->cmd, 0, 1, &g->vertexBuffer, offsets); + + cmd_viewport(g); + cmd_scissors(g); + + vkCmdDraw(g->cmd, 12 * 3, 1, 0, 0); + vkCmdEndRenderPass(g->cmd); + + res = vkEndCommandBuffer(g->cmd); + checkFatal(res, "vkEndCommandBuffer"); + + const VkCommandBuffer cmd_bufs[] = {g->cmd}; + + VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + VkSubmitInfo submit_info[] = { + { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &g->chainSemaphore, + .pWaitDstStageMask = &pipe_stage_flags, + .commandBufferCount = 1, + .pCommandBuffers = cmd_bufs, + .signalSemaphoreCount = 0, + .pSignalSemaphores = NULL, + } + }; + /* Queue the command buffer for 
execution */ + vkResetFences(g->device, 1, &g->drawFence); + + res = vkQueueSubmit(g->graphics_queue, 1, submit_info, g->drawFence); + + // hmm, why is this needed, surely it can handle this itself? + + /* Make sure command buffer is finished before presenting */ + do { + res = vkWaitForFences(g->device, 1, &g->drawFence, VK_TRUE, FENCE_TIMEOUT); + } while (res == VK_TIMEOUT); + + /* Now present the image in the window */ + VkPresentInfoKHR present = { + .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, + .swapchainCount = 1, + .pSwapchains = &g->chain, + .pImageIndices = &g->chainIndex, + .pWaitSemaphores = NULL, + .waitSemaphoreCount = 0, + .pResults = NULL, + }; + + res = vkQueuePresentKHR(g->present_queue, &present); +} + +static void shutdown(struct state *g) +{ + vkDestroyFence(g->device, g->drawFence, NULL); + vkDestroySemaphore(g->device, g->chainSemaphore, NULL); + + vkDestroyPipeline(g->device, g->pipeline[0], NULL); + for (int i = 0; i < g->shaderCount; i++) + vkDestroyShaderModule(g->device, g->shader[i], NULL); + + vkFreeMemory(g->device, g->vertexMemory, NULL); + vkDestroyBuffer(g->device, g->vertexBuffer, NULL); + + for (int i = 0; i < g->chainImageCount; i++) + vkDestroyFramebuffer(g->device, g->framebuffers[i], NULL); + + vkDestroyRenderPass(g->device, g->render_pass, NULL); + + vkDestroyDescriptorPool(g->device, g->desc_pool, NULL); + vkDestroyPipelineLayout(g->device, g->pipeline_layout, NULL); + vkDestroyDescriptorSetLayout(g->device, g->desc_layout[0], NULL); + + vkFreeMemory(g->device, g->uniformMemory, NULL); + + vkDestroyImageView(g->device, g->depthView, NULL); + vkFreeMemory(g->device, g->depthMemory, NULL); + vkDestroyImage(g->device, g->depthImage, NULL); + + vkDestroyBuffer(g->device, g->uniformBuffer, NULL); + + for (int i = 0; i < g->chainImageCount; i++) + vkDestroyImageView(g->device, g->chainImageView[i], NULL); + + vkDestroySwapchainKHR(g->device, g->chain, NULL); + + vkDestroyCommandPool(g->device, g->cmd_pool, NULL); + vkDestroyDevice(g->device, NULL); + + vkDestroySurfaceKHR(g->vk, g->surface, NULL); + + vkDestroyInstance(g->vk, NULL); +} + +int main(int argc, char** argv) +{ + struct state state = { + .width = 800, + .height = 800, + .clip = + { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, -1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.5f, 0.0f, + 0.0f, 0.0f, 0.5f, 1.0f + } + }; + float eye[3] = {-5, 3, -10}; + float centre[3] = {0, 0, 0}; + float up[3] = {0, -1, 0}; + float t0[16], t1[16]; + + perspective(state.projection, M_PI_4, 1.0f, 0.1f, 100.0f); + lookAt(state.view, eye, centre, up); + identity4f(state.model); + mult4x4f(t0, state.clip, state.projection); + mult4x4f(t1, t0, state.view); + mult4x4f(state.mvp, t1, state.model); + + init_instance(&state); + init_surface(&state); + + init_device(&state); + init_device_queue(&state); + init_command(&state); + + init_depth(&state); + init_uniform(&state); + init_descriptor(&state); + init_render(&state); + init_framebuffer(&state); + init_vertexbuffer(&state); + init_pipeline(&state); + + execute_begin_command_buffer(&state); + + cmd_paint(&state); + + printf("behold the prize!\n"); + sleep(2); + + shutdown(&state); + + return(EXIT_SUCCESS); +} diff --git a/cube/cube.frag b/cube/cube.frag new file mode 100644 index 0000000..de24544 --- /dev/null +++ b/cube/cube.frag @@ -0,0 +1,8 @@ +#version 400 +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable +layout (location = 0) in vec4 color; +layout (location = 0) out vec4 outColor; +void main() { + outColor = color; +} diff --git 
a/cube/cube.vert b/cube/cube.vert new file mode 100644 index 0000000..5d21e1e --- /dev/null +++ b/cube/cube.vert @@ -0,0 +1,14 @@ +#version 400 +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable +layout (std140, binding = 0) uniform bufferVals { + mat4 mvp; +} data; +layout (location = 0) in vec4 pos; +layout (location = 1) in vec4 inColor; +layout (location = 0) out vec4 outColor; + +void main() { + outColor = inColor; + gl_Position = data.mvp * pos; +} diff --git a/mandelbrot-cl/Makefile b/mandelbrot-cl/Makefile new file mode 100644 index 0000000..93559ef --- /dev/null +++ b/mandelbrot-cl/Makefile @@ -0,0 +1,12 @@ + +include ../config.make + +LDLIBS=$(compute_LDLIBS) +CFLAGS=$(compute_CFLAGS) + +all: mandelbrot +mandelbrot: mandelbrot.o +mandelbrot.o: mandelbrot.c mandelbrot.cl.inc + +clean: + rm -f *.o mandelbrot mandelbrot.cl.inc mandelbrot.cl.csv diff --git a/mandelbrot-cl/mandelbrot.c b/mandelbrot-cl/mandelbrot.c new file mode 100644 index 0000000..9ae7b11 --- /dev/null +++ b/mandelbrot-cl/mandelbrot.c @@ -0,0 +1,531 @@ + /* +The MIT License (MIT) + +Copyright (C) 2017 Eric Arnebäck +Copyright (C) 2019 Michael Zucchi + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + + */ + +/* + * This is a C conversion of this: + * https://github.com/Erkaman/vulkan_minimal_compute + * + * In addition it's been made completely stand-alone + * and simplified further. C struct initialisation + * syntax is used heavily to reduce clutter. + */ + +#include +#include + +#include +#include + +// These 4 defines must match compute shader. The local work size +// must be a power of 2. They can be other values (up to the device +// limits) but they affect concurrency and efficiency of the gpu. 
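+// mandelbrot.cl repeats these values and pins the workgroup shape with
+// __attribute__((reqd_work_group_size(LWS_X, LWS_Y, 1))), so the two
+// files must be kept in sync.  Covering the whole image needs
+// ceil(WIDTH / LWS_X) * ceil(HEIGHT / LWS_Y) workgroups.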
+#define WIDTH (1920*2) +#define HEIGHT (1200*2) +#define LWS_X 8 +#define LWS_Y 8 + +// The compiled shader (SPIR-V binary) +#include "mandelbrot.cl.inc" +#define mandelbrot_entry "mandelbrot" + +#define ARRAY_SIZEOF(a) (sizeof(a)/sizeof(*a)) + +struct Pixel { + float r, g, b, a; +}; + +struct state { + VkInstance instance; + VkPhysicalDevice physicalDevice; + + VkDevice device; + VkQueue computeQueue; + + VkDeviceSize dstBufferSize; + VkBuffer dstBuffer; + VkDeviceMemory dstMemory; + + VkDescriptorSetLayout descriptorSetLayout; + VkDescriptorPool descriptorPool; + VkDescriptorSet descriptorSets[1]; + + VkShaderModule mandelbrotShader; + VkPipelineLayout pipelineLayout; + VkPipeline computePipeline[1]; + + VkCommandPool commandPool; + VkCommandBuffer commandBuffers[1]; + + uint32_t computeQueueIndex; + VkPhysicalDeviceMemoryProperties deviceMemoryProperties; +}; + +static void checkFatal(VkResult res, const char *cmd) +{ + if (res != VK_SUCCESS) { + fprintf(stderr, "%s: %d\n", cmd, res); + exit(EXIT_FAILURE); + } +} + +/** + * This finds the memory type index for the memory on a specific device. + */ +static int find_memory_type(VkPhysicalDeviceMemoryProperties *memory, uint32_t typeMask, VkMemoryPropertyFlagBits query) +{ + for (int i = 0; i < memory->memoryTypeCount; i++) { + if (((1 << i) & typeMask) && (memory->memoryTypes[i].propertyFlags & query) == query) + return i; + } + return -1; +} + +/** + * Round up to next nearest value of step size. Step must be a power of 2. + */ +static uint32_t round_up(uint32_t v, uint32_t step) +{ + return(v + step - 1)&~(step - 1); +} + +/** + * Create vulkan instance. + * + * Compute-only requires no extensions so this is simple. + * + * To turn on debugging layers: + * export VK_INSTANCE_LAYERS=VK_LAYER_LUNARG_standard_validation + */ +static void init_instance(struct state *g) +{ + VkResult res; + VkApplicationInfo app = { + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pApplicationName = "mandelbrot", + .pEngineName = "none", + .apiVersion = VK_API_VERSION_1_0, + }; + const char * const extensions[] = { + }; + VkInstanceCreateInfo info = { + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pApplicationInfo = &app, + .enabledExtensionCount = sizeof(extensions) / sizeof(*extensions), + .ppEnabledExtensionNames = extensions, + }; + + res = vkCreateInstance(&info, NULL, &g->instance); + checkFatal(res, "vkCreateInstance"); +} + +/** + * This finds a suitable device and queue family. + * + * In this case it is a device that supports a compute queue. It + * preferentially looks for a non-graphics compute queue. 
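+ *
+ * The usual way to keep the best-scoring (device, queue) pair seen so
+ * far is (a sketch of the intent, not a quote of the code below):
+ *
+ *   if (score > best) { best = score; devid = i; queueid = j; }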
+ * + * It could also make use of: + * VkPhysicalDeviceProperties props; + * vkGetPhysicalDeviceProperties(devs[i], &props); + */ +static void init_device(struct state *g) +{ + VkResult res; + uint32_t devcount; + + res = vkEnumeratePhysicalDevices(g->instance, &devcount, NULL); + VkPhysicalDevice devs[devcount]; + res = vkEnumeratePhysicalDevices(g->instance, &devcount, devs); + + int best = 0; + int devid = -1; + int queueid = -1; + + for (int i = 0; i < devcount; i++) { + uint32_t family_count; + + vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, NULL); + VkQueueFamilyProperties famprops[family_count]; + vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, famprops); + + for (uint32_t j = 0; j < family_count; j++) { + int score = 0; + + if ((famprops[j].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0) + score += 1; + if ((famprops[j].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0) + score += 1; + + if (score > best) { + score = best; + devid = i; + queueid = j; + } + } + } + + if (devid == -1) + checkFatal(VK_ERROR_FEATURE_NOT_PRESENT, "init_device"); + + g->physicalDevice = devs[devid]; + g->computeQueueIndex = queueid; + + float qpri[] = {0.0f}; + VkDeviceQueueCreateInfo qinfo = { + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .queueCount = 1, + .pQueuePriorities = qpri, // Note: cannot be null + .queueFamilyIndex = g->computeQueueIndex, + }; + VkDeviceCreateInfo devinfo = { + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .queueCreateInfoCount = 1, + .pQueueCreateInfos = &qinfo, + }; + + res = vkCreateDevice(g->physicalDevice, &devinfo, NULL, &g->device); + checkFatal(res, "vkCreateDevice"); + + /* These values are cached for convenience */ + vkGetPhysicalDeviceMemoryProperties(g->physicalDevice, &g->deviceMemoryProperties); + vkGetDeviceQueue(g->device, g->computeQueueIndex, 0, &g->computeQueue); +} + +/** + * Buffers are created in three steps: + * 1) create buffer, specifying usage and size + * 2) allocate memory based on memory requirements + * 3) bind memory + * + */ +static void init_buffer(struct state *g, VkDeviceSize dataSize, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer *buffer, VkDeviceMemory *memory) +{ + VkResult res; + VkMemoryRequirements req; + VkBufferCreateInfo buf_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = dataSize, + .usage = usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE + }; + res = vkCreateBuffer(g->device, &buf_info, NULL, buffer); + checkFatal(res, "vkCreateBuffer"); + + vkGetBufferMemoryRequirements(g->device, *buffer, &req); + + VkMemoryAllocateInfo alloc = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = req.size, + .memoryTypeIndex = find_memory_type(&g->deviceMemoryProperties, req.memoryTypeBits, properties) + }; + res = vkAllocateMemory(g->device, &alloc, NULL, memory); + checkFatal(res, "vkAllocateMemory"); + + res = vkBindBufferMemory(g->device, *buffer, *memory, 0); + checkFatal(res, "vkBindBufferMemory"); +} + +/** + * Descriptors are used to bind and describe memory blocks + * to shaders. + * + * *Pool is used to allocate descriptors, it is per-device. + * *Layout is used to group descriptors for a given pipeline, + * The descriptors describe individually-addressable blocks. 
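+ *
+ * Here there is a single storage buffer at set 0, binding 0.  With clspv
+ * the set/binding assigned to the kernel's imageData argument is reported
+ * by --descriptormap (the mandelbrot.cl.csv file the Makefile generates);
+ * binding 0 is assumed to agree with that map.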
+ */ +static void init_descriptor(struct state *g) +{ + VkResult res; + /* Create descriptorset layout */ + VkDescriptorSetLayoutBinding layout_binding = { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + }; + VkDescriptorSetLayoutCreateInfo descriptor_layout = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = &layout_binding, + }; + res = vkCreateDescriptorSetLayout(g->device, &descriptor_layout, NULL, &g->descriptorSetLayout); + checkFatal(res, "vkCreateDescriptorSetLayout"); + + /* Create descriptor pool */ + VkDescriptorPoolSize type_count[] = { + { + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + } + }; + VkDescriptorPoolCreateInfo descriptor_pool = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .maxSets = 1, + .poolSizeCount = ARRAY_SIZEOF(type_count), + .pPoolSizes = type_count, + }; + + res = vkCreateDescriptorPool(g->device, &descriptor_pool, NULL, &g->descriptorPool); + checkFatal(res, "vkCreateDescriptorPool"); + + /* Allocate from pool */ + VkDescriptorSetAllocateInfo alloc_info[] = { + { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = g->descriptorPool, + .descriptorSetCount = 1, + .pSetLayouts = &g->descriptorSetLayout, + }, + }; + res = vkAllocateDescriptorSets(g->device, alloc_info, g->descriptorSets); + checkFatal(res, "vkAllocateDescriptorSets"); + + /* Bind a buffer to the descriptor */ + VkDescriptorBufferInfo bufferInfo = { + .buffer = g->dstBuffer, + .offset = 0, + .range = g->dstBufferSize, + }; + VkWriteDescriptorSet writeSet[] = { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = g->descriptorSets[0], + .dstBinding = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pBufferInfo = &bufferInfo, + } + }; + + vkUpdateDescriptorSets(g->device, ARRAY_SIZEOF(writeSet), writeSet, 0, NULL); +} + +/** + * Create the compute pipeline. This is the shader and data layouts for it. + */ +static void init_pipeline(struct state *g) +{ + VkResult res; + /* Set shader code */ + VkShaderModuleCreateInfo vsInfo = { + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .codeSize = sizeof(mandelbrot_cl), + .pCode = mandelbrot_cl, + }; + + res = vkCreateShaderModule(g->device, &vsInfo, NULL, &g->mandelbrotShader); + checkFatal(res, "vkCreateShaderModule"); + + /* Link shader to layout */ + VkPipelineLayoutCreateInfo pipelineinfo = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &g->descriptorSetLayout, + }; + + res = vkCreatePipelineLayout(g->device, &pipelineinfo, NULL, &g->pipelineLayout); + checkFatal(res, "vkCreatePipelineLayout"); + + /* Create pipeline */ + VkComputePipelineCreateInfo pipeline = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = g->mandelbrotShader, + .pName = mandelbrot_entry, + }, + .layout = g->pipelineLayout + + }; + res = vkCreateComputePipelines(g->device, NULL, 1, &pipeline, NULL, g->computePipeline); + checkFatal(res, "vkCreateComputePipeline"); +} + +/** + * Create a command buffer, this is somewhat like a display list. 
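+ *
+ * Note that vkCmdDispatch() takes workgroup counts, not invocation
+ * counts, so one invocation per pixel would normally be dispatched as
+ * (sketch):
+ *
+ *   vkCmdDispatch(cmd, round_up(WIDTH, LWS_X) / LWS_X,
+ *                      round_up(HEIGHT, LWS_Y) / LWS_Y, 1);
+ *
+ * The bounds check in the kernel keeps the output correct either way;
+ * larger counts only add redundant work.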
+ */ +static void init_command_buffer(struct state *g) +{ + VkResult res; + VkCommandPoolCreateInfo poolinfo = { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .queueFamilyIndex = g->computeQueueIndex, + }; + + res = vkCreateCommandPool(g->device, &poolinfo, NULL, &g->commandPool); + checkFatal(res, "vkCreateCommandPool"); + + VkCommandBufferAllocateInfo cmdinfo = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .commandPool = g->commandPool, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = 1, + }; + + res = vkAllocateCommandBuffers(g->device, &cmdinfo, g->commandBuffers); + checkFatal(res, "vkAllocateCommandBuffers"); + + /* Fill command buffer with commands for later operation */ + VkCommandBufferBeginInfo beginInfo = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }; + res = vkBeginCommandBuffer(g->commandBuffers[0], &beginInfo); + checkFatal(res, "vkBeginCommandBuffer"); + + /* Bind the compute operation and data */ + vkCmdBindPipeline(g->commandBuffers[0], VK_PIPELINE_BIND_POINT_COMPUTE, g->computePipeline[0]); + vkCmdBindDescriptorSets(g->commandBuffers[0], VK_PIPELINE_BIND_POINT_COMPUTE, g->pipelineLayout, 0, 1, g->descriptorSets, 0, NULL); + + /* Run it */ + vkCmdDispatch(g->commandBuffers[0], round_up(WIDTH, LWS_X), round_up(HEIGHT, LWS_Y), 1); + + res = vkEndCommandBuffer(g->commandBuffers[0]); + checkFatal(res, "vkEndCommandBuffer"); +} + +/** + * Execute the pre-created command buffer. + * + * A fence is used to wait for completion. + */ +static void execute(struct state *g) +{ + VkResult res; + VkSubmitInfo submitInfo[] = { + { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = g->commandBuffers + }, + }; + + /* Create fence to mark the task completion */ + VkFence fence; + VkFenceCreateInfo fenceInfo = { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO + }; + res = vkCreateFence(g->device, &fenceInfo, NULL, &fence); + checkFatal(res, "vkCreateFence"); + + /* Await completion */ + res = vkQueueSubmit(g->computeQueue, ARRAY_SIZEOF(submitInfo), submitInfo, fence); + checkFatal(res, "vkQueueSubmit"); + + do { + res = vkWaitForFences(g->device, 1, &fence, VK_TRUE, 1000000); + } while (res == VK_TIMEOUT); + + vkDestroyFence(g->device, fence, NULL); +} + +/** + * Trivial pnm format image output. + */ +static void pam_save(const char *name, unsigned int width, unsigned int height, unsigned int depth, const uint8_t *pixels) +{ + FILE *fp = fopen(name, "wb"); + + if (fp) { + fprintf(fp, "P6\n%d\n%d\n255\n", width, height); + fwrite(pixels, depth, width*height, fp); + fclose(fp); + printf("wrote: %s\n", name); + } +} + +/** + * Accesses the gpu buffer, converts it to RGB byte, and saves it as a pam file. + */ +void save_result(struct state *g) +{ + VkResult res; + struct Pixel *dst; + + res = vkMapMemory(g->device, g->dstMemory, 0, g->dstBufferSize, 0, (void **)&dst); + checkFatal(res, "vkMapMemory"); + + uint8_t *pixels = malloc(WIDTH * HEIGHT * 3); + + // this is super-slow! 
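+	/*
+	 * Slow because every float of the WIDTH*HEIGHT float4 buffer is read
+	 * back from host-visible memory (which may be uncached for CPU reads
+	 * on some devices) and converted one component at a time.  One
+	 * alternative, not done here, would be to have the kernel store
+	 * packed bytes into a hypothetical uchar4 buffer instead, e.g.
+	 *
+	 *   out[idx] = convert_uchar4_sat(color * 255.0f);
+	 *
+	 * leaving only a straight copy on this side.
+	 */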
+ for (int i = 0; i < WIDTH * HEIGHT; i++) { + pixels[i * 3 + 0] = (uint8_t)(255.0f * dst[i].r); + pixels[i * 3 + 1] = (uint8_t)(255.0f * dst[i].g); + pixels[i * 3 + 2] = (uint8_t)(255.0f * dst[i].b); + } + + vkUnmapMemory(g->device, g->dstMemory); + + pam_save("mandelbrot.pam", WIDTH, HEIGHT, 3, pixels); + free(pixels); +} + +static void shutdown(struct state *g) +{ + vkDestroyCommandPool(g->device, g->commandPool, NULL); + + vkDestroyPipeline(g->device, g->computePipeline[0], NULL); + vkDestroyPipelineLayout(g->device, g->pipelineLayout, NULL); + vkDestroyShaderModule(g->device, g->mandelbrotShader, NULL); + + vkDestroyDescriptorPool(g->device, g->descriptorPool, NULL); + vkDestroyDescriptorSetLayout(g->device, g->descriptorSetLayout, NULL); + + vkFreeMemory(g->device, g->dstMemory, NULL); + vkDestroyBuffer(g->device, g->dstBuffer, NULL); + + vkDestroyDevice(g->device, NULL); + vkDestroyInstance(g->instance, NULL); +} + +int main(int argc, char** argv) +{ + struct state g = { + .dstBufferSize = WIDTH * HEIGHT * sizeof(struct Pixel), + }; + + init_instance(&g); + init_device(&g); + + init_buffer(&g, g.dstBufferSize, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + &g.dstBuffer, &g.dstMemory); + + init_descriptor(&g); + init_pipeline(&g); + init_command_buffer(&g); + + execute(&g); + + save_result(&g); + + shutdown(&g); + + return(EXIT_SUCCESS); +} diff --git a/mandelbrot-cl/mandelbrot.cl b/mandelbrot-cl/mandelbrot.cl new file mode 100644 index 0000000..7031cbe --- /dev/null +++ b/mandelbrot-cl/mandelbrot.cl @@ -0,0 +1,50 @@ + +#define WIDTH (1920*2) +#define HEIGHT (1200*2) +#define LWS_X 8 +#define LWS_Y 8 + +kernel void +__attribute__((reqd_work_group_size(LWS_X, LWS_Y, 1))) +mandelbrot(global float4 *imageData) { + + /* + In order to fit the work into workgroups, some unnecessary threads are launched. + We terminate those threads here. + */ + if(get_global_id(0) >= WIDTH || get_global_id(1) >= HEIGHT) + return; + + float x = convert_float(get_global_id(0)) / WIDTH; + float y = convert_float(get_global_id(1)) / HEIGHT; + + /* + What follows is code for rendering the mandelbrot set. 
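+
+	Starting from z = 0 the usual escape-time iteration is applied:
+
+	  z -> z*z + c   (complex square, i.e.
+	                  z = (float2)(z.x*z.x - z.y*z.y, 2.0f*z.x*z.y) + c)
+
+	counting the steps until dot(z, z) > 4, capped at M.  That count is
+	then mapped to a colour below.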
+	 */
+	float2 uv = (float2)(x, (y - 0.5f) * (12.0f / 19.0f) + 0.5f);
+	float n = 0.0f;
+	float2 c = (float2)(-0.445f, 0.0f) + (uv - 0.5f)*(4.0f);
+	float2 z = (float2)(0.0f, 0.0f);
+	const int M = 1000;
+
+	for (int i = 0; i < M; i++) {
+		z = (float2)(z.x*z.x - z.y*z.y, 2.0f*z.x*z.y) + c;
+		if (dot(z, z) > 4)
+			break;
+		n++;
+	}
+
+	// we use a simple cosine palette to determine color:
+	// http://iquilezles.org/www/articles/palettes/palettes.htm
+	float t = n * 10.0f / M;
+	float3 d = (float3)(0.5f, 0.5f, 0.5f);
+	float3 e = (float3)(0.5f, 0.5f, 0.5f);
+	float3 f = (float3)(1.0f, 1.0f, 1.0f);
+	float3 g = (float3)(0.00f, 0.33f, 0.67f);
+	float4 color = (float4)( d + e * cos(6.28318f * (f * t + g)), 1.0f);
+
+	if (convert_int(n) == M)
+		color = (float4)(0, 0, 0, 1);
+
+	// store the rendered mandelbrot set into a storage buffer:
+	imageData[WIDTH * get_global_id(1) + get_global_id(0)] = color;
+}
diff --git a/mandelbrot-cs/Makefile b/mandelbrot-cs/Makefile
new file mode 100644
index 0000000..cb0d541
--- /dev/null
+++ b/mandelbrot-cs/Makefile
@@ -0,0 +1,13 @@
+
+include ../config.make
+
+LDLIBS=$(compute_LDLIBS)
+CFLAGS=$(compute_CFLAGS)
+
+all: mandelbrot
+mandelbrot: mandelbrot.o
+mandelbrot.o: mandelbrot.c mandelbrot.comp.inc
+
+clean:
+	rm -f *.o mandelbrot mandelbrot.comp.inc
+
diff --git a/mandelbrot-cs/mandelbrot.c b/mandelbrot-cs/mandelbrot.c
new file mode 100644
index 0000000..64c14f2
--- /dev/null
+++ b/mandelbrot-cs/mandelbrot.c
@@ -0,0 +1,531 @@
+ /*
+The MIT License (MIT)
+
+Copyright (C) 2017 Eric Arnebäck
+Copyright (C) 2019 Michael Zucchi
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+ */
+
+/*
+ * This is a C conversion of this:
+ * https://github.com/Erkaman/vulkan_minimal_compute
+ *
+ * In addition it's been made completely stand-alone
+ * and simplified further. C struct initialisation
+ * syntax is used heavily to reduce clutter.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+// These 4 defines must match the compute shader. The local work sizes
+// must be a power of 2 (round_up() relies on that); other powers of 2
+// are possible (up to the device limits) but they affect the
+// concurrency and efficiency of the gpu.
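+// With the values below the image is 3840x2400 (9,216,000 pixels),
+// processed in 8x8 = 64-invocation workgroups.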
+#define WIDTH (1920*2) +#define HEIGHT (1200*2) +#define LWS_X 8 +#define LWS_Y 8 + +// The compiled shader (SPIR-V binary) +#include "mandelbrot.comp.inc" +#define mandelbrot_entry "main" + +#define ARRAY_SIZEOF(a) (sizeof(a)/sizeof(*a)) + +struct Pixel { + float r, g, b, a; +}; + +struct state { + VkInstance instance; + VkPhysicalDevice physicalDevice; + + VkDevice device; + VkQueue computeQueue; + + VkDeviceSize dstBufferSize; + VkBuffer dstBuffer; + VkDeviceMemory dstMemory; + + VkDescriptorSetLayout descriptorSetLayout; + VkDescriptorPool descriptorPool; + VkDescriptorSet descriptorSets[1]; + + VkShaderModule mandelbrotShader; + VkPipelineLayout pipelineLayout; + VkPipeline computePipeline[1]; + + VkCommandPool commandPool; + VkCommandBuffer commandBuffers[1]; + + uint32_t computeQueueIndex; + VkPhysicalDeviceMemoryProperties deviceMemoryProperties; +}; + +static void checkFatal(VkResult res, const char *cmd) +{ + if (res != VK_SUCCESS) { + fprintf(stderr, "%s: %d\n", cmd, res); + exit(EXIT_FAILURE); + } +} + +/** + * This finds the memory type index for the memory on a specific device. + */ +static int find_memory_type(VkPhysicalDeviceMemoryProperties *memory, uint32_t typeMask, VkMemoryPropertyFlagBits query) +{ + for (int i = 0; i < memory->memoryTypeCount; i++) { + if (((1 << i) & typeMask) && (memory->memoryTypes[i].propertyFlags & query) == query) + return i; + } + return -1; +} + +/** + * Round up to next nearest value of step size. Step must be a power of 2. + */ +static uint32_t round_up(uint32_t v, uint32_t step) +{ + return(v + step - 1)&~(step - 1); +} + +/** + * Create vulkan instance. + * + * Compute-only requires no extensions so this is simple. + * + * To turn on debugging layers: + * export VK_INSTANCE_LAYERS=VK_LAYER_LUNARG_standard_validation + */ +static void init_instance(struct state *g) +{ + VkResult res; + VkApplicationInfo app = { + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pApplicationName = "mandelbrot", + .pEngineName = "none", + .apiVersion = VK_API_VERSION_1_0, + }; + const char * const extensions[] = { + }; + VkInstanceCreateInfo info = { + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pApplicationInfo = &app, + .enabledExtensionCount = sizeof(extensions) / sizeof(*extensions), + .ppEnabledExtensionNames = extensions, + }; + + res = vkCreateInstance(&info, NULL, &g->instance); + checkFatal(res, "vkCreateInstance"); +} + +/** + * This finds a suitable device and queue family. + * + * In this case it is a device that supports a compute queue. It + * preferentially looks for a non-graphics compute queue. 
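+ * Each candidate family is scored: +1 for advertising COMPUTE and a
+ * further +1 for not advertising GRAPHICS, so a dedicated compute
+ * family is chosen over a combined graphics/compute one.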
+ *
+ * It could also make use of:
+ *   VkPhysicalDeviceProperties props;
+ *   vkGetPhysicalDeviceProperties(devs[i], &props);
+ */
+static void init_device(struct state *g)
+{
+	VkResult res;
+	uint32_t devcount;
+
+	res = vkEnumeratePhysicalDevices(g->instance, &devcount, NULL);
+	VkPhysicalDevice devs[devcount];
+	res = vkEnumeratePhysicalDevices(g->instance, &devcount, devs);
+	checkFatal(res, "vkEnumeratePhysicalDevices");
+
+	int best = 0;
+	int devid = -1;
+	int queueid = -1;
+
+	for (int i = 0; i < devcount; i++) {
+		uint32_t family_count;
+
+		vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, NULL);
+		VkQueueFamilyProperties famprops[family_count];
+		vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, famprops);
+
+		for (uint32_t j = 0; j < family_count; j++) {
+			int score = 0;
+
+			if ((famprops[j].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0)
+				score += 1;
+			if ((famprops[j].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0)
+				score += 1;
+
+			if (score > best) {
+				best = score;
+				devid = i;
+				queueid = j;
+			}
+		}
+	}
+
+	if (devid == -1)
+		checkFatal(VK_ERROR_FEATURE_NOT_PRESENT, "init_device");
+
+	g->physicalDevice = devs[devid];
+	g->computeQueueIndex = queueid;
+
+	float qpri[] = {0.0f};
+	VkDeviceQueueCreateInfo qinfo = {
+		.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+		.queueCount = 1,
+		.pQueuePriorities = qpri, // Note: cannot be null
+		.queueFamilyIndex = g->computeQueueIndex,
+	};
+	VkDeviceCreateInfo devinfo = {
+		.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+		.queueCreateInfoCount = 1,
+		.pQueueCreateInfos = &qinfo,
+	};
+
+	res = vkCreateDevice(g->physicalDevice, &devinfo, NULL, &g->device);
+	checkFatal(res, "vkCreateDevice");
+
+	/* These values are cached for convenience */
+	vkGetPhysicalDeviceMemoryProperties(g->physicalDevice, &g->deviceMemoryProperties);
+	vkGetDeviceQueue(g->device, g->computeQueueIndex, 0, &g->computeQueue);
+}
+
+/**
+ * Buffers are created in three steps:
+ * 1) create buffer, specifying usage and size
+ * 2) allocate memory based on memory requirements
+ * 3) bind memory
+ *
+ */
+static void init_buffer(struct state *g, VkDeviceSize dataSize, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer *buffer, VkDeviceMemory *memory)
+{
+	VkResult res;
+	VkMemoryRequirements req;
+	VkBufferCreateInfo buf_info = {
+		.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+		.size = dataSize,
+		.usage = usage,
+		.sharingMode = VK_SHARING_MODE_EXCLUSIVE
+	};
+	res = vkCreateBuffer(g->device, &buf_info, NULL, buffer);
+	checkFatal(res, "vkCreateBuffer");
+
+	vkGetBufferMemoryRequirements(g->device, *buffer, &req);
+
+	VkMemoryAllocateInfo alloc = {
+		.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+		.allocationSize = req.size,
+		.memoryTypeIndex = find_memory_type(&g->deviceMemoryProperties, req.memoryTypeBits, properties)
+	};
+	res = vkAllocateMemory(g->device, &alloc, NULL, memory);
+	checkFatal(res, "vkAllocateMemory");
+
+	res = vkBindBufferMemory(g->device, *buffer, *memory, 0);
+	checkFatal(res, "vkBindBufferMemory");
+}
+
+/**
+ * Descriptors are used to bind and describe memory blocks
+ * to shaders.
+ *
+ * *Pool is used to allocate descriptors; it is per-device.
+ * *Layout is used to group descriptors for a given pipeline.
+ * The descriptors describe individually-addressable blocks.
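+ *
+ * Binding 0 set up here corresponds to the declaration in
+ * mandelbrot.comp:
+ *
+ *   layout(std140, binding = 0) buffer buf { Pixel imageData[]; };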
+ */ +static void init_descriptor(struct state *g) +{ + VkResult res; + /* Create descriptorset layout */ + VkDescriptorSetLayoutBinding layout_binding = { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + }; + VkDescriptorSetLayoutCreateInfo descriptor_layout = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = &layout_binding, + }; + res = vkCreateDescriptorSetLayout(g->device, &descriptor_layout, NULL, &g->descriptorSetLayout); + checkFatal(res, "vkCreateDescriptorSetLayout"); + + /* Create descriptor pool */ + VkDescriptorPoolSize type_count[] = { + { + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + } + }; + VkDescriptorPoolCreateInfo descriptor_pool = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .maxSets = 1, + .poolSizeCount = ARRAY_SIZEOF(type_count), + .pPoolSizes = type_count, + }; + + res = vkCreateDescriptorPool(g->device, &descriptor_pool, NULL, &g->descriptorPool); + checkFatal(res, "vkCreateDescriptorPool"); + + /* Allocate from pool */ + VkDescriptorSetAllocateInfo alloc_info[] = { + { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = g->descriptorPool, + .descriptorSetCount = 1, + .pSetLayouts = &g->descriptorSetLayout, + }, + }; + res = vkAllocateDescriptorSets(g->device, alloc_info, g->descriptorSets); + checkFatal(res, "vkAllocateDescriptorSets"); + + /* Bind a buffer to the descriptor */ + VkDescriptorBufferInfo bufferInfo = { + .buffer = g->dstBuffer, + .offset = 0, + .range = g->dstBufferSize, + }; + VkWriteDescriptorSet writeSet[] = { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = g->descriptorSets[0], + .dstBinding = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pBufferInfo = &bufferInfo, + } + }; + + vkUpdateDescriptorSets(g->device, ARRAY_SIZEOF(writeSet), writeSet, 0, NULL); +} + +/** + * Create the compute pipeline. This is the shader and data layouts for it. + */ +static void init_pipeline(struct state *g) +{ + VkResult res; + /* Set shader code */ + VkShaderModuleCreateInfo vsInfo = { + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .codeSize = sizeof(mandelbrot_cs), + .pCode = mandelbrot_cs, + }; + + res = vkCreateShaderModule(g->device, &vsInfo, NULL, &g->mandelbrotShader); + checkFatal(res, "vkCreateShaderModule"); + + /* Link shader to layout */ + VkPipelineLayoutCreateInfo pipelineinfo = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &g->descriptorSetLayout, + }; + + res = vkCreatePipelineLayout(g->device, &pipelineinfo, NULL, &g->pipelineLayout); + checkFatal(res, "vkCreatePipelineLayout"); + + /* Create pipeline */ + VkComputePipelineCreateInfo pipeline = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = g->mandelbrotShader, + .pName = mandelbrot_entry, + }, + .layout = g->pipelineLayout + + }; + res = vkCreateComputePipelines(g->device, NULL, 1, &pipeline, NULL, g->computePipeline); + checkFatal(res, "vkCreateComputePipeline"); +} + +/** + * Create a command buffer, this is somewhat like a display list. 
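+ *
+ * It records the whole job up front (bind pipeline, bind descriptor
+ * set, dispatch) and, as it is begun with ONE_TIME_SUBMIT, it is
+ * submitted exactly once from execute().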
+ */ +static void init_command_buffer(struct state *g) +{ + VkResult res; + VkCommandPoolCreateInfo poolinfo = { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .queueFamilyIndex = g->computeQueueIndex, + }; + + res = vkCreateCommandPool(g->device, &poolinfo, NULL, &g->commandPool); + checkFatal(res, "vkCreateCommandPool"); + + VkCommandBufferAllocateInfo cmdinfo = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .commandPool = g->commandPool, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = 1, + }; + + res = vkAllocateCommandBuffers(g->device, &cmdinfo, g->commandBuffers); + checkFatal(res, "vkAllocateCommandBuffers"); + + /* Fill command buffer with commands for later operation */ + VkCommandBufferBeginInfo beginInfo = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }; + res = vkBeginCommandBuffer(g->commandBuffers[0], &beginInfo); + checkFatal(res, "vkBeginCommandBuffer"); + + /* Bind the compute operation and data */ + vkCmdBindPipeline(g->commandBuffers[0], VK_PIPELINE_BIND_POINT_COMPUTE, g->computePipeline[0]); + vkCmdBindDescriptorSets(g->commandBuffers[0], VK_PIPELINE_BIND_POINT_COMPUTE, g->pipelineLayout, 0, 1, g->descriptorSets, 0, NULL); + + /* Run it */ + vkCmdDispatch(g->commandBuffers[0], round_up(WIDTH, LWS_X), round_up(HEIGHT, LWS_Y), 1); + + res = vkEndCommandBuffer(g->commandBuffers[0]); + checkFatal(res, "vkEndCommandBuffer"); +} + +/** + * Execute the pre-created command buffer. + * + * A fence is used to wait for completion. + */ +static void execute(struct state *g) +{ + VkResult res; + VkSubmitInfo submitInfo[] = { + { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = g->commandBuffers + }, + }; + + /* Create fence to mark the task completion */ + VkFence fence; + VkFenceCreateInfo fenceInfo = { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO + }; + res = vkCreateFence(g->device, &fenceInfo, NULL, &fence); + checkFatal(res, "vkCreateFence"); + + /* Await completion */ + res = vkQueueSubmit(g->computeQueue, ARRAY_SIZEOF(submitInfo), submitInfo, fence); + checkFatal(res, "vkQueueSubmit"); + + do { + res = vkWaitForFences(g->device, 1, &fence, VK_TRUE, 1000000); + } while (res == VK_TIMEOUT); + + vkDestroyFence(g->device, fence, NULL); +} + +/** + * Trivial pnm format image output. + */ +static void pam_save(const char *name, unsigned int width, unsigned int height, unsigned int depth, const uint8_t *pixels) +{ + FILE *fp = fopen(name, "wb"); + + if (fp) { + fprintf(fp, "P6\n%d\n%d\n255\n", width, height); + fwrite(pixels, depth, width*height, fp); + fclose(fp); + printf("wrote: %s\n", name); + } +} + +/** + * Accesses the gpu buffer, converts it to RGB byte, and saves it as a pam file. + */ +void save_result(struct state *g) +{ + VkResult res; + struct Pixel *dst; + + res = vkMapMemory(g->device, g->dstMemory, 0, g->dstBufferSize, 0, (void **)&dst); + checkFatal(res, "vkMapMemory"); + + uint8_t *pixels = malloc(WIDTH * HEIGHT * 3); + + // this is super-slow! 
+	for (int i = 0; i < WIDTH * HEIGHT; i++) {
+		pixels[i * 3 + 0] = (uint8_t)(255.0f * dst[i].r);
+		pixels[i * 3 + 1] = (uint8_t)(255.0f * dst[i].g);
+		pixels[i * 3 + 2] = (uint8_t)(255.0f * dst[i].b);
+	}
+
+	vkUnmapMemory(g->device, g->dstMemory);
+
+	pam_save("mandelbrot.pam", WIDTH, HEIGHT, 3, pixels);
+	free(pixels);
+}
+
+static void shutdown(struct state *g)
+{
+	vkDestroyCommandPool(g->device, g->commandPool, NULL);
+
+	vkDestroyPipeline(g->device, g->computePipeline[0], NULL);
+	vkDestroyPipelineLayout(g->device, g->pipelineLayout, NULL);
+	vkDestroyShaderModule(g->device, g->mandelbrotShader, NULL);
+
+	vkDestroyDescriptorPool(g->device, g->descriptorPool, NULL);
+	vkDestroyDescriptorSetLayout(g->device, g->descriptorSetLayout, NULL);
+
+	vkFreeMemory(g->device, g->dstMemory, NULL);
+	vkDestroyBuffer(g->device, g->dstBuffer, NULL);
+
+	vkDestroyDevice(g->device, NULL);
+	vkDestroyInstance(g->instance, NULL);
+}
+
+int main(int argc, char** argv)
+{
+	struct state g = {
+		.dstBufferSize = WIDTH * HEIGHT * sizeof(struct Pixel),
+	};
+
+	init_instance(&g);
+	init_device(&g);
+
+	init_buffer(&g, g.dstBufferSize,
+		VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+		VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+		&g.dstBuffer, &g.dstMemory);
+
+	init_descriptor(&g);
+	init_pipeline(&g);
+	init_command_buffer(&g);
+
+	execute(&g);
+
+	save_result(&g);
+
+	shutdown(&g);
+
+	return(EXIT_SUCCESS);
+}
diff --git a/mandelbrot-cs/mandelbrot.comp b/mandelbrot-cs/mandelbrot.comp
new file mode 100644
index 0000000..ed13bf2
--- /dev/null
+++ b/mandelbrot-cs/mandelbrot.comp
@@ -0,0 +1,63 @@
+#version 450
+
+#define WIDTH (1920*2)
+#define HEIGHT (1200*2)
+#define LWS_X 8
+#define LWS_Y 8
+
+layout (local_size_x = LWS_X, local_size_y = LWS_Y, local_size_z = 1 ) in;
+
+struct Pixel{
+	vec4 value;
+};
+
+layout(std140, binding = 0) buffer buf
+{
+	Pixel imageData[];
+};
+
+void main() {
+
+	/*
+	In order to fit the work into workgroups, some unnecessary threads are launched.
+	We terminate those threads here.
+	*/
+	if(gl_GlobalInvocationID.x >= WIDTH || gl_GlobalInvocationID.y >= HEIGHT)
+		return;
+
+	float x = float(gl_GlobalInvocationID.x) / float(WIDTH);
+	float y = float(gl_GlobalInvocationID.y) / float(HEIGHT);
+
+	/*
+	What follows is code for rendering the mandelbrot set.
+	*/
+	vec2 uv = vec2(x, (y - 0.5) * (12.0 / 19.0) + 0.5);
+	float n = 0.0;
+	vec2 c = vec2(-.445, 0.0) + (uv - 0.5)*(4.0);
+	vec2 z = vec2(0.0);
+	const int M = 1000;
+
+	for (int i = 0; i < M; i++) {
+		z = vec2(z.x*z.x - z.y*z.y, 2.0*z.x*z.y) + c;
+		if (dot(z, z) > 4)
+			break;
+		n++;
+	}
+
+	// we use a simple cosine palette to determine color:
+	// http://iquilezles.org/www/articles/palettes/palettes.htm
+	float t = float(n) * 10.0 / float(M);
+	vec3 d = vec3(0.5, 0.5, 0.5);
+	vec3 e = vec3(0.5, 0.5, 0.5);
+	vec3 f = vec3(1.0, 1.0, 1.0);
+	vec3 g = vec3(0.00, 0.33, 0.67);
+
+	vec4 color = vec4( d + e*cos( 6.28318*(f*t+g) ) ,1.0);
+
+	if (n == M)
+		color = vec4(0, 0, 0, 1);
+
+	// store the rendered mandelbrot set into a storage buffer:
+	imageData[WIDTH * gl_GlobalInvocationID.y + gl_GlobalInvocationID.x].value = color;
+}
diff --git a/util/glmaths.h b/util/glmaths.h
new file mode 100644
index 0000000..55bbadf
--- /dev/null
+++ b/util/glmaths.h
@@ -0,0 +1,137 @@
+
+/*
+  Some basic GL maths routines, in C.
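+
+  The 4x4 routines work on flat float[16] arrays in what appears to be
+  the usual GL column-major layout (lookAt and frustum follow gluLookAt
+  and glFrustum; perspective takes fovy in radians).  A rough, purely
+  illustrative usage sketch:
+
+    float proj[16], view[16], mvp[16];
+    float eye[3] = {0, 3, 10}, centre[3] = {0, 0, 0}, up[3] = {0, 1, 0};
+
+    perspective(proj, 0.8f, 16.0f / 9.0f, 0.1f, 100.0f);
+    lookAt(view, eye, centre, up);
+    mult4x4f(mvp, proj, view);
+
+  which should leave mvp = proj x view, i.e. the view transform is
+  applied to a column vector first.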
+ */
+
+#ifndef GLMATHS_H
+#define GLMATHS_H
+
+#include <math.h>
+
+static __inline__ void identity4f(float *matrix) {
+	for (int i = 0; i < 16; i++)
+		matrix[i] = 0.0f;
+	for (int i = 0; i < 4; i++)
+		matrix[i * 4 + i] = 1.0f;
+}
+
+static __inline__ float length3f(float * __restrict a) {
+	float sum = 0;
+	for (int i = 0; i < 3; i++)
+		sum += a[i] * a[i];
+	return sqrtf(sum);
+}
+
+static __inline__ void sub3f(float * __restrict c, float * __restrict a, float * __restrict b) {
+	for (int i = 0; i < 3; i++)
+		c[i] = a[i] - b[i];
+}
+
+static __inline__ void norm3f(float * __restrict vec) {
+	float fix = 1.0f / length3f(vec);
+	for (int i = 0; i < 3; i++)
+		vec[i] *= fix;
+}
+
+static __inline__ void cross3f(float * __restrict c, float * __restrict a, float * __restrict b) {
+	c[0] = a[1] * b[2] - a[2] * b[1];
+	c[1] = a[2] * b[0] - a[0] * b[2];
+	c[2] = a[0] * b[1] - a[1] * b[0];
+}
+
+static __inline__ float dot3f(float * __restrict a, float * __restrict b) {
+	return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
+}
+
+static float * mult4x4f(float * __restrict c, float * __restrict b, float * __restrict a) {
+	c[0] = a[0] * b[0] + a[1] * b[4] + a[2] * b[8] + a[3] * b[12];
+	c[1] = a[0] * b[1] + a[1] * b[5] + a[2] * b[9] + a[3] * b[13];
+	c[2] = a[0] * b[2] + a[1] * b[6] + a[2] * b[10] + a[3] * b[14];
+	c[3] = a[0] * b[3] + a[1] * b[7] + a[2] * b[11] + a[3] * b[15];
+
+	c[4] = a[4] * b[0] + a[5] * b[4] + a[6] * b[8] + a[7] * b[12];
+	c[5] = a[4] * b[1] + a[5] * b[5] + a[6] * b[9] + a[7] * b[13];
+	c[6] = a[4] * b[2] + a[5] * b[6] + a[6] * b[10] + a[7] * b[14];
+	c[7] = a[4] * b[3] + a[5] * b[7] + a[6] * b[11] + a[7] * b[15];
+
+	c[8] = a[8] * b[0] + a[9] * b[4] + a[10] * b[8] + a[11] * b[12];
+	c[9] = a[8] * b[1] + a[9] * b[5] + a[10] * b[9] + a[11] * b[13];
+	c[10] = a[8] * b[2] + a[9] * b[6] + a[10] * b[10] + a[11] * b[14];
+	c[11] = a[8] * b[3] + a[9] * b[7] + a[10] * b[11] + a[11] * b[15];
+
+	c[12] = a[12] * b[0] + a[13] * b[4] + a[14] * b[8] + a[15] * b[12];
+	c[13] = a[12] * b[1] + a[13] * b[5] + a[14] * b[9] + a[15] * b[13];
+	c[14] = a[12] * b[2] + a[13] * b[6] + a[14] * b[10] + a[15] * b[14];
+	c[15] = a[12] * b[3] + a[13] * b[7] + a[14] * b[11] + a[15] * b[15];
+
+	return c;
+}
+
+static __inline__ void lookAt(float *mat, float *eye, float *centre, float *up) {
+	float forward[3], side[3], u[3];
+
+	sub3f(forward, centre, eye);
+	norm3f(forward);
+	cross3f(side, forward, up);
+	norm3f(side);
+	cross3f(u, side, forward);
+
+	mat[0] = side[0];
+	mat[4] = side[1];
+	mat[8] = side[2];
+
+	mat[1] = u[0];
+	mat[5] = u[1];
+	mat[9] = u[2];
+
+	mat[2] = -forward[0];
+	mat[6] = -forward[1];
+	mat[10] = -forward[2];
+
+	mat[12] = -dot3f(side, eye);
+	mat[13] = -dot3f(u, eye);
+	mat[14] = dot3f(forward, eye);
+
+	mat[3] = 0.0f;
+	mat[7] = 0.0f;
+	mat[11] = 0.0f;
+
+	mat[15] = 1.0f;
+}
+
+static __inline__ void frustum(float *mat, float left, float right, float bottom, float top, float znear, float zfar) {
+	float temp, temp2, temp3, temp4;
+
+	temp = 2.0 * znear;
+	temp2 = right - left;
+	temp3 = top - bottom;
+	temp4 = zfar - znear;
+	mat[0] = temp / temp2;
+	mat[1] = 0.0;
+	mat[2] = 0.0;
+	mat[3] = 0.0;
+	mat[4] = 0.0;
+	mat[5] = temp / temp3;
+	mat[6] = 0.0;
+	mat[7] = 0.0;
+	mat[8] = (right + left) / temp2;
+	mat[9] = (top + bottom) / temp3;
+	mat[10] = (-zfar - znear) / temp4;
+	mat[11] = -1.0;
+	mat[12] = 0.0;
+	mat[13] = 0.0;
+	mat[14] = (-temp * zfar) / temp4;
+	mat[15] = 0.0;
+}
+
+static __inline__ void perspective(float *mat, float fovy, float aspect, float znear, float zfar) {
+	float ymax, xmax;
+
+	ymax = znear * tanf(fovy * 0.5f);
+	xmax = ymax * aspect;
+
+	frustum(mat, -xmax, xmax, -ymax, ymax, znear, zfar);
+}
+
+#endif
+
diff --git a/util/pam.h b/util/pam.h
new file mode 100644
index 0000000..0a7fd6e
--- /dev/null
+++ b/util/pam.h
@@ -0,0 +1,20 @@
+
+#ifndef PAM_H
+#define PAM_H
+
+/**
+ * Trivial pnm format image output.
+ */
+static void pam_save(const char *name, unsigned int width, unsigned int height, unsigned int depth, const uint8_t *pixels)
+{
+	FILE *fp = fopen(name, "wb");
+
+	if (fp) {
+		fprintf(fp, "P6\n%d\n%d\n255\n", width, height);
+		fwrite(pixels, depth, width*height, fp);
+
+		fclose(fp);
+	}
+}
+
+#endif
diff --git a/util/timing.h b/util/timing.h
new file mode 100644
index 0000000..bddf1d4
--- /dev/null
+++ b/util/timing.h
@@ -0,0 +1,30 @@
+
+/*
+  Very basic timing utilities.
+ */
+
+#ifndef TIMING_H
+#define TIMING_H
+
+#ifdef HAVE_CLOCK_GETTIME
+#include <time.h>
+
+static __inline__ uint64_t time_stamp(void)
+{
+	struct timespec ts;
+	clock_gettime(CLOCK_REALTIME, &ts);
+
+	return ts.tv_nsec + ts.tv_sec * 1000000000ULL;
+}
+
+#define TIME_START uint64_t stamp = time_stamp()
+#define TIME_PRINT printf(" %12.9f %s\n", 1E-9 * (time_stamp() - stamp), __FUNCTION__)
+#define TIME_ITEM(x) printf(" %12.9f %s\n", 1E-9 * (time_stamp() - stamp), x)
+
+#else
+
+#define TIME_START do { } while(0)
+#define TIME_PRINT do { } while(0)
+#define TIME_ITEM(x) do { } while(0)
+
+#endif
+
+#endif
-- 
2.39.5