By far most of the execution time is spent on the CPU side,
converting the float4 array to uchar3 for output.
+opencl-args/
+
+ How do you pass arguments?
+
LINKS
-----
--- /dev/null
+
+#.SUFFIXES:
+
+include ../config.make
+
+CFLAGS += -Wall -Og -g $(compute_CFLAGS)
+LDLIBS += $(compute_LDLIBS)
+
+all: hello
+
+hello.o: hello-host.h
+hello: hello.o hello-host.o compute.o zvk.o
+
+%-host.c %-host.h: %.cl.csv %.cl.spv compile-desc
+ ./compile-desc --prefix $*_ -h $*-host.h -o $*-host.c $*.cl.spv $*.cl.csv
+
+clean:
+ rm -f *.cl.csv *.cl.spv *.cl.inc
+ rm -f *-host.c *-host.h
+ rm -f *.o
+ rm -f hello
+
+.SECONDARY: hello.cl.csv hello.cl.spv
--- /dev/null
+#!/usr/bin/perl
+
+use List::Util qw[min max];
+
+# usage: compile-desc [--prefix func-prefix] -o output.c [ -h output.h ] file.spv file.csv
+
+$prefix = "";
+$outc = "";
+$outh = "";
+@files = ();
+
+while (@ARGV) {
+ my $arg = shift @ARGV;
+
+ if ($arg eq "--prefix") {
+ $prefix = shift @ARGV;
+ } elsif ($arg eq "-o") {
+ $outc = shift @ARGV;
+ } elsif ($arg eq "-h") {
+ $outh = shift @ARGV;
+ } else {
+ push @files, $arg;
+ }
+}
+
+$spv = $files[0];
+$desc = $files[1];
+
+die ("Missing descriptor file") if ($desc eq "");
+die ("Missing output file") if ($outc eq "");
+
+# This takes the opencl kernel plus the descriptorsets description and ... does something with it.
+#$unit = "arguments";
+#$desc = "arguments.cl.csv";
+#$src = "arguments.cl";
+
+%info = (
+ "prefix" => $prefix,
+ );
+
+# map for primitive types which need renaming
+%type_map = (
+ "uint32", "uint32_t",
+ "int32", "int32_t",
+ );
+
+%bind_map = (
+ "pod", "ZVK_POD",
+ "buffer", "ZVK_BUFFER",
+ "ro_image", "ZVK_ROIMAGE",
+ "wo_image", "ZVK_WOIMAGE",
+ "sampler", "ZVK_SAMPLER",
+ );
+
+%kernels = ();
+
+#
+# kernel{name} = (
+# @bindings = (
+# index = index
+# kind = pod | buffer
+# sizeof = total byte size for pod
+# )
+# @params = (
+# index = index
+# name = parameter name
+# type = opencl param type
+# binding = binding index
+# offset =
+# size = parameter size for pod
+# )
+# )
+
+# read description
+open IN,"<$desc";
+while (<IN>) {
+ chop;
+ if (m/^kernel,([^,]*),arg,([^,]*),argOrdinal,([^,]*),descriptorSet,([^,]*),binding,([^,]*),offset,([^,]*),argKind,([^,]*)(.*)/) {
+ my $kname = $1;
+ my $pname = $2;
+ my $index = $3;
+ my $set = $4;
+ my $bindid = $5;
+ my $offset = $6;
+ my $kind = $7;
+ my $rest = $8;
+ my %kern = %{$kernels{$kname}};
+ my %rec = %{@{$kern{params}}[$index]};
+
+ die ("cl parser <> descriptor mismatch") if ($rec{name} != $name);
+
+ $rec{name} = $pname;
+ $rec{index} = $index;
+ $rec{binding} = $bindid;
+ $rec{offset} = $offset;
+ ($rec{size}) = ($rest =~ m/argSize,(\d*)/);
+ ($rec{type}) = ($rest =~ m/argType,([^,]*)/);
+ @{$kern{params}}[$index] = \%rec;
+
+ my %bind = %{@{$kern{bindings}}[$bindid]};
+
+ $bind{index} = $bindid;
+ $bind{kind} = $kind;
+ if (defined $rec{size}) {
+ $bind{sizeof} = max($offset+$rec{size}, $bind{sizeof});
+ }
+
+ @{$kern{bindings}}[$bindid] = \%bind;
+
+ $kern{name} = $kname;
+
+ $kernels{$kname} = \%kern;
+ }
+}
+close IN;
+
+# get kernel arg types, unfortunately not available in the descriptor file
+# requires strict source format:
+# kernel void\n
+# func(args,...)\n
+if (0) {
+open IN,"<$src";
+$state = 0;
+while (<IN>) {
+ if ($state == 0) {
+ if (m/^kernel void/) {
+ $state = 1;
+ }
+ } else {
+ # this only includes kernels which are already defined
+ if (m/([^\(]*)\((.*)\)/) {
+ my $kname = $1;
+ my @args = $2 =~ m/\s*([^,]+)\s*,?/g;
+ my %kern = %{$kernels{$kname}};
+ my $i = 0;
+
+ for $arg (@args) {
+ my ($type, $name) = $arg =~ m/^(.*?) ?(\w*)$/;
+ my %rec = %{$kern{params}[$i]};
+
+ print STDERR "arg: $type <- $name\n";
+
+ $rec{type} = $type;
+
+ $kern{params}[$i] = \%rec;
+
+ $i++;
+ }
+ }
+ $state = 0;
+ }
+}
+close IN;
+}
+
+open C,">$outc" || die("opening $outc");
+
+print C <<END;
+/* autogenerated from $desc */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "compute.h"
+END
+
+if ($outh ne "") {
+ open H,">$outh" || die("open $outh");
+}
+
+# Data tables - kernels
+for $kname (sort keys %kernels) {
+ my %kern = %{$kernels{$kname}};
+ my @bindings = @{$kern{bindings}};
+ my @params = @{$kern{params}};
+
+ print C "static const struct bindinfo $info{prefix}$kern{name}_bindings[] = {\n";
+ for $binds (@bindings) {
+ my %bind = %{$binds};
+
+ print C " { $bind_map{$bind{kind}}, $bind{sizeof} },\n";
+ }
+ print C "};\n";
+
+ print C "static const struct paraminfo $info{prefix}$kern{name}_params[] = {\n";
+ for $recs (@params) {
+ my %rec = %{$recs};
+
+ print C " { $rec{binding}, $rec{offset}, $rec{size} },\n";
+ }
+ print C "};\n";
+
+ my $nbindings = $#bindings + 1;
+ my $nparams = $#params + 1;
+
+ print C <<END;
+static const struct kerninfo $info{prefix}$kern{name} = {
+ .name = "$kern{name}",
+ .nbindings = $nbindings,
+ .nparams = $nparams,
+ .bindings = $info{prefix}$kern{name}_bindings,
+ .params = $info{prefix}$kern{name}_params
+ };
+END
+}
+
+# Data tables - module
+open SPV,"<:raw", $spv;
+my $i = 0;
+print C "/* $spv */\n";
+print C "static const uint32_t module_spv[] = {\n";
+while (read SPV,my $code,4) {
+ printf C " 0x%08x,", unpack("V*", $code);
+ if (($i++) % 8 == 7) {
+ print C "\n";
+ }
+}
+print C "\n};\n";
+close SPV;
+
+#
+# module definition
+#
+$nkernels = keys %kernels;
+print C <<END;
+const struct modinfo $info{prefix}modinfo = {
+ .code = module_spv,
+ .codeSize = sizeof(module_spv),
+ .nkernels = $nkernels,
+END
+if (H) {
+ print H "extern const struct modinfo $info{prefix}modinfo;\n";
+}
+
+print C " .kernels = {\n";
+for $kname (sort keys %kernels) {
+ my %kern = %{$kernels{$kname}};
+
+ print C " &$info{prefix}$kern{name},\n";
+}
+print C " },\n";
+print C "};\n";
+
+#
+# create method to instantiate 1 kernel
+#
+if (0) {
+ for $kname (sort keys %kernels) {
+ my %kern = %{$kernels{$kname}};
+ my @bindings = @{$kern{bindings}};
+ my @params = @{$kern{params}};
+
+ my @buffers;
+ my @pods;
+ my @images;
+
+ print C "struct kernstate *$info{prefix}$kern{name}_create(struct zvk *zvk) {\n";
+ print C " return compute_create(zvk, &$info{prefix}$kern{name});\n";
+ print C "}\n\n";
+
+ if (H) {
+ print H "struct kernstate *$info{prefix}$kern{name}_create(struct zvk *zvk);\n";
+ }
+ }
+}
+
+# create method to update arguments
+#
+for $kname (sort keys %kernels) {
+ my %kern = %{$kernels{$kname}};
+ my @bindings = @{$kern{bindings}};
+ my @params = @{$kern{params}};
+
+ my @buffers;
+ my @pods;
+ my @images;
+
+ my $proto = "void $info{prefix}$kern{name}_setArgs(struct kernstate *state";
+
+ for $recs (@params) {
+ my %rec = %{$recs};
+ my %bind = %{$bindings[$rec{binding}]};
+
+ if ($bind{kind} eq "buffer") {
+ $proto .= ", VkDescriptorBufferInfo $rec{name}";
+ push @buffers, \%rec;
+ } elsif ($bind{kind} eq "pod") {
+ if (defined($type_map{$rec{type}})) {
+ $proto .= ", $type_map{$rec{type}} $rec{name}";
+ push @pods, \%rec;
+ } else {
+ $proto .= ", $rec{type} $rec{name}";
+ push @pods, \%rec;
+ }
+ } elsif ($bind{kind} eq "ro_image" || $bind{kind} eq "wo_image") {
+ $proto .= ", VkDescriptorImageInfo $rec{name}";
+ push @images, \%rec;
+ }
+ }
+ $proto .= ")";
+ print C "$proto {\n";
+ if (H) {
+ print H "$proto;\n";
+ }
+ print C " struct zvk *zvk = state->mod->zvk;\n";
+
+ if (@buffers) {
+ print C " /* update buffer pointers */\n";
+ print C " VkDescriptorBufferInfo *bufferSet[] = {\n";
+ for $recs (@buffers) {
+ my %rec = %{$recs};
+ print C " &$rec{name},\n";
+ }
+ print C " };\n\n";
+ print C " compute_setBuffers(state, bufferSet);\n";
+ }
+
+ if (@images) {
+ print C " /* update image pointers */\n";
+ print C " VkDescriptorImageInfo *imageSet[] = {\n";
+ for $recs (@images) {
+ my %rec = %{$recs};
+ print C " &$rec{name},\n";
+ }
+ print C " };\n";
+ print C " compute_setImages(state, imageSet);\n";
+ }
+
+ if (@pods) {
+ my @bindpods = grep { %{$_}{kind} eq "pod" } @bindings;
+ my $podSize = %{$bindpods[0]}{sizeof};
+
+ print C " /* update pod memory */\n";
+ print C " void *pod __attribute__ ((aligned(16)));\n";
+ print C " ZVK_FATAL(vkMapMemory(zvk->device, state->podMemory, 0, $podSize, 0, &pod));\n";
+
+ for $recs (@pods) {
+ my %rec = %{$recs};
+
+ print C " memcpy(pod + $rec{offset}, &$rec{name}, $rec{size});\n";
+ }
+
+ print C " vkUnmapMemory(zvk->device, state->podMemory);\n";
+ }
+
+ print C "}\n\n";
+}
+
+if (H) {
+ close H;
+}
+close C;
+
--- /dev/null
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "compute.h"
+
+/* ********************************************************************** */
+
+static int mapDescriptorType(int binding) {
+ switch (binding) {
+ case ZVK_BUFFER:
+ case ZVK_POD:
+ return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+ case ZVK_ROIMAGE:
+ return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
+ case ZVK_WOIMAGE:
+ return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
+ case ZVK_SAMPLER:
+ return VK_DESCRIPTOR_TYPE_SAMPLER;
+ }
+ return -1;
+}
+
+struct modstate *compute_createModule(struct zvk *zvk, const struct modinfo *mod) {
+ struct modstate *ms = calloc(1, sizeof(*ms));
+
+ ms->zvk = zvk;
+ ms->modinfo = mod;
+
+ VkShaderModuleCreateInfo createInfo = {
+ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+ .codeSize = mod->codeSize,
+ .pCode = mod->code,
+ };
+
+ ZVK_FATAL(vkCreateShaderModule(zvk->device, &createInfo, NULL, &ms->shader));
+
+ return ms;
+}
+
+void compute_destroyModule(struct modstate *mod) {
+ vkDestroyShaderModule(mod->zvk->device, mod->shader, NULL);
+ free(mod);
+}
+
+static int kernelIndex(const struct modinfo *mi, const char * name) {
+ for (int i=0;i<mi->nkernels;i++) {
+ if (strcmp(mi->kernels[i]->name, name) == 0)
+ return i;
+ }
+ return -1;
+}
+
+struct kernstate *compute_createKernel(struct modstate *mod, const char *name) {
+ int ki = kernelIndex(mod->modinfo, name);
+
+ if (ki < 0)
+ return NULL;
+
+ struct zvk *zvk = mod->zvk;
+ struct kernstate *ks = calloc(1, sizeof(*ks));
+ const struct kerninfo *kern = mod->modinfo->kernels[ki];
+
+ ks->mod = mod;
+ ks->kern = kern;
+
+ /* allocate data bindings */
+ VkDescriptorSetLayoutBinding layout_bindings[kern->nbindings];
+ VkDescriptorBindingFlagBitsEXT layout_bindings_flags[kern->nbindings];
+
+ for (int i=0;i<kern->nbindings;i++) {
+ memset(&layout_bindings[i], 0, sizeof(layout_bindings[0]));
+ layout_bindings_flags[i] = VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT_EXT;
+ layout_bindings[i].binding = i;
+ layout_bindings[i].descriptorType = mapDescriptorType(kern->bindings[i].type);
+ layout_bindings[i].descriptorCount = 1;
+ layout_bindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+ }
+ VkDescriptorSetLayoutBindingFlagsCreateInfoEXT flags = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT,
+ .bindingCount = kern->nbindings,
+ .pBindingFlags = layout_bindings_flags,
+ };
+ VkDescriptorSetLayoutCreateInfo descriptor_layout = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pNext = &flags,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT_EXT,
+ .bindingCount = kern->nbindings,
+ .pBindings = layout_bindings,
+ };
+
+ ZVK_FATAL(vkCreateDescriptorSetLayout(zvk->device, &descriptor_layout, NULL, &ks->descriptorSetLayout));
+
+ VkDescriptorSetAllocateInfo alloc_info[] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .descriptorPool = zvk->descriptorPool,
+ .descriptorSetCount = 1,
+ .pSetLayouts = &ks->descriptorSetLayout,
+ },
+ };
+
+ ZVK_FATAL(vkAllocateDescriptorSets(zvk->device, alloc_info, ks->descriptorSets));
+
+ /* Check for the (1) 'pod' binding, allocate memory for it */
+ for (int i=0;i<kern->nbindings;i++) {
+ if (kern->bindings[i].type == ZVK_POD) {
+ zvkAllocBuffer(zvk, kern->bindings[i].size,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+ &ks->podBuffer, &ks->podMemory);
+
+ VkDescriptorBufferInfo bufferInfo = {
+ .buffer = ks->podBuffer,
+ .offset = 0,
+ .range = kern->bindings[i].size,
+ };
+ VkWriteDescriptorSet writeSet = {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = ks->descriptorSets[0],
+ .dstBinding = i,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &bufferInfo,
+ };
+ vkUpdateDescriptorSets(zvk->device, 1, &writeSet, 0, NULL);
+ break;
+ }
+ }
+
+ /* Create pipeline */
+ VkPipelineLayoutCreateInfo pipelineinfo = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &ks->descriptorSetLayout,
+ };
+
+ ZVK_FATAL(vkCreatePipelineLayout(zvk->device, &pipelineinfo, NULL, &ks->pipelineLayout));
+
+ VkComputePipelineCreateInfo pipeline = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage =
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = mod->shader,
+ .pName = kern->name,
+ },
+ .layout = ks->pipelineLayout
+
+ };
+ ZVK_FATAL(vkCreateComputePipelines(zvk->device, NULL, 1, &pipeline, NULL, &ks->pipeline));
+
+ return ks;
+}
+
+void compute_destroyKernel(struct kernstate *ks) {
+ struct zvk *zvk = ks->mod->zvk;
+
+ if (ks->podMemory) {
+ vkFreeMemory(zvk->device, ks->podMemory, NULL);
+ vkDestroyBuffer(zvk->device, ks->podBuffer, NULL);
+ }
+
+ vkDestroyPipeline(zvk->device, ks->pipeline, NULL);
+ vkDestroyPipelineLayout(zvk->device, ks->pipelineLayout, NULL);
+
+ vkFreeDescriptorSets(zvk->device, zvk->descriptorPool, 1, ks->descriptorSets);
+ vkDestroyDescriptorSetLayout(zvk->device, ks->descriptorSetLayout, NULL);
+
+ free(ks);
+}
+
+// one time optional?
+VkCommandBuffer compute_createCommand(struct kernstate *ks, uint32_t sizex, uint32_t sizey, uint32_t sizez) {
+ struct zvk *zvk = ks->mod->zvk;
+ VkCommandBuffer commandBuffers[1];
+
+ /* Create a command buffer to run this kernel with it's data set for the given size */
+ VkCommandBufferAllocateInfo cmdinfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+ .commandPool = zvk->commandPool,
+ .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+ .commandBufferCount = 1,
+ };
+
+ ZVK_FATAL(vkAllocateCommandBuffers(zvk->device, &cmdinfo, commandBuffers));
+
+ VkCommandBufferBeginInfo beginInfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ //.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ .flags = 0,
+ };
+ ZVK_FATAL(vkBeginCommandBuffer(commandBuffers[0], &beginInfo));
+
+ vkCmdBindPipeline(commandBuffers[0], VK_PIPELINE_BIND_POINT_COMPUTE, ks->pipeline);
+ vkCmdBindDescriptorSets(commandBuffers[0], VK_PIPELINE_BIND_POINT_COMPUTE, ks->pipelineLayout, 0, 1, ks->descriptorSets, 0, NULL);
+
+ vkCmdDispatch(commandBuffers[0], sizex, sizey, sizez);
+
+ ZVK_FATAL(vkEndCommandBuffer(commandBuffers[0]));
+
+ return commandBuffers[0];
+}
+
+// rather inefficient way to set arguments one at a time
+void compute_setArg(struct kernstate *ks, int index, void *data, size_t size) {
+ struct zvk *zvk = ks->mod->zvk;
+ const struct kerninfo *kern = ks->kern;
+ const struct paraminfo *pi = &kern->params[index];
+ const struct bindinfo *bi = &kern->bindings[pi->binding];
+
+ switch (bi->type) {
+ case ZVK_POD: {
+ void *pod __attribute__ ((aligned(16)));
+
+ ZVK_FATAL(!(size == pi->size));
+ ZVK_FATAL(vkMapMemory(zvk->device, ks->podMemory, 0, VK_WHOLE_SIZE, 0, &pod));
+ memcpy(pod + pi->offset, data, size);
+ vkUnmapMemory(zvk->device, ks->podMemory);
+ break;
+ }
+ case ZVK_BUFFER:
+ ZVK_FATAL(!(size == sizeof(VkDescriptorBufferInfo)));
+ VkWriteDescriptorSet bufferSet = {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = ks->descriptorSets[0],
+ .dstBinding = pi->binding,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = data
+ };
+ vkUpdateDescriptorSets(zvk->device, 1, &bufferSet, 0, NULL);
+ break;
+ case ZVK_WOIMAGE:
+ ZVK_FATAL(!(size == sizeof(VkDescriptorImageInfo)));
+ VkWriteDescriptorSet woSet = {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = ks->descriptorSets[0],
+ .dstBinding = pi->binding,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = data
+ };
+ vkUpdateDescriptorSets(zvk->device, 1, &woSet, 0, NULL);
+ break;
+ case ZVK_ROIMAGE:
+ ZVK_FATAL(!(size == sizeof(VkDescriptorImageInfo)));
+ VkWriteDescriptorSet roSet = {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = ks->descriptorSets[0],
+ .dstBinding = pi->binding,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo = data
+ };
+ vkUpdateDescriptorSets(zvk->device, 1, &roSet, 0, NULL);
+ break;
+ // shader?
+ }
+}
+
+void compute_setBuffers(struct kernstate *ks, VkDescriptorBufferInfo *buffers[]) {
+ VkWriteDescriptorSet writeSet = {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = ks->descriptorSets[0],
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ };
+ const struct kerninfo *kern = ks->kern;
+ struct zvk *zvk = ks->mod->zvk;
+
+ for (int i=0,bindex=0;i<kern->nparams;i++) {
+ const struct paraminfo *pi = &kern->params[i];
+ const struct bindinfo *bi = &kern->bindings[pi->binding];
+
+ if (bi->type == ZVK_BUFFER) {
+ writeSet.pBufferInfo = buffers[bindex];
+ writeSet.dstBinding = pi->binding;
+
+ vkUpdateDescriptorSets(zvk->device, 1, &writeSet, 0, NULL);
+ bindex++;
+ }
+ }
+}
+
+void compute_setImages(struct kernstate *ks, VkDescriptorImageInfo *images[]) {
+ VkWriteDescriptorSet writeSet = {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = ks->descriptorSets[0],
+ .descriptorCount = 1,
+ };
+ const struct kerninfo *kern = ks->kern;
+ struct zvk *zvk = ks->mod->zvk;
+
+ for (int i=0,bindex=0;i<kern->nparams;i++) {
+ const struct paraminfo *pi = &kern->params[i];
+ const struct bindinfo *bi = &kern->bindings[pi->binding];
+
+ if (bi->type == ZVK_WOIMAGE) {
+ writeSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
+ } else if (bi->type == ZVK_ROIMAGE) {
+ writeSet.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
+ } else
+ continue;
+
+ writeSet.pImageInfo = images[bindex];
+ writeSet.dstBinding = pi->binding;
+
+ vkUpdateDescriptorSets(zvk->device, 1, &writeSet, 0, NULL);
+ bindex++;
+ }
+}
--- /dev/null
+
+#ifndef COMPUTE_H
+#define COMPUTE_H
+
+#include "zvk.h"
+
+/* ********************************************************************** */
+
+struct paraminfo {
+ int binding;
+ int offset;
+ int size;
+};
+
+struct bindinfo {
+ int type;
+ int size;
+};
+
+struct kerninfo {
+ const char *name;
+ int nbindings;
+ int nparams;
+ const struct bindinfo *bindings;
+ const struct paraminfo *params;
+};
+
+struct kernstate {
+ struct modstate *mod;
+ const struct kerninfo *kern;
+
+ VkDeviceMemory podMemory;
+ VkBuffer podBuffer;
+
+ VkDescriptorSetLayout descriptorSetLayout;
+ VkDescriptorSet descriptorSets[1];
+
+ VkPipelineLayout pipelineLayout;
+ VkPipeline pipeline;
+};
+
+/* ********************************************************************** */
+
+struct modinfo {
+ const uint32_t *code;
+ size_t codeSize;
+ int nkernels;
+ const struct kerninfo *kernels[];
+};
+
+struct modstate {
+ struct zvk *zvk;
+ const struct modinfo *modinfo;
+
+ VkShaderModule shader;
+};
+
+struct modstate *compute_createModule(struct zvk *zvk, const struct modinfo *mod);
+void compute_destroyModule(struct modstate *mod);
+
+struct kernstate *compute_createKernel(struct modstate *mod, const char *name);
+void compute_destroyKernel(struct kernstate *ks);
+
+VkCommandBuffer compute_createCommand(struct kernstate *ks, uint32_t sizex, uint32_t sizey, uint32_t sizez);
+
+void compute_setArg(struct kernstate *ks, int index, void *data, size_t size);
+
+void compute_setBuffers(struct kernstate *ks, VkDescriptorBufferInfo *buffers[]);
+void compute_setImages(struct kernstate *ks, VkDescriptorImageInfo *images[]);
+
+#endif
--- /dev/null
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "compute.h"
+
+#include "hello-host.h"
+
+void zvkAllocBuffer2(struct zvk *g, VkDeviceSize dataSize, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, struct zvkBuffer *buffer) {
+ zvkAllocBuffer(g, dataSize, usage, properties, &buffer->info.buffer, &buffer->memory);
+ buffer->info.offset = 0;
+ buffer->info.range = dataSize;
+}
+
+void zvkFreeBuffer(struct zvk *zvk, struct zvkBuffer *buffer) {
+ vkFreeMemory(zvk->device, buffer->memory, NULL);
+ vkDestroyBuffer(zvk->device, buffer->info.buffer, NULL);
+}
+
+void zvkSetBuffer(struct zvk *zvk, struct zvkBuffer *buffer, void *data, size_t size) {
+ void *mem;
+
+ vkMapMemory(zvk->device, buffer->memory, 0, buffer->info.range, 0, &mem);
+ memcpy(mem, data, size);
+ vkUnmapMemory(zvk->device, buffer->memory);
+}
+
+int main(int argc, char **argv) {
+ struct zvk *zvk = zvkInit();
+ struct modstate *mod = compute_createModule(zvk, &hello_modinfo);
+ struct kernstate *hello = compute_createKernel(mod, "hello");
+ struct zvkBuffer dstBuffer, srcBuffer;
+
+ zvkAllocBuffer2(zvk, 64, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+ &dstBuffer);
+
+ zvkAllocBuffer2(zvk, 13, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+ &srcBuffer);
+
+ zvkSetBuffer(zvk, &srcBuffer, "Hello World!\n", 13);
+
+ hello_hello_setArgs(hello, dstBuffer.info, 64, srcBuffer.info, 13);
+
+ VkCommandBuffer cmd = compute_createCommand(hello, 64, 1, 1);
+
+ {
+ VkResult res;
+ VkSubmitInfo submitInfo[] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &cmd,
+ },
+ };
+
+ VkFence fence;
+ VkFenceCreateInfo fenceInfo = {
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO
+ };
+ ZVK_FATAL(vkCreateFence(zvk->device, &fenceInfo, NULL, &fence));
+
+ ZVK_FATAL(vkQueueSubmit(zvk->computeQueue, ZVK_ARRAY_SIZEOF(submitInfo), submitInfo, fence));
+
+ do {
+ res = vkWaitForFences(zvk->device, 1, &fence, VK_TRUE, 1000000);
+ } while (res == VK_TIMEOUT);
+
+ vkDestroyFence(zvk->device, fence, NULL);
+
+ char *dst;
+ printf("dst:\n");
+ vkMapMemory(zvk->device, dstBuffer.memory, 0, dstBuffer.info.range, 0, (void **)&dst);
+ fwrite(dst, 1, dstBuffer.info.range, stdout);
+ printf("\n");
+ vkUnmapMemory(zvk->device, dstBuffer.memory);
+ }
+
+ hello_hello_setArgs(hello, dstBuffer.info, 64, srcBuffer.info, 6);
+
+
+ {
+ VkResult res;
+ VkSubmitInfo submitInfo[] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &cmd,
+ },
+ };
+
+ VkFence fence;
+ VkFenceCreateInfo fenceInfo = {
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO
+ };
+ ZVK_FATAL(vkCreateFence(zvk->device, &fenceInfo, NULL, &fence));
+
+ ZVK_FATAL(vkQueueSubmit(zvk->computeQueue, ZVK_ARRAY_SIZEOF(submitInfo), submitInfo, fence));
+
+ do {
+ res = vkWaitForFences(zvk->device, 1, &fence, VK_TRUE, 1000000);
+ } while (res == VK_TIMEOUT);
+
+ vkDestroyFence(zvk->device, fence, NULL);
+
+ char *dst;
+ printf("dst:\n");
+ vkMapMemory(zvk->device, dstBuffer.memory, 0, dstBuffer.info.range, 0, (void **)&dst);
+ fwrite(dst, 1, dstBuffer.info.range, stdout);
+ printf("\n");
+ vkUnmapMemory(zvk->device, dstBuffer.memory);
+ }
+
+
+ vkFreeCommandBuffers(zvk->device, zvk->commandPool, 1, &cmd);
+
+ zvkFreeBuffer(zvk, &srcBuffer);
+ zvkFreeBuffer(zvk, &dstBuffer);
+
+ compute_destroyKernel(hello);
+ compute_destroyModule(mod);
+ zvkDestroy(zvk);
+}
--- /dev/null
+
+kernel void hello(global char *dst, uint dsize, constant char *src, uint ssize) {
+ uint x = get_global_id(0);
+
+ if (x < dsize) {
+ dst[x] = src[x % ssize];
+ }
+}
--- /dev/null
+ /*
+The MIT License (MIT)
+
+Copyright (C) 2017 Eric Arnebäck
+Copyright (C) 2019 Michael Zucchi
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+ */
+
+/*
+ Convert a few re-usable parts of compute-only code to a library.
+
+ It removes a little bit of the clutter in later experiments.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "zvk.h"
+
+/**
+ * This finds the memory type index for the memory on a specific device.
+ */
+static int find_memory_type(VkPhysicalDeviceMemoryProperties *memory, uint32_t typeMask, VkMemoryPropertyFlagBits query)
+{
+ for (int i = 0; i < memory->memoryTypeCount; i++) {
+ if (((1 << i) & typeMask) && (memory->memoryTypes[i].propertyFlags & query) == query)
+ return i;
+ }
+ return -1;
+}
+
+/**
+ * Create vulkan instance.
+ *
+ * Compute-only requires no extensions so this is simple.
+ *
+ * To turn on debugging layers:
+ * export VK_INSTANCE_LAYERS=VK_LAYER_LUNARG_standard_validation
+ */
+static void init_instance(struct zvk *g)
+{
+ VkApplicationInfo app = {
+ .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+ .pApplicationName = "mandelbrot",
+ .pEngineName = "none",
+ .apiVersion = VK_API_VERSION_1_0,
+ };
+ const char * const extensions[] = {
+ "VK_KHR_get_physical_device_properties2"
+ };
+ VkInstanceCreateInfo info = {
+ .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+ .pApplicationInfo = &app,
+ .enabledExtensionCount = sizeof(extensions) / sizeof(*extensions),
+ .ppEnabledExtensionNames = extensions,
+ };
+
+ ZVK_FATAL(vkCreateInstance(&info, NULL, &g->instance));
+}
+
+/**
+ * This finds a suitable device and queue family.
+ *
+ * In this case it is a device that supports a compute queue. It
+ * preferentially looks for a non-graphics compute queue.
+ *
+ * It could also make use of:
+ * VkPhysicalDeviceProperties props;
+ * vkGetPhysicalDeviceProperties(devs[i], &props);
+ */
+static void init_device(struct zvk *g)
+{
+ uint32_t devcount;
+
+ vkEnumeratePhysicalDevices(g->instance, &devcount, NULL);
+ VkPhysicalDevice devs[devcount];
+ ZVK_FATAL(vkEnumeratePhysicalDevices(g->instance, &devcount, devs));
+
+ int best = 0;
+ int devid = -1;
+ int queueid = -1;
+
+ for (int i = 0; i < devcount; i++) {
+ uint32_t family_count;
+
+ vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, NULL);
+ VkQueueFamilyProperties famprops[family_count];
+ vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, famprops);
+
+ for (uint32_t j = 0; j < family_count; j++) {
+ int score = 0;
+
+ if ((famprops[j].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0)
+ score += 1;
+ if ((famprops[j].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0)
+ score += 1;
+
+ if (score > best) {
+ score = best;
+ devid = i;
+ queueid = j;
+ }
+ }
+ }
+
+ if (devid == -1) {
+ fprintf(stderr, "Unable to find suitable device\n");
+ exit(1);
+ }
+
+ g->physicalDevice = devs[devid];
+ g->computeDeviceIndex = devid;
+ g->computeQueueIndex = queueid;
+#if 0
+ {
+ VkExtensionProperties prop[200];
+ uint32_t count = 200;
+ vkEnumerateDeviceExtensionProperties(g->physicalDevice, NULL, &count, prop);
+ for (int i=0;i<count;i++) {
+ printf(" %s\n", prop[i].extensionName);
+ }
+ fflush(stdout);
+ }
+ {
+ VkPhysicalDeviceShaderFloat16Int8FeaturesKHR ff = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR,
+ };
+ VkPhysicalDeviceFeatures2 feat = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
+ .pNext = &ff,
+ };
+ vkGetPhysicalDeviceFeatures2(g->physicalDevice, &feat);
+
+ printf("shaderFloat16 = %d\n", ff.shaderFloat16);
+ printf("shaderInt8 = %d\n", ff.shaderInt8);
+ }
+ {
+ VkPhysicalDeviceProperties2 prop = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
+ };
+ vkGetPhysicalDeviceProperties2(g->physicalDevice, &prop);
+
+ }
+#endif
+ float qpri[] = {0.0f};
+ VkDeviceQueueCreateInfo qinfo = {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+ .queueCount = 1,
+ .pQueuePriorities = qpri, // Note: cannot be null
+ .queueFamilyIndex = g->computeQueueIndex,
+ };
+ const char * const extensions[] = {
+ VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
+ "VK_EXT_descriptor_indexing",
+ "VK_KHR_maintenance3",
+ };
+ VkPhysicalDeviceDescriptorIndexingFeaturesEXT di = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT,
+ .descriptorBindingStorageBufferUpdateAfterBind = 1,
+ };
+ VkPhysicalDeviceShaderFloat16Int8FeaturesKHR ff = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR,
+ .pNext = &di,
+ .shaderInt8 = 1,
+ };
+ VkDeviceCreateInfo devinfo = {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+ .pNext = &ff,
+ .queueCreateInfoCount = 1,
+ .pQueueCreateInfos = &qinfo,
+ .enabledExtensionCount = ZVK_ARRAY_SIZEOF(extensions),
+ .ppEnabledExtensionNames = extensions,
+ };
+
+ ZVK_FATAL(vkCreateDevice(g->physicalDevice, &devinfo, NULL, &g->device));
+
+ /* These values are cached for convenience */
+ vkGetPhysicalDeviceMemoryProperties(g->physicalDevice, &g->deviceMemoryProperties);
+ vkGetDeviceQueue(g->device, g->computeQueueIndex, 0, &g->computeQueue);
+}
+
+/**
+ * Create a descriptor pool for later use.
+ */
+static void init_descriptor_pool(struct zvk *g)
+{
+ VkDescriptorPoolSize type_count[] = {
+ {
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 32,
+ }
+ };
+ VkDescriptorPoolCreateInfo descriptor_pool = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT
+ | VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT_EXT,
+ .maxSets = 1,
+ .poolSizeCount = ZVK_ARRAY_SIZEOF(type_count),
+ .pPoolSizes = type_count,
+ };
+
+ ZVK_FATAL(vkCreateDescriptorPool(g->device, &descriptor_pool, NULL, &g->descriptorPool));
+}
+
+/**
+ * Create a command pool for later use.
+ */
+static void init_command_pool(struct zvk *g)
+{
+ VkCommandPoolCreateInfo poolinfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .queueFamilyIndex = g->computeQueueIndex,
+ };
+
+ ZVK_FATAL(vkCreateCommandPool(g->device, &poolinfo, NULL, &g->commandPool));
+}
+
+struct zvk *zvkInit(void) {
+ struct zvk *state = calloc(1, sizeof(*state));
+
+ init_instance(state);
+ init_device(state);
+ init_descriptor_pool(state);
+ init_command_pool(state);
+
+ return state;
+}
+
+void zvkDestroy(struct zvk *zvk) {
+ vkDestroyCommandPool(zvk->device, zvk->commandPool, NULL);
+ vkDestroyDescriptorPool(zvk->device, zvk->descriptorPool, NULL);
+ vkDestroyDevice(zvk->device, NULL);
+ vkDestroyInstance(zvk->instance, NULL);
+ free(zvk);
+}
+
+/**
+ * Buffers are created in three steps:
+ * 1) create buffer, specifying usage and size
+ * 2) allocate memory based on memory requirements
+ * 3) bind memory
+ *
+ */
+void zvkAllocBuffer(struct zvk *g, VkDeviceSize dataSize, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer *buffer, VkDeviceMemory *memory){
+ VkMemoryRequirements req;
+ VkBufferCreateInfo buf_info = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .size = dataSize,
+ .usage = usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE
+ };
+ ZVK_FATAL(vkCreateBuffer(g->device, &buf_info, NULL, buffer));
+
+ vkGetBufferMemoryRequirements(g->device, *buffer, &req);
+
+ VkMemoryAllocateInfo alloc = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .allocationSize = req.size,
+ .memoryTypeIndex = find_memory_type(&g->deviceMemoryProperties, req.memoryTypeBits, properties)
+ };
+ ZVK_FATAL(vkAllocateMemory(g->device, &alloc, NULL, memory));
+
+ ZVK_FATAL(vkBindBufferMemory(g->device, *buffer, *memory, 0));
+}
--- /dev/null
+
+#ifndef ZVK_H
+#define ZVK_H
+
+#include <vulkan/vulkan.h>
+//#include <stdlib.h>
+//#include <stdio.h>
+
+struct zvk {
+ VkInstance instance;
+ VkPhysicalDevice physicalDevice;
+ VkDevice device;
+ VkPhysicalDeviceMemoryProperties deviceMemoryProperties;
+
+ VkDescriptorPool descriptorPool;
+ VkCommandPool commandPool;
+
+ uint32_t computeQueueIndex;
+ uint32_t computeDeviceIndex;
+
+ VkQueue computeQueue;
+};
+
+struct zvkBuffer {
+ VkDescriptorBufferInfo info;
+ VkDeviceMemory memory;
+};
+
+#define ZVK_BUFFER 0
+#define ZVK_POD 1
+#define ZVK_ROIMAGE 2
+#define ZVK_WOIMAGE 3
+#define ZVK_SAMPLER 4
+
+#define ZVK_FATAL(r) do { \
+ VkResult res = (r); \
+ if (res) { \
+ fprintf(stderr, "failed: %s = %d", #r, res); \
+ abort(); \
+ } \
+} while (0)
+
+struct zvk *zvkInit(void);
+void zvkAllocBuffer(struct zvk *g, VkDeviceSize dataSize, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer *buffer, VkDeviceMemory *memory);
+void zvkAllocBuffer2(struct zvk *g, VkDeviceSize dataSize, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, struct zvkBuffer *buffer);
+
+void zvkDestroy(struct zvk *zvk);
+
+
+#define ZVK_ARRAY_SIZEOF(a) (sizeof(a)/sizeof(*a))
+
+#endif