From: Not Zed Date: Sun, 29 Dec 2019 06:03:23 +0000 (+1030) Subject: More complex opencl argument passing. X-Git-Url: https://code.zedzone.au/cvs?a=commitdiff_plain;p=zproto-vulkan More complex opencl argument passing. --- diff --git a/README b/README index ecb8cb9..c6729ef 100644 --- a/README +++ b/README @@ -36,6 +36,10 @@ mandelbrot-cl/ By far most of the execution time is spent on the CPU side, converting the float4 array to uchar3 for output. +opencl-args/ + + How do you pass arguments? + LINKS ----- diff --git a/opencl-args/Makefile b/opencl-args/Makefile new file mode 100644 index 0000000..bece3aa --- /dev/null +++ b/opencl-args/Makefile @@ -0,0 +1,23 @@ + +#.SUFFIXES: + +include ../config.make + +CFLAGS += -Wall -Og -g $(compute_CFLAGS) +LDLIBS += $(compute_LDLIBS) + +all: hello + +hello.o: hello-host.h +hello: hello.o hello-host.o compute.o zvk.o + +%-host.c %-host.h: %.cl.csv %.cl.spv compile-desc + ./compile-desc --prefix $*_ -h $*-host.h -o $*-host.c $*.cl.spv $*.cl.csv + +clean: + rm -f *.cl.csv *.cl.spv *.cl.inc + rm -f *-host.c *-host.h + rm -f *.o + rm -f hello + +.SECONDARY: hello.cl.csv hello.cl.spv diff --git a/opencl-args/compile-desc b/opencl-args/compile-desc new file mode 100755 index 0000000..94bdb4c --- /dev/null +++ b/opencl-args/compile-desc @@ -0,0 +1,352 @@ +#!/usr/bin/perl + +use List::Util qw[min max]; + +# usage: compile-desc [--prefix func-prefix] -o output.c [ -h output.h ] file.spv file.csv + +$prefix = ""; +$outc = ""; +$outh = ""; +@files = (); + +while (@ARGV) { + my $arg = shift @ARGV; + + if ($arg eq "--prefix") { + $prefix = shift @ARGV; + } elsif ($arg eq "-o") { + $outc = shift @ARGV; + } elsif ($arg eq "-h") { + $outh = shift @ARGV; + } else { + push @files, $arg; + } +} + +$spv = $files[0]; +$desc = $files[1]; + +die ("Missing descriptor file") if ($desc eq ""); +die ("Missing output file") if ($outc eq ""); + +# This takes the opencl kernel plus the descriptorsets description and ... does something with it. +#$unit = "arguments"; +#$desc = "arguments.cl.csv"; +#$src = "arguments.cl"; + +%info = ( + "prefix" => $prefix, + ); + +# map for primitive types which need renaming +%type_map = ( + "uint32", "uint32_t", + "int32", "int32_t", + ); + +%bind_map = ( + "pod", "ZVK_POD", + "buffer", "ZVK_BUFFER", + "ro_image", "ZVK_ROIMAGE", + "wo_image", "ZVK_WOIMAGE", + "sampler", "ZVK_SAMPLER", + ); + +%kernels = (); + +# +# kernel{name} = ( +# @bindings = ( +# index = index +# kind = pod | buffer +# sizeof = total byte size for pod +# ) +# @params = ( +# index = index +# name = parameter name +# type = opencl param type +# binding = binding index +# offset = +# size = parameter size for pod +# ) +# ) + +# read description +open IN,"<$desc"; +while () { + chop; + if (m/^kernel,([^,]*),arg,([^,]*),argOrdinal,([^,]*),descriptorSet,([^,]*),binding,([^,]*),offset,([^,]*),argKind,([^,]*)(.*)/) { + my $kname = $1; + my $pname = $2; + my $index = $3; + my $set = $4; + my $bindid = $5; + my $offset = $6; + my $kind = $7; + my $rest = $8; + my %kern = %{$kernels{$kname}}; + my %rec = %{@{$kern{params}}[$index]}; + + die ("cl parser <> descriptor mismatch") if ($rec{name} != $name); + + $rec{name} = $pname; + $rec{index} = $index; + $rec{binding} = $bindid; + $rec{offset} = $offset; + ($rec{size}) = ($rest =~ m/argSize,(\d*)/); + ($rec{type}) = ($rest =~ m/argType,([^,]*)/); + @{$kern{params}}[$index] = \%rec; + + my %bind = %{@{$kern{bindings}}[$bindid]}; + + $bind{index} = $bindid; + $bind{kind} = $kind; + if (defined $rec{size}) { + $bind{sizeof} = max($offset+$rec{size}, $bind{sizeof}); + } + + @{$kern{bindings}}[$bindid] = \%bind; + + $kern{name} = $kname; + + $kernels{$kname} = \%kern; + } +} +close IN; + +# get kernel arg types, unfortunately not available in the descriptor file +# requires strict source format: +# kernel void\n +# func(args,...)\n +if (0) { +open IN,"<$src"; +$state = 0; +while () { + if ($state == 0) { + if (m/^kernel void/) { + $state = 1; + } + } else { + # this only includes kernels which are already defined + if (m/([^\(]*)\((.*)\)/) { + my $kname = $1; + my @args = $2 =~ m/\s*([^,]+)\s*,?/g; + my %kern = %{$kernels{$kname}}; + my $i = 0; + + for $arg (@args) { + my ($type, $name) = $arg =~ m/^(.*?) ?(\w*)$/; + my %rec = %{$kern{params}[$i]}; + + print STDERR "arg: $type <- $name\n"; + + $rec{type} = $type; + + $kern{params}[$i] = \%rec; + + $i++; + } + } + $state = 0; + } +} +close IN; +} + +open C,">$outc" || die("opening $outc"); + +print C < +#include +#include +#include "compute.h" +END + +if ($outh ne "") { + open H,">$outh" || die("open $outh"); +} + +# Data tables - kernels +for $kname (sort keys %kernels) { + my %kern = %{$kernels{$kname}}; + my @bindings = @{$kern{bindings}}; + my @params = @{$kern{params}}; + + print C "static const struct bindinfo $info{prefix}$kern{name}_bindings[] = {\n"; + for $binds (@bindings) { + my %bind = %{$binds}; + + print C " { $bind_map{$bind{kind}}, $bind{sizeof} },\n"; + } + print C "};\n"; + + print C "static const struct paraminfo $info{prefix}$kern{name}_params[] = {\n"; + for $recs (@params) { + my %rec = %{$recs}; + + print C " { $rec{binding}, $rec{offset}, $rec{size} },\n"; + } + print C "};\n"; + + my $nbindings = $#bindings + 1; + my $nparams = $#params + 1; + + print C <mod->zvk;\n"; + + if (@buffers) { + print C " /* update buffer pointers */\n"; + print C " VkDescriptorBufferInfo *bufferSet[] = {\n"; + for $recs (@buffers) { + my %rec = %{$recs}; + print C " &$rec{name},\n"; + } + print C " };\n\n"; + print C " compute_setBuffers(state, bufferSet);\n"; + } + + if (@images) { + print C " /* update image pointers */\n"; + print C " VkDescriptorImageInfo *imageSet[] = {\n"; + for $recs (@images) { + my %rec = %{$recs}; + print C " &$rec{name},\n"; + } + print C " };\n"; + print C " compute_setImages(state, imageSet);\n"; + } + + if (@pods) { + my @bindpods = grep { %{$_}{kind} eq "pod" } @bindings; + my $podSize = %{$bindpods[0]}{sizeof}; + + print C " /* update pod memory */\n"; + print C " void *pod __attribute__ ((aligned(16)));\n"; + print C " ZVK_FATAL(vkMapMemory(zvk->device, state->podMemory, 0, $podSize, 0, &pod));\n"; + + for $recs (@pods) { + my %rec = %{$recs}; + + print C " memcpy(pod + $rec{offset}, &$rec{name}, $rec{size});\n"; + } + + print C " vkUnmapMemory(zvk->device, state->podMemory);\n"; + } + + print C "}\n\n"; +} + +if (H) { + close H; +} +close C; + diff --git a/opencl-args/compute.c b/opencl-args/compute.c new file mode 100644 index 0000000..61af5ee --- /dev/null +++ b/opencl-args/compute.c @@ -0,0 +1,314 @@ + +#include +#include +#include + +#include "compute.h" + +/* ********************************************************************** */ + +static int mapDescriptorType(int binding) { + switch (binding) { + case ZVK_BUFFER: + case ZVK_POD: + return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + case ZVK_ROIMAGE: + return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + case ZVK_WOIMAGE: + return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + case ZVK_SAMPLER: + return VK_DESCRIPTOR_TYPE_SAMPLER; + } + return -1; +} + +struct modstate *compute_createModule(struct zvk *zvk, const struct modinfo *mod) { + struct modstate *ms = calloc(1, sizeof(*ms)); + + ms->zvk = zvk; + ms->modinfo = mod; + + VkShaderModuleCreateInfo createInfo = { + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .codeSize = mod->codeSize, + .pCode = mod->code, + }; + + ZVK_FATAL(vkCreateShaderModule(zvk->device, &createInfo, NULL, &ms->shader)); + + return ms; +} + +void compute_destroyModule(struct modstate *mod) { + vkDestroyShaderModule(mod->zvk->device, mod->shader, NULL); + free(mod); +} + +static int kernelIndex(const struct modinfo *mi, const char * name) { + for (int i=0;inkernels;i++) { + if (strcmp(mi->kernels[i]->name, name) == 0) + return i; + } + return -1; +} + +struct kernstate *compute_createKernel(struct modstate *mod, const char *name) { + int ki = kernelIndex(mod->modinfo, name); + + if (ki < 0) + return NULL; + + struct zvk *zvk = mod->zvk; + struct kernstate *ks = calloc(1, sizeof(*ks)); + const struct kerninfo *kern = mod->modinfo->kernels[ki]; + + ks->mod = mod; + ks->kern = kern; + + /* allocate data bindings */ + VkDescriptorSetLayoutBinding layout_bindings[kern->nbindings]; + VkDescriptorBindingFlagBitsEXT layout_bindings_flags[kern->nbindings]; + + for (int i=0;inbindings;i++) { + memset(&layout_bindings[i], 0, sizeof(layout_bindings[0])); + layout_bindings_flags[i] = VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT_EXT; + layout_bindings[i].binding = i; + layout_bindings[i].descriptorType = mapDescriptorType(kern->bindings[i].type); + layout_bindings[i].descriptorCount = 1; + layout_bindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + } + VkDescriptorSetLayoutBindingFlagsCreateInfoEXT flags = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT, + .bindingCount = kern->nbindings, + .pBindingFlags = layout_bindings_flags, + }; + VkDescriptorSetLayoutCreateInfo descriptor_layout = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = &flags, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT_EXT, + .bindingCount = kern->nbindings, + .pBindings = layout_bindings, + }; + + ZVK_FATAL(vkCreateDescriptorSetLayout(zvk->device, &descriptor_layout, NULL, &ks->descriptorSetLayout)); + + VkDescriptorSetAllocateInfo alloc_info[] = { + { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = zvk->descriptorPool, + .descriptorSetCount = 1, + .pSetLayouts = &ks->descriptorSetLayout, + }, + }; + + ZVK_FATAL(vkAllocateDescriptorSets(zvk->device, alloc_info, ks->descriptorSets)); + + /* Check for the (1) 'pod' binding, allocate memory for it */ + for (int i=0;inbindings;i++) { + if (kern->bindings[i].type == ZVK_POD) { + zvkAllocBuffer(zvk, kern->bindings[i].size, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + &ks->podBuffer, &ks->podMemory); + + VkDescriptorBufferInfo bufferInfo = { + .buffer = ks->podBuffer, + .offset = 0, + .range = kern->bindings[i].size, + }; + VkWriteDescriptorSet writeSet = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = ks->descriptorSets[0], + .dstBinding = i, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pBufferInfo = &bufferInfo, + }; + vkUpdateDescriptorSets(zvk->device, 1, &writeSet, 0, NULL); + break; + } + } + + /* Create pipeline */ + VkPipelineLayoutCreateInfo pipelineinfo = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &ks->descriptorSetLayout, + }; + + ZVK_FATAL(vkCreatePipelineLayout(zvk->device, &pipelineinfo, NULL, &ks->pipelineLayout)); + + VkComputePipelineCreateInfo pipeline = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = mod->shader, + .pName = kern->name, + }, + .layout = ks->pipelineLayout + + }; + ZVK_FATAL(vkCreateComputePipelines(zvk->device, NULL, 1, &pipeline, NULL, &ks->pipeline)); + + return ks; +} + +void compute_destroyKernel(struct kernstate *ks) { + struct zvk *zvk = ks->mod->zvk; + + if (ks->podMemory) { + vkFreeMemory(zvk->device, ks->podMemory, NULL); + vkDestroyBuffer(zvk->device, ks->podBuffer, NULL); + } + + vkDestroyPipeline(zvk->device, ks->pipeline, NULL); + vkDestroyPipelineLayout(zvk->device, ks->pipelineLayout, NULL); + + vkFreeDescriptorSets(zvk->device, zvk->descriptorPool, 1, ks->descriptorSets); + vkDestroyDescriptorSetLayout(zvk->device, ks->descriptorSetLayout, NULL); + + free(ks); +} + +// one time optional? +VkCommandBuffer compute_createCommand(struct kernstate *ks, uint32_t sizex, uint32_t sizey, uint32_t sizez) { + struct zvk *zvk = ks->mod->zvk; + VkCommandBuffer commandBuffers[1]; + + /* Create a command buffer to run this kernel with it's data set for the given size */ + VkCommandBufferAllocateInfo cmdinfo = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .commandPool = zvk->commandPool, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = 1, + }; + + ZVK_FATAL(vkAllocateCommandBuffers(zvk->device, &cmdinfo, commandBuffers)); + + VkCommandBufferBeginInfo beginInfo = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + //.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .flags = 0, + }; + ZVK_FATAL(vkBeginCommandBuffer(commandBuffers[0], &beginInfo)); + + vkCmdBindPipeline(commandBuffers[0], VK_PIPELINE_BIND_POINT_COMPUTE, ks->pipeline); + vkCmdBindDescriptorSets(commandBuffers[0], VK_PIPELINE_BIND_POINT_COMPUTE, ks->pipelineLayout, 0, 1, ks->descriptorSets, 0, NULL); + + vkCmdDispatch(commandBuffers[0], sizex, sizey, sizez); + + ZVK_FATAL(vkEndCommandBuffer(commandBuffers[0])); + + return commandBuffers[0]; +} + +// rather inefficient way to set arguments one at a time +void compute_setArg(struct kernstate *ks, int index, void *data, size_t size) { + struct zvk *zvk = ks->mod->zvk; + const struct kerninfo *kern = ks->kern; + const struct paraminfo *pi = &kern->params[index]; + const struct bindinfo *bi = &kern->bindings[pi->binding]; + + switch (bi->type) { + case ZVK_POD: { + void *pod __attribute__ ((aligned(16))); + + ZVK_FATAL(!(size == pi->size)); + ZVK_FATAL(vkMapMemory(zvk->device, ks->podMemory, 0, VK_WHOLE_SIZE, 0, &pod)); + memcpy(pod + pi->offset, data, size); + vkUnmapMemory(zvk->device, ks->podMemory); + break; + } + case ZVK_BUFFER: + ZVK_FATAL(!(size == sizeof(VkDescriptorBufferInfo))); + VkWriteDescriptorSet bufferSet = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = ks->descriptorSets[0], + .dstBinding = pi->binding, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pBufferInfo = data + }; + vkUpdateDescriptorSets(zvk->device, 1, &bufferSet, 0, NULL); + break; + case ZVK_WOIMAGE: + ZVK_FATAL(!(size == sizeof(VkDescriptorImageInfo))); + VkWriteDescriptorSet woSet = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = ks->descriptorSets[0], + .dstBinding = pi->binding, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = data + }; + vkUpdateDescriptorSets(zvk->device, 1, &woSet, 0, NULL); + break; + case ZVK_ROIMAGE: + ZVK_FATAL(!(size == sizeof(VkDescriptorImageInfo))); + VkWriteDescriptorSet roSet = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = ks->descriptorSets[0], + .dstBinding = pi->binding, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = data + }; + vkUpdateDescriptorSets(zvk->device, 1, &roSet, 0, NULL); + break; + // shader? + } +} + +void compute_setBuffers(struct kernstate *ks, VkDescriptorBufferInfo *buffers[]) { + VkWriteDescriptorSet writeSet = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = ks->descriptorSets[0], + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + }; + const struct kerninfo *kern = ks->kern; + struct zvk *zvk = ks->mod->zvk; + + for (int i=0,bindex=0;inparams;i++) { + const struct paraminfo *pi = &kern->params[i]; + const struct bindinfo *bi = &kern->bindings[pi->binding]; + + if (bi->type == ZVK_BUFFER) { + writeSet.pBufferInfo = buffers[bindex]; + writeSet.dstBinding = pi->binding; + + vkUpdateDescriptorSets(zvk->device, 1, &writeSet, 0, NULL); + bindex++; + } + } +} + +void compute_setImages(struct kernstate *ks, VkDescriptorImageInfo *images[]) { + VkWriteDescriptorSet writeSet = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = ks->descriptorSets[0], + .descriptorCount = 1, + }; + const struct kerninfo *kern = ks->kern; + struct zvk *zvk = ks->mod->zvk; + + for (int i=0,bindex=0;inparams;i++) { + const struct paraminfo *pi = &kern->params[i]; + const struct bindinfo *bi = &kern->bindings[pi->binding]; + + if (bi->type == ZVK_WOIMAGE) { + writeSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + } else if (bi->type == ZVK_ROIMAGE) { + writeSet.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + } else + continue; + + writeSet.pImageInfo = images[bindex]; + writeSet.dstBinding = pi->binding; + + vkUpdateDescriptorSets(zvk->device, 1, &writeSet, 0, NULL); + bindex++; + } +} diff --git a/opencl-args/compute.h b/opencl-args/compute.h new file mode 100644 index 0000000..203e34f --- /dev/null +++ b/opencl-args/compute.h @@ -0,0 +1,71 @@ + +#ifndef COMPUTE_H +#define COMPUTE_H + +#include "zvk.h" + +/* ********************************************************************** */ + +struct paraminfo { + int binding; + int offset; + int size; +}; + +struct bindinfo { + int type; + int size; +}; + +struct kerninfo { + const char *name; + int nbindings; + int nparams; + const struct bindinfo *bindings; + const struct paraminfo *params; +}; + +struct kernstate { + struct modstate *mod; + const struct kerninfo *kern; + + VkDeviceMemory podMemory; + VkBuffer podBuffer; + + VkDescriptorSetLayout descriptorSetLayout; + VkDescriptorSet descriptorSets[1]; + + VkPipelineLayout pipelineLayout; + VkPipeline pipeline; +}; + +/* ********************************************************************** */ + +struct modinfo { + const uint32_t *code; + size_t codeSize; + int nkernels; + const struct kerninfo *kernels[]; +}; + +struct modstate { + struct zvk *zvk; + const struct modinfo *modinfo; + + VkShaderModule shader; +}; + +struct modstate *compute_createModule(struct zvk *zvk, const struct modinfo *mod); +void compute_destroyModule(struct modstate *mod); + +struct kernstate *compute_createKernel(struct modstate *mod, const char *name); +void compute_destroyKernel(struct kernstate *ks); + +VkCommandBuffer compute_createCommand(struct kernstate *ks, uint32_t sizex, uint32_t sizey, uint32_t sizez); + +void compute_setArg(struct kernstate *ks, int index, void *data, size_t size); + +void compute_setBuffers(struct kernstate *ks, VkDescriptorBufferInfo *buffers[]); +void compute_setImages(struct kernstate *ks, VkDescriptorImageInfo *images[]); + +#endif diff --git a/opencl-args/hello.c b/opencl-args/hello.c new file mode 100644 index 0000000..0c5a723 --- /dev/null +++ b/opencl-args/hello.c @@ -0,0 +1,125 @@ + +#include +#include +#include + +#include "compute.h" + +#include "hello-host.h" + +void zvkAllocBuffer2(struct zvk *g, VkDeviceSize dataSize, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, struct zvkBuffer *buffer) { + zvkAllocBuffer(g, dataSize, usage, properties, &buffer->info.buffer, &buffer->memory); + buffer->info.offset = 0; + buffer->info.range = dataSize; +} + +void zvkFreeBuffer(struct zvk *zvk, struct zvkBuffer *buffer) { + vkFreeMemory(zvk->device, buffer->memory, NULL); + vkDestroyBuffer(zvk->device, buffer->info.buffer, NULL); +} + +void zvkSetBuffer(struct zvk *zvk, struct zvkBuffer *buffer, void *data, size_t size) { + void *mem; + + vkMapMemory(zvk->device, buffer->memory, 0, buffer->info.range, 0, &mem); + memcpy(mem, data, size); + vkUnmapMemory(zvk->device, buffer->memory); +} + +int main(int argc, char **argv) { + struct zvk *zvk = zvkInit(); + struct modstate *mod = compute_createModule(zvk, &hello_modinfo); + struct kernstate *hello = compute_createKernel(mod, "hello"); + struct zvkBuffer dstBuffer, srcBuffer; + + zvkAllocBuffer2(zvk, 64, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + &dstBuffer); + + zvkAllocBuffer2(zvk, 13, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + &srcBuffer); + + zvkSetBuffer(zvk, &srcBuffer, "Hello World!\n", 13); + + hello_hello_setArgs(hello, dstBuffer.info, 64, srcBuffer.info, 13); + + VkCommandBuffer cmd = compute_createCommand(hello, 64, 1, 1); + + { + VkResult res; + VkSubmitInfo submitInfo[] = { + { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &cmd, + }, + }; + + VkFence fence; + VkFenceCreateInfo fenceInfo = { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO + }; + ZVK_FATAL(vkCreateFence(zvk->device, &fenceInfo, NULL, &fence)); + + ZVK_FATAL(vkQueueSubmit(zvk->computeQueue, ZVK_ARRAY_SIZEOF(submitInfo), submitInfo, fence)); + + do { + res = vkWaitForFences(zvk->device, 1, &fence, VK_TRUE, 1000000); + } while (res == VK_TIMEOUT); + + vkDestroyFence(zvk->device, fence, NULL); + + char *dst; + printf("dst:\n"); + vkMapMemory(zvk->device, dstBuffer.memory, 0, dstBuffer.info.range, 0, (void **)&dst); + fwrite(dst, 1, dstBuffer.info.range, stdout); + printf("\n"); + vkUnmapMemory(zvk->device, dstBuffer.memory); + } + + hello_hello_setArgs(hello, dstBuffer.info, 64, srcBuffer.info, 6); + + + { + VkResult res; + VkSubmitInfo submitInfo[] = { + { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &cmd, + }, + }; + + VkFence fence; + VkFenceCreateInfo fenceInfo = { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO + }; + ZVK_FATAL(vkCreateFence(zvk->device, &fenceInfo, NULL, &fence)); + + ZVK_FATAL(vkQueueSubmit(zvk->computeQueue, ZVK_ARRAY_SIZEOF(submitInfo), submitInfo, fence)); + + do { + res = vkWaitForFences(zvk->device, 1, &fence, VK_TRUE, 1000000); + } while (res == VK_TIMEOUT); + + vkDestroyFence(zvk->device, fence, NULL); + + char *dst; + printf("dst:\n"); + vkMapMemory(zvk->device, dstBuffer.memory, 0, dstBuffer.info.range, 0, (void **)&dst); + fwrite(dst, 1, dstBuffer.info.range, stdout); + printf("\n"); + vkUnmapMemory(zvk->device, dstBuffer.memory); + } + + + vkFreeCommandBuffers(zvk->device, zvk->commandPool, 1, &cmd); + + zvkFreeBuffer(zvk, &srcBuffer); + zvkFreeBuffer(zvk, &dstBuffer); + + compute_destroyKernel(hello); + compute_destroyModule(mod); + zvkDestroy(zvk); +} diff --git a/opencl-args/hello.cl b/opencl-args/hello.cl new file mode 100644 index 0000000..b04f099 --- /dev/null +++ b/opencl-args/hello.cl @@ -0,0 +1,8 @@ + +kernel void hello(global char *dst, uint dsize, constant char *src, uint ssize) { + uint x = get_global_id(0); + + if (x < dsize) { + dst[x] = src[x % ssize]; + } +} diff --git a/opencl-args/zvk.c b/opencl-args/zvk.c new file mode 100644 index 0000000..04977ce --- /dev/null +++ b/opencl-args/zvk.c @@ -0,0 +1,282 @@ + /* +The MIT License (MIT) + +Copyright (C) 2017 Eric Arnebäck +Copyright (C) 2019 Michael Zucchi + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + + */ + +/* + Convert a few re-usable parts of compute-only code to a library. + + It removes a little bit of the clutter in later experiments. + */ + +#include +#include + +#include "zvk.h" + +/** + * This finds the memory type index for the memory on a specific device. + */ +static int find_memory_type(VkPhysicalDeviceMemoryProperties *memory, uint32_t typeMask, VkMemoryPropertyFlagBits query) +{ + for (int i = 0; i < memory->memoryTypeCount; i++) { + if (((1 << i) & typeMask) && (memory->memoryTypes[i].propertyFlags & query) == query) + return i; + } + return -1; +} + +/** + * Create vulkan instance. + * + * Compute-only requires no extensions so this is simple. + * + * To turn on debugging layers: + * export VK_INSTANCE_LAYERS=VK_LAYER_LUNARG_standard_validation + */ +static void init_instance(struct zvk *g) +{ + VkApplicationInfo app = { + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pApplicationName = "mandelbrot", + .pEngineName = "none", + .apiVersion = VK_API_VERSION_1_0, + }; + const char * const extensions[] = { + "VK_KHR_get_physical_device_properties2" + }; + VkInstanceCreateInfo info = { + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pApplicationInfo = &app, + .enabledExtensionCount = sizeof(extensions) / sizeof(*extensions), + .ppEnabledExtensionNames = extensions, + }; + + ZVK_FATAL(vkCreateInstance(&info, NULL, &g->instance)); +} + +/** + * This finds a suitable device and queue family. + * + * In this case it is a device that supports a compute queue. It + * preferentially looks for a non-graphics compute queue. + * + * It could also make use of: + * VkPhysicalDeviceProperties props; + * vkGetPhysicalDeviceProperties(devs[i], &props); + */ +static void init_device(struct zvk *g) +{ + uint32_t devcount; + + vkEnumeratePhysicalDevices(g->instance, &devcount, NULL); + VkPhysicalDevice devs[devcount]; + ZVK_FATAL(vkEnumeratePhysicalDevices(g->instance, &devcount, devs)); + + int best = 0; + int devid = -1; + int queueid = -1; + + for (int i = 0; i < devcount; i++) { + uint32_t family_count; + + vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, NULL); + VkQueueFamilyProperties famprops[family_count]; + vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &family_count, famprops); + + for (uint32_t j = 0; j < family_count; j++) { + int score = 0; + + if ((famprops[j].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0) + score += 1; + if ((famprops[j].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0) + score += 1; + + if (score > best) { + score = best; + devid = i; + queueid = j; + } + } + } + + if (devid == -1) { + fprintf(stderr, "Unable to find suitable device\n"); + exit(1); + } + + g->physicalDevice = devs[devid]; + g->computeDeviceIndex = devid; + g->computeQueueIndex = queueid; +#if 0 + { + VkExtensionProperties prop[200]; + uint32_t count = 200; + vkEnumerateDeviceExtensionProperties(g->physicalDevice, NULL, &count, prop); + for (int i=0;iphysicalDevice, &feat); + + printf("shaderFloat16 = %d\n", ff.shaderFloat16); + printf("shaderInt8 = %d\n", ff.shaderInt8); + } + { + VkPhysicalDeviceProperties2 prop = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, + }; + vkGetPhysicalDeviceProperties2(g->physicalDevice, &prop); + + } +#endif + float qpri[] = {0.0f}; + VkDeviceQueueCreateInfo qinfo = { + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .queueCount = 1, + .pQueuePriorities = qpri, // Note: cannot be null + .queueFamilyIndex = g->computeQueueIndex, + }; + const char * const extensions[] = { + VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, + "VK_EXT_descriptor_indexing", + "VK_KHR_maintenance3", + }; + VkPhysicalDeviceDescriptorIndexingFeaturesEXT di = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT, + .descriptorBindingStorageBufferUpdateAfterBind = 1, + }; + VkPhysicalDeviceShaderFloat16Int8FeaturesKHR ff = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR, + .pNext = &di, + .shaderInt8 = 1, + }; + VkDeviceCreateInfo devinfo = { + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .pNext = &ff, + .queueCreateInfoCount = 1, + .pQueueCreateInfos = &qinfo, + .enabledExtensionCount = ZVK_ARRAY_SIZEOF(extensions), + .ppEnabledExtensionNames = extensions, + }; + + ZVK_FATAL(vkCreateDevice(g->physicalDevice, &devinfo, NULL, &g->device)); + + /* These values are cached for convenience */ + vkGetPhysicalDeviceMemoryProperties(g->physicalDevice, &g->deviceMemoryProperties); + vkGetDeviceQueue(g->device, g->computeQueueIndex, 0, &g->computeQueue); +} + +/** + * Create a descriptor pool for later use. + */ +static void init_descriptor_pool(struct zvk *g) +{ + VkDescriptorPoolSize type_count[] = { + { + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 32, + } + }; + VkDescriptorPoolCreateInfo descriptor_pool = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT + | VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT_EXT, + .maxSets = 1, + .poolSizeCount = ZVK_ARRAY_SIZEOF(type_count), + .pPoolSizes = type_count, + }; + + ZVK_FATAL(vkCreateDescriptorPool(g->device, &descriptor_pool, NULL, &g->descriptorPool)); +} + +/** + * Create a command pool for later use. + */ +static void init_command_pool(struct zvk *g) +{ + VkCommandPoolCreateInfo poolinfo = { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .queueFamilyIndex = g->computeQueueIndex, + }; + + ZVK_FATAL(vkCreateCommandPool(g->device, &poolinfo, NULL, &g->commandPool)); +} + +struct zvk *zvkInit(void) { + struct zvk *state = calloc(1, sizeof(*state)); + + init_instance(state); + init_device(state); + init_descriptor_pool(state); + init_command_pool(state); + + return state; +} + +void zvkDestroy(struct zvk *zvk) { + vkDestroyCommandPool(zvk->device, zvk->commandPool, NULL); + vkDestroyDescriptorPool(zvk->device, zvk->descriptorPool, NULL); + vkDestroyDevice(zvk->device, NULL); + vkDestroyInstance(zvk->instance, NULL); + free(zvk); +} + +/** + * Buffers are created in three steps: + * 1) create buffer, specifying usage and size + * 2) allocate memory based on memory requirements + * 3) bind memory + * + */ +void zvkAllocBuffer(struct zvk *g, VkDeviceSize dataSize, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer *buffer, VkDeviceMemory *memory){ + VkMemoryRequirements req; + VkBufferCreateInfo buf_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = dataSize, + .usage = usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE + }; + ZVK_FATAL(vkCreateBuffer(g->device, &buf_info, NULL, buffer)); + + vkGetBufferMemoryRequirements(g->device, *buffer, &req); + + VkMemoryAllocateInfo alloc = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = req.size, + .memoryTypeIndex = find_memory_type(&g->deviceMemoryProperties, req.memoryTypeBits, properties) + }; + ZVK_FATAL(vkAllocateMemory(g->device, &alloc, NULL, memory)); + + ZVK_FATAL(vkBindBufferMemory(g->device, *buffer, *memory, 0)); +} diff --git a/opencl-args/zvk.h b/opencl-args/zvk.h new file mode 100644 index 0000000..70afd44 --- /dev/null +++ b/opencl-args/zvk.h @@ -0,0 +1,52 @@ + +#ifndef ZVK_H +#define ZVK_H + +#include +//#include +//#include + +struct zvk { + VkInstance instance; + VkPhysicalDevice physicalDevice; + VkDevice device; + VkPhysicalDeviceMemoryProperties deviceMemoryProperties; + + VkDescriptorPool descriptorPool; + VkCommandPool commandPool; + + uint32_t computeQueueIndex; + uint32_t computeDeviceIndex; + + VkQueue computeQueue; +}; + +struct zvkBuffer { + VkDescriptorBufferInfo info; + VkDeviceMemory memory; +}; + +#define ZVK_BUFFER 0 +#define ZVK_POD 1 +#define ZVK_ROIMAGE 2 +#define ZVK_WOIMAGE 3 +#define ZVK_SAMPLER 4 + +#define ZVK_FATAL(r) do { \ + VkResult res = (r); \ + if (res) { \ + fprintf(stderr, "failed: %s = %d", #r, res); \ + abort(); \ + } \ +} while (0) + +struct zvk *zvkInit(void); +void zvkAllocBuffer(struct zvk *g, VkDeviceSize dataSize, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer *buffer, VkDeviceMemory *memory); +void zvkAllocBuffer2(struct zvk *g, VkDeviceSize dataSize, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, struct zvkBuffer *buffer); + +void zvkDestroy(struct zvk *zvk); + + +#define ZVK_ARRAY_SIZEOF(a) (sizeof(a)/sizeof(*a)) + +#endif