本部分主要介紹:將計算着色器與單獨的計算隊列一起使用,以將不同的卷積內核(和效果)實時應用於輸入圖像。
一、卷積
卷積在信號處理領域有極其廣泛的應用, 也有嚴格的物理和數學定義. 本文只討論卷積在數字圖像處理中的應用.
在數字圖像處理中, 有一種基本的處理方法:線性濾波. 待處理的平面數字圖像可被看做一個大矩陣, 圖像的每個像素對應着矩陣的每個元素, 假設我們平面的分辨率是 1024*768(寬*高), 那麼對應的大矩陣的行數 = 768, 列數 = 1024.
用於濾波的是一個濾波器小矩陣(也叫卷積核), 濾波器小矩陣一般是個方陣, 也就是 行數 和 列數 相同, 比如常見的用於邊緣檢測的 Sobel 算子 就是兩個 3*3 的小矩陣.
進行濾波就是對於大矩陣中的每個像素, 計算它周圍像素和濾波器矩陣對應位置元素的乘積, 然後把結果相加到一起, 最終得到的值就作爲該像素的新值, 這樣就完成了一次濾波.
上面的處理過程可以參考這個示意圖:
可參考此網站,具體學習常用卷積核對圖像的處理效果。
二、實現
2.1 常規創建
首先,我們需要定義圖形和計算管線的數據結構體:
// Resources for the graphics part of the example
struct {
VkDescriptorSetLayout descriptorSetLayout; // Binding layout for the image display shaders
VkDescriptorSet descriptorSetPreCompute; // Image display shader bindings before compute shader image manipulation
VkDescriptorSet descriptorSetPostCompute; // Image display shader bindings after compute shader image manipulation
VkPipeline pipeline; // Image display pipeline
VkPipelineLayout pipelineLayout; // Layout of the graphics pipeline
} graphics;
// Resources for the compute part of the example
struct Compute {
VkQueue queue; // Separate queue for compute commands (its queue family may differ from the one used for graphics)
VkCommandPool commandPool; // Separate command pool (its queue family may differ from the one used for graphics)
VkCommandBuffer commandBuffer; // Command buffer storing the dispatch commands and barriers
VkFence fence; // Synchronization fence to avoid rewriting the compute command buffer while it is still in use
VkDescriptorSetLayout descriptorSetLayout; // Compute shader binding layout
VkDescriptorSet descriptorSet; // Compute shader bindings
VkPipelineLayout pipelineLayout; // Layout of the compute pipeline
std::vector<VkPipeline> pipelines; // Compute pipelines for the image filters
int32_t pipelineIndex = 0; // Index into `pipelines` of the currently active image filter
uint32_t queueFamilyIndex; // Queue family index that getComputeQueue() selects and fetches `queue` from. NOTE(review): the original comment called this the graphics queue family index used for barriers — confirm intended use
} compute;
之後,我們需要創建一個函數prepareTextureTarget,來準備一個用於存儲計算着色器計算結果的紋理目標:
// Prepares a device-local 2D texture that the compute shader writes to (storage image)
// and the fragment shader samples from, together with its sampler, image view and
// descriptor info.
//
// tex    : texture object to fill in (width, height, mipLevels, image, deviceMemory,
//          imageLayout, sampler, view, descriptor and device are all written here)
// width  : image width in texels
// height : image height in texels
// format : image format; must support storage image operations with optimal tiling
void prepareTextureTarget(vks::Texture *tex, uint32_t width, uint32_t height, VkFormat format)
{
    // Query the device properties of the requested texture format
    VkFormatProperties formatProperties;
    vkGetPhysicalDeviceFormatProperties(physicalDevice, format, &formatProperties);
    // The compute shader stores into this image, so the format must support storage image operations
    assert(formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT);

    // Record the target dimensions. The image has exactly one mip level; store that on the
    // texture so the sampler's maxLod below is derived from a value assigned in this function
    // (previously tex->mipLevels was read without ever being set here).
    tex->width = width;
    tex->height = height;
    tex->mipLevels = 1;

    VkImageCreateInfo imageCreateInfo = vks::initializers::imageCreateInfo();
    imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
    imageCreateInfo.format = format;
    imageCreateInfo.extent = { width, height, 1 };
    imageCreateInfo.mipLevels = tex->mipLevels;
    imageCreateInfo.arrayLayers = 1;
    imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
    imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
    // The image is sampled in the fragment shader and used as a storage target in the compute shader
    imageCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
    imageCreateInfo.flags = 0;
    // NOTE: with VK_SHARING_MODE_EXCLUSIVE the Vulkan specification requires an explicit queue
    // family ownership transfer when the image is accessed from queues of different families.
    // This function does not perform one; if the compute and graphics queue families differ,
    // add ownership-transfer barriers or switch to VK_SHARING_MODE_CONCURRENT.
    imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;

    // Create the image and back it with device-local memory
    VK_CHECK_RESULT(vkCreateImage(device, &imageCreateInfo, nullptr, &tex->image));

    VkMemoryRequirements memReqs;
    vkGetImageMemoryRequirements(device, tex->image, &memReqs);

    VkMemoryAllocateInfo memAllocInfo = vks::initializers::memoryAllocateInfo();
    memAllocInfo.allocationSize = memReqs.size;
    memAllocInfo.memoryTypeIndex = vulkanDevice->getMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
    VK_CHECK_RESULT(vkAllocateMemory(device, &memAllocInfo, nullptr, &tex->deviceMemory));
    VK_CHECK_RESULT(vkBindImageMemory(device, tex->image, tex->deviceMemory, 0));

    // Transition the image from UNDEFINED to GENERAL, the layout required for storage image access
    VkCommandBuffer layoutCmd = vulkanDevice->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true);
    tex->imageLayout = VK_IMAGE_LAYOUT_GENERAL;
    vks::tools::setImageLayout(
        layoutCmd, tex->image,
        VK_IMAGE_ASPECT_COLOR_BIT,
        VK_IMAGE_LAYOUT_UNDEFINED,
        tex->imageLayout);
    vulkanDevice->flushCommandBuffer(layoutCmd, queue, true);

    // Create the sampler used when the fragment shader reads the image
    VkSamplerCreateInfo sampler = vks::initializers::samplerCreateInfo();
    sampler.magFilter = VK_FILTER_LINEAR;
    sampler.minFilter = VK_FILTER_LINEAR;
    sampler.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
    sampler.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
    sampler.addressModeV = sampler.addressModeU;
    sampler.addressModeW = sampler.addressModeU;
    sampler.mipLodBias = 0.0f;
    sampler.maxAnisotropy = 1.0f;
    sampler.compareOp = VK_COMPARE_OP_NEVER;
    sampler.minLod = 0.0f;
    sampler.maxLod = static_cast<float>(tex->mipLevels);
    sampler.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
    VK_CHECK_RESULT(vkCreateSampler(device, &sampler, nullptr, &tex->sampler));

    // Create the image view covering the single mip level and array layer
    VkImageViewCreateInfo view = vks::initializers::imageViewCreateInfo();
    view.image = tex->image;
    view.viewType = VK_IMAGE_VIEW_TYPE_2D;
    view.format = format;
    view.components = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A };
    view.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
    VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &tex->view));

    // Initialize the descriptor info for later descriptor set updates
    tex->descriptor.imageLayout = tex->imageLayout;
    tex->descriptor.imageView = tex->view;
    tex->descriptor.sampler = tex->sampler;
    tex->device = vulkanDevice;
}
接下來,我們正常創建一個圖形管線來渲染一個正方形即可。其中用於顯示的頂點及片元着色器很簡單,僅是用到了一個紋理貼圖如下:
頂點着色器:
#version 450
// Vertex shader for the textured quad: transforms the position and forwards the UV.

// Per-vertex inputs: position and texture coordinate
layout (location = 0) in vec3 inPos;
layout (location = 1) in vec2 inUV;
// Uniform block holding the projection and model matrices
layout (binding = 0) uniform UBO
{
mat4 projection;
mat4 model;
} ubo;
// Texture coordinate passed on to the fragment shader
layout (location = 0) out vec2 outUV;
out gl_PerVertex
{
vec4 gl_Position;
};
void main()
{
// Pass the texture coordinate through unchanged
outUV = inUV;
// Transform the position by the model matrix, then the projection matrix
gl_Position = ubo.projection * ubo.model * vec4(inPos.xyz, 1.0);
}
片元着色器:
#version 450
// Fragment shader for the textured quad: outputs the sampled texture color.

// Texture bound at binding 1 of the graphics descriptor set
layout (binding = 1) uniform sampler2D samplerColor;
// Interpolated texture coordinate from the vertex shader
layout (location = 0) in vec2 inUV;
layout (location = 0) out vec4 outFragColor;
void main()
{
// Sample the bound texture at the interpolated coordinate
outFragColor = texture(samplerColor, inUV);
}
其中,應注意在創建描述符池的時候,我們應加入一項存儲圖像(storage image)類型的描述符池大小,供計算管線使用存儲圖像進行圖像讀寫:
void setupDescriptorPool()
{
std::vector<VkDescriptorPoolSize> poolSizes = {
// 圖形管線統一緩衝區
vks::initializers::descriptorPoolSize(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2),
// 圖形管線圖像採樣器顯示計算輸出圖像
vks::initializers::descriptorPoolSize(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 2),
// 計算管道使用存儲映像進行映像讀寫
vks::initializers::descriptorPoolSize(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2),
};
VkDescriptorPoolCreateInfo descriptorPoolInfo = vks::initializers::descriptorPoolCreateInfo(poolSizes, 3);
VK_CHECK_RESULT(vkCreateDescriptorPool(device, &descriptorPoolInfo, nullptr, &descriptorPool));
}
在創建描述符集的時候,我們也要對應地使用vkAllocateDescriptorSets函數分配輸入圖像(計算着色器處理前)和最終圖像(經過計算着色器處理後)兩個描述符集對象並更新。
2.2 計算着色器與單獨的計算隊列
最後重點來了!!!!創建計算着色器與單獨的計算隊列。
首先,我們需要查找並創建一個可計算的設備隊列:
// 查找並創建一個可計算的設備隊列
void getComputeQueue()
{
uint32_t queueFamilyCount;
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, NULL);
assert(queueFamilyCount >= 1);
std::vector<VkQueueFamilyProperties> queueFamilyProperties;
queueFamilyProperties.resize(queueFamilyCount);
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, queueFamilyProperties.data());
//一些設備有專門的計算隊列,因此我們首先嚐試找到一個支持計算而不支持圖形的隊列
bool computeQueueFound = false;
for (uint32_t i = 0; i < static_cast<uint32_t>(queueFamilyProperties.size()); i++)
{
if ((queueFamilyProperties[i].queueFlags & VK_QUEUE_COMPUTE_BIT) && ((queueFamilyProperties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0))
{
compute.queueFamilyIndex = i;
computeQueueFound = true;
break;
}
}
//如果沒有專用的計算隊列,只需找到支持計算的第一個隊列族即可
if (!computeQueueFound)
{
for (uint32_t i = 0; i < static_cast<uint32_t>(queueFamilyProperties.size()); i++)
{
if (queueFamilyProperties[i].queueFlags & VK_QUEUE_COMPUTE_BIT)
{
compute.queueFamilyIndex = i;
computeQueueFound = true;
break;
}
}
}
// 計算在Vulkan中是必需的,因此必須至少有一個隊列家族支持計算
assert(computeQueueFound);
// 從設備獲取一個計算隊列
vkGetDeviceQueue(device, compute.queueFamilyIndex, 0, &compute.queue);
}
其次,我們來創建計算管線相關的對象。在描述符佈局中,我們綁定輸入和輸出兩個位置:
std::vector<VkDescriptorSetLayoutBinding> setLayoutBindings = {
// Binding 0: Input image (read-only) 輸入圖像(只讀)
vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 0),
// Binding 1: Output image (write) 輸出圖像(寫)
vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 1),
};
創建描述符佈局、管線佈局、分配描述符集等基本步驟不再贅述…
接下來,我們根據之前的這些佈局數據,創建一個邊緣檢測卷積計算着色器,其着色器如下:
#version 450
// Edge detection compute shader: applies a 3x3 convolution kernel to the grayscale
// neighbourhood of each texel of inputImage and writes the result to resultImage.

// One work group processes a 16x16 tile of texels
layout (local_size_x = 16, local_size_y = 16) in;
layout (binding = 0, rgba8) uniform readonly image2D inputImage;
layout (binding = 1, rgba8) uniform image2D resultImage;

// Computes sum(kernel[i] * data[i]) / denom + offset, clamped to [0, 1].
float conv(in float[9] kernel, in float[9] data, in float denom, in float offset)
{
    float res = 0.0;
    for (int i=0; i<9; ++i)
    {
        res += kernel[i] * data[i];
    }
    return clamp(res/denom + offset, 0.0, 1.0);
}

// Grayscale values of the 3x3 neighbourhood around the current texel
struct ImageData
{
    float avg[9];
} imageData;

void main()
{
    // Fetch the neighbouring texels and convert each to grayscale (channel average).
    // NOTE(review): texels on the image border read coordinates outside the image here;
    // confirm the out-of-bounds imageLoad behaviour is acceptable on the target device.
    int n = -1;
    for (int i=-1; i<2; ++i)
    {
        for(int j=-1; j<2; ++j)
        {
            n++;
            vec3 rgb = imageLoad(inputImage, ivec2(gl_GlobalInvocationID.x + i, gl_GlobalInvocationID.y + j)).rgb;
            imageData.avg[n] = (rgb.r + rgb.g + rgb.b) / 3.0;
        }
    }

    // Edge detection kernel: centre weight 1, each of the 8 neighbours -1/8.
    // The weights sum to zero, so uniform regions map to 0 and only intensity changes remain.
    // (The previous listing used the emboss kernel here, identical to the emboss example.)
    float[9] kernel;
    kernel[0] = -1.0/8.0; kernel[1] = -1.0/8.0; kernel[2] = -1.0/8.0;
    kernel[3] = -1.0/8.0; kernel[4] =  1.0;     kernel[5] = -1.0/8.0;
    kernel[6] = -1.0/8.0; kernel[7] = -1.0/8.0; kernel[8] = -1.0/8.0;

    // Denominator 0.1 amplifies the response by 10; no offset, so flat areas stay black
    vec4 res = vec4(vec3(conv(kernel, imageData.avg, 0.1, 0.0)), 1.0);
    imageStore(resultImage, ivec2(gl_GlobalInvocationID.xy), res);
}
加載此着色器,並創建對應的計算管線:
//創建計算着色器管道
std::string fileName = getAssetPath() + "shaders/computeshader/edgedetect.comp.spv";
computePipelineCreateInfo.stage = loadShader(fileName, VK_SHADER_STAGE_COMPUTE_BIT);
VkPipeline pipeline;
VK_CHECK_RESULT(vkCreateComputePipelines(device, pipelineCache, 1, &computePipelineCreateInfo, nullptr, &pipeline));
之後,便是常規操作:我們需要爲計算隊列族創建單獨的命令池,並從中爲計算操作分配一個命令緩衝區,然後創建一個柵欄(fence)用於同步計算。
最後,我們還要編寫一個buildComputeCommandBuffer函數,用於錄製包含計算調度命令的命令緩衝區(類似常規的buildCommandBuffer)。
// Records compute.commandBuffer: binds the currently selected compute pipeline and its
// descriptor set, then dispatches one 16x16 work group per 16x16 tile of the target texture.
void buildComputeCommandBuffer()
{
    // The command buffer may be rebuilt after a pipeline change; wait for the compute queue
    // to become idle so the buffer is not in use while it is re-recorded
    VK_CHECK_RESULT(vkQueueWaitIdle(compute.queue));

    VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo();
    VK_CHECK_RESULT(vkBeginCommandBuffer(compute.commandBuffer, &cmdBufInfo));

    vkCmdBindPipeline(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipelines[compute.pipelineIndex]);
    vkCmdBindDescriptorSets(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipelineLayout, 0, 1, &compute.descriptorSet, 0, nullptr);

    // Group counts use integer division by 16; the divisor matches the 16x16 local size
    // declared in the compute shader. Texels beyond the last full 16-texel tile in either
    // dimension are not processed.
    vkCmdDispatch(compute.commandBuffer, textureComputeTarget.width / 16, textureComputeTarget.height / 16, 1);

    // Check the result like vkBeginCommandBuffer above instead of silently ignoring a failed recording
    VK_CHECK_RESULT(vkEndCommandBuffer(compute.commandBuffer));
}
運行着色器,可以看到下圖效果:
再比如,我們使用浮雕卷積,修改計算着色器代碼:
// Emboss kernel (row-major 3x3); pass 1.0 as denominator and 0.50 as offset to conv()
float[9] kernel = float[9](
    -1.0,  0.0, 0.0,
     0.0, -1.0, 0.0,
     0.0,  0.0, 2.0);
編譯着色器運行,可見: