引言
在《CUDA與OpenGL互操作之頂點緩存映射》中講解了如何使用CUDA操作OpenGL中的頂點緩存數(shù)據(jù)扒寄,從而實現(xiàn)CUDA與OpenGL互操作烙常。有時候我們常常使用CUDA對圖像進行后處理国葬,比如降噪增強心包。然后把結(jié)果顯示在窗口上跪腹。這個時候使用VBO進行互操作顯然是不合適的畏梆,CUDA為我們提供了另外一種手段來實現(xiàn)該目的闸婴,即使用紋理映射方式坏挠。
相關(guān)接口介紹
操作紋理的方式和操作頂點緩沖的方式基本一致,先注冊資源邪乍,接著映射到CUDA流中降狠,獲取數(shù)組指針,將Cuda計算結(jié)果拷貝到紋理數(shù)組中庇楞。完成之后榜配,取消資源映射,最后取消注冊資源吕晌。
注冊顯卡資源
__host__ ?cudaError_t cudaGraphicsGLRegisterImage ( cudaGraphicsResource** resource, GLuint image, GLenum target, unsigned int flags )
注冊一個OpenGL紋理對象蛋褥,參數(shù)說明:
- resource - 指向返回的對象句柄
- image - 被注冊的紋理名稱
- flags - 注冊標(biāo)識
- cudaGraphicsRegisterFlagsNone: 注冊的資源在CUDA程序可以進行讀寫,默認為該枚舉
- cudaGraphicsRegisterFlagsReadOnly: 指定CUDA程序只能讀該資源
- cudaGraphicsRegisterFlagsWriteDiscard: 指定CUDA程序不會讀該資源對象睛驳,每次寫資源時資源的內(nèi)容完全被覆蓋烙心,不會保留之前的任何數(shù)據(jù)
- cudaGraphicsRegisterFlagsSurfaceLoadStore:指定CUDA將這個資源綁定到一個表面引用
- cudaGraphicsRegisterFlagsTextureGather:指定CUDA將對該資源執(zhí)行紋理收集操作
取消注冊函數(shù)
__host__ ?cudaError_t cudaGraphicsUnregisterResource ( cudaGraphicsResource_t resource )
映射顯卡資源
__host__ ?cudaError_t cudaGraphicsMapResources ( int count, cudaGraphicsResource_t* resources, cudaStream_t stream = 0 )
映射已經(jīng)注冊的資源到CUDA流中,參數(shù)說明:
- count:映射的資源數(shù)
- resources:映射的資源
- stream:映射的CUDA同步流
該函數(shù)具備默認同步動作
取消映射函數(shù)
cudaError_t cudaGraphicsUnmapResources(int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0));
獲取紋理數(shù)組
__host__ ?cudaError_t cudaGraphicsSubResourceGetMappedArray ( cudaArray_t* array, cudaGraphicsResource_t resource, unsigned int arrayIndex, unsigned int mipLevel )
獲取一個數(shù)組乏沸,通過該數(shù)組可以訪問已映射圖形資源的子資源淫茵,參數(shù)說明:
- array:返回的數(shù)組,通過該數(shù)組可以訪問資源的子資源
- resource:用于訪問的已映射資源
- arrayIndex:數(shù)組索引蹬跃,數(shù)組紋理或cubemap面的索引匙瘪,通過cudaGraphicsCubeFace對cubemap紋理進行子資源訪問
- mipLevel:用于子資源訪問的Mipmap級別
使用示例
參照CUDA simpleCUDA2GL示例,使用Qt進行改寫:
#pragma once
#include <QOpenGLWidget>
#include <QOpenGLFunctions>
#include <QOpenGLBuffer>
#include <QOpenGLVertexArrayObject>
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
class Shader;
class QOpenGLTexture;
class CuTexImg : public QOpenGLWidget, protected QOpenGLFunctions
{
public:
CuTexImg(QWidget *parent = nullptr);
~CuTexImg();
protected:
virtual void initializeGL();
virtual void resizeGL(int w, int h);
virtual void paintGL();
private:
void makeObject();
void runCuda();
void cleanup();
private:
Shader* m_shader;
QOpenGLTexture* m_texture;
QOpenGLBuffer m_vbo, m_ibo;
QOpenGLVertexArrayObject m_vao;
uchar4* m_cuPtr;
cudaGraphicsResource_t m_resource;
};
#include "CuTexImg.h"
#include "Shader.h"
#include <QOpenGLTexture>
#include "helper_cuda.h"
static const int DIM = 512;
extern "C" void
launch_cudaProcess(dim3 grid, dim3 block, uchar4 *g_odata, int imgw);
CuTexImg::CuTexImg(QWidget *parent) : QOpenGLWidget(parent), m_vbo(QOpenGLBuffer::VertexBuffer), m_ibo(QOpenGLBuffer::IndexBuffer)
{
resize(DIM, DIM);
}
CuTexImg::~CuTexImg()
{
}
void CuTexImg::initializeGL()
{
initializeOpenGLFunctions();
makeObject();
checkCudaErrors(cudaGraphicsGLRegisterImage(&m_resource, m_texture->textureId(), GL_TEXTURE_2D, cudaGraphicsMapFlagsWriteDiscard));
checkCudaErrors(cudaMalloc((void**)&m_cuPtr, DIM*DIM * sizeof(uchar4)));
runCuda();
connect(context(), &QOpenGLContext::aboutToBeDestroyed, this, &CuTexImg::cleanup);
}
void CuTexImg::resizeGL(int w, int h)
{
glViewport(0, 0, w, h);
}
void CuTexImg::paintGL()
{
glClearColor(0.2f, 0.3f, 0.3f, 1.0f);
glClear(GL_COLOR_BUFFER_BIT);
m_shader->bind();
m_texture->bind();
m_vao.bind();
glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0);
}
void CuTexImg::makeObject()
{
m_shader = new Shader("shaders/cuTexImg.vert", "shaders/cuTexImg.frag", this);
float vertices[] = {
// positions // texture coords
1.0f, 1.0f, 0.0f, 1.0f, 1.0f, // top right
1.0f, -1.0f, 0.0f, 1.0f, 0.0f, // bottom right
-1.0f, -1.0f, 0.0f, 0.0f, 0.0f, // bottom left
-1.0f, 1.0f, 0.0f, 0.0f, 1.0f // top left
};
unsigned int indices[] = { // note that we start from 0!
0, 1, 3, // first Triangle
1, 2, 3 // second Triangle
};
m_vao.create();
m_vao.bind();
m_vbo.create();
m_vbo.bind();
m_vbo.allocate(vertices, sizeof(vertices));
m_ibo.create();
m_ibo.bind();
m_ibo.allocate(indices, sizeof(indices));
m_shader->setAttributeBuffer("aPos", GL_FLOAT, 0, 3, 5 * sizeof(float));
m_shader->enableAttributeArray("aPos");
m_shader->setAttributeBuffer("aTexCoord", GL_FLOAT, 3 * sizeof(float), 2, 5 * sizeof(float));
m_shader->enableAttributeArray("aTexCoord");
m_texture = new QOpenGLTexture(QOpenGLTexture::Target2D);
m_texture->setMagnificationFilter(QOpenGLTexture::Linear);
m_texture->setMinificationFilter(QOpenGLTexture::Nearest);
m_texture->setWrapMode(QOpenGLTexture::Repeat);
m_texture->setFormat(QOpenGLTexture::RGBA8U); // 格式5骸5び鳌!m_cuPtr的數(shù)據(jù)類型必須與該格式匹配,采樣器的類型也必須一致
m_texture->setSize(DIM, DIM); // m_cuPtr的內(nèi)存尺寸必須與紋理尺寸一致
m_texture->allocateStorage();
}
void CuTexImg::runCuda()
{
cudaArray_t glArray;
checkCudaErrors(cudaGraphicsMapResources(1, &m_resource, NULL));
checkCudaErrors(cudaGraphicsSubResourceGetMappedArray(&glArray, m_resource, 0, 0));
dim3 blocks(16, 16);
dim3 grids((DIM - 1) / 16 + 1, (DIM - 1) / 16 + 1);
launch_cudaProcess(grids, blocks, m_cuPtr, DIM);
checkCudaErrors(cudaMemcpyToArray(glArray, 0, 0, m_cuPtr, DIM * DIM * sizeof(uchar4), cudaMemcpyDeviceToDevice));
checkCudaErrors(cudaGraphicsUnmapResources(1, &m_resource, nullptr));
}
void CuTexImg::cleanup()
{
makeCurrent();
// 釋放顯存
checkCudaErrors(cudaFree(m_cuPtr));
// 取消資源注冊
checkCudaErrors(cudaGraphicsUnregisterResource(m_resource));
// 銷毀VAO/VBO
m_vao.destroy();
m_vbo.destroy();
// 釋放紋理
delete m_texture;
doneCurrent();
}
接下來是cuda代碼扼劈,cuda核函數(shù)基本與示例代碼保持一致
#include "helper_cuda.h"
// clamp x to range [a, b]
__device__ float clamp(float x, float a, float b)
{
return max(a, min(b, x));
}
// convert floating point rgb color to 8-bit integer
__device__ uchar4 rgbToInt(float r, float g, float b)
{
r = clamp(r, 0.0f, 255.0f);
g = clamp(g, 0.0f, 255.0f);
b = clamp(b, 0.0f, 255.0f);
return make_uchar4(r, g, b, 0);
}
static __global__ void cudaProcess(uchar4* g_odata, int imgw)
{
int tx = threadIdx.x;
int ty = threadIdx.y;
int bw = blockDim.x;
int bh = blockDim.y;
int x = blockIdx.x*bw + tx;
int y = blockIdx.y*bh + ty;
uchar4 c4 = make_uchar4((x & 0x20) ? 100 : 0, 0, (y & 0x20) ? 100 : 0, 0);
g_odata[y*imgw + x] = rgbToInt(c4.z, c4.y, c4.x);
}
extern "C" void
launch_cudaProcess(dim3 grid, dim3 block, uchar4 *g_odata, int imgw)
{
cudaProcess << < grid, block >> > (g_odata, imgw);
}
最后是頂點著色器代碼和片段著色器代碼
#version 330 core
layout (location = 0) in vec3 aPos;
layout (location = 1) in vec2 aTexCoord;
out vec2 TexCoord;
void main()
{
gl_Position = vec4(aPos, 1.0);
TexCoord = aTexCoord;
}
#version 330
in vec2 TexCoord;
uniform usampler2D texImage;
void main()
{
vec4 c = texture(texImage, TexCoord);
gl_FragColor = c / 255.0;
}
運行結(jié)果:
Cuda&GL.jpg
參考文章:
https://www.khronos.org/registry/OpenGL-Refpages/gl4/html/glTexImage2D.xhtml