引言
CUDA相比GLSL更加靈活疲憋,逐漸成為主流的GPGPU編程技術(shù)。CUDA加速計算后的結(jié)果往往需要在顯示器中繪制出來埃脏,如果我們將在顯存中的計算結(jié)果拷貝到主存秋忙,然后使用QPainter繪制到窗口中,這中間的顯存-主存堵幽,主存-顯存的數(shù)據(jù)拷貝會顯得冗余弹澎。既然CUDA計算的結(jié)果和圖形繪制的內(nèi)容都在顯存里,那么可以通過一些技術(shù)手段直接將CUDA計算結(jié)果繪制到顯示器上殴胧,即CUDA與Graphics的互操作佩迟。
相關(guān)接口介紹
注冊顯卡資源
__host__ cudaError_t cudaGraphicsGLRegisterBuffer(struct cudaGraphicsResource **resource, GLuint buffer, unsigned int flags);
注冊一個OpenGL緩沖對象,參數(shù)說明:
- resource - 指向返回的對象句柄
*buffer - 被注冊的緩沖對象名 - flags - 注冊標識
- cudaGraphicsRegisterFlagsNone: 注冊的資源在CUDA程序可以進行讀寫惫撰,默認為該枚舉
- cudaGraphicsRegisterFlagsReadOnly: 指定CUDA程序只能讀該資源
- cudaGraphicsRegisterFlagsWriteDiscard: 指定CUDA程序不會讀該資源對象躺涝,每次寫資源時資源的內(nèi)容完全被覆蓋,不會保留之前的任何數(shù)據(jù)
取消注冊函數(shù)
cudaError_t cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource);
映射顯卡資源
cudaError_t cudaGraphicsMapResources(int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0));
映射已經(jīng)注冊的資源到CUDA流中夯膀,參數(shù)說明:
- count:映射的資源數(shù)
- resources:映射的資源
- stream:映射的CUDA同步流
該函數(shù)具備默認同步動作苍蔬。
取消映射函數(shù)
cudaError_t cudaGraphicsUnmapResources(int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0));
獲取設(shè)備指針
cudaError_t cudaGraphicsResourceGetMappedPointer(void **devPtr, size_t *size, cudaGraphicsResource_t resource);
獲取已映射資源的設(shè)備指針碟绑,參數(shù)說明:
- devPtr:設(shè)備指針茎匠,CUDA程序可以使用該指針進行數(shù)據(jù)更新
- size:緩沖數(shù)據(jù)尺寸
- resources:已映射的資源
使用示例
參照CUDA simpleGL示例押袍,該示例是使用CUDA對頂點數(shù)據(jù)進行操作后,再將頂點繪制到窗口上汽馋。使用Qt進行改寫:
class CuGl : public QOpenGLWidget, protected QOpenGLFunctions
{
public:
CuGl(QWidget* parent = nullptr);
~CuGl();
protected:
virtual void initializeGL();
virtual void resizeGL(int w, int h);
virtual void paintGL();
private:
void runCuda();
void cleanup();
private:
float m_fAnim;
Shader *m_shader;
QOpenGLBuffer m_vbo;
QOpenGLVertexArrayObject m_vao;
cudaGraphicsResource_t m_vboResource;
};
static const int DIM = 512;
static __global__ void kernel(float3* pos, unsigned int width, unsigned int height, float time)
{
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
// calculate uv coordinates
float u = x / (float)width;
float v = y / (float)height;
u = u * 2.0f - 1.0f;
v = v * 2.0f - 1.0f;
// calculate simple sine wave pattern
float freq = 4.0f;
float w = sinf(u*freq + time) * cosf(v*freq + time) * 0.5f;
// write output vertex
pos[y*width + x] = make_float3(u, w, v);
}
CuGl::CuGl(QWidget* parent) : QOpenGLWidget(parent), m_fAnim(0.0f)
{
resize(DIM, DIM);
}
CuGl::~CuGl()
{
}
void CuGl::initializeGL()
{
initializeOpenGLFunctions();
m_shader = new Shader("shaders/cuda.vert", "shaders/cuda.frag", this);
// 創(chuàng)建VAO/VBO
if (m_vao.create())
{
m_vao.bind();
m_vbo.create();
m_vbo.bind();
unsigned int nbytes = DIM * DIM * 3 * sizeof(GLfloat);
m_vbo.allocate(nbytes);
m_shader->setAttributeBuffer("aPos", GL_FLOAT, 0, 3, 3 * sizeof(float));
m_shader->enableAttributeArray("aPos");
}
// 注冊cuda資源
checkCudaErrors(cudaGraphicsGLRegisterBuffer(&m_vboResource, m_vbo.bufferId(), cudaGraphicsMapFlagsWriteDiscard));
// 設(shè)置清屏顏色
glClearColor(0.1f, 0.1f, 0.1f, 1.0f);
connect(context(), &QOpenGLContext::aboutToBeDestroyed, this, &CuGl::cleanup);
}
void CuGl::resizeGL(int w, int h)
{
glViewport(0, 0, w, h);
QMatrix4x4 model, view, projection;
view.lookAt(QVector3D(0, 0, 3), QVector3D(0, 0, 0), QVector3D(0, 1, 0));
projection.perspective(45.0f, (float)w / h, 0.1f, 100.0f);
m_shader->bind();
m_shader->setUniformValue("model", model);
m_shader->setUniformValue("view", view);
m_shader->setUniformValue("projection", projection);
}
void CuGl::paintGL()
{
// 使用CUDA進行計算
runCuda();
// 使用OPenGL進行繪制
glClear(GL_COLOR_BUFFER_BIT);
m_shader->bind();
m_vao.bind();
glDrawArrays(GL_POINTS, 0, DIM*DIM);
update();
}
void CuGl::runCuda()
{
float3* glPtr;
size_t size;
// 映射已注冊的資源
checkCudaErrors(cudaGraphicsMapResources(1, &m_vboResource, 0));
// 獲取映射后的資源的設(shè)備指針
checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void**)&glPtr, &size, m_vboResource));
// 利用設(shè)備指針對數(shù)據(jù)做計算
dim3 blocks(16, 16);
dim3 grids((DIM - 1) / 16 + 1, (DIM - 1) / 16 + 1);
kernel << <grids, blocks >> > (glPtr, DIM, DIM, m_fAnim);
m_fAnim += 0.01f;
// 取消資源映射
checkCudaErrors(cudaGraphicsUnmapResources(1, &m_vboResource, 0));
}
void CuGl::cleanup()
{
// 取消資源注冊
checkCudaErrors(cudaGraphicsUnregisterResource(m_vboResource));
// 銷毀VAO/VBO
m_vao.destroy();
m_vbo.destroy();
}
頂點著色器和片段著色器程序:
#version 330 core
layout(location = 0) in vec3 aPos;
uniform mat4 model;
uniform mat4 view;
uniform mat4 projection;
void main()
{
gl_Position = projection * view * model * vec4(aPos,1.0f);
}
#version 330 core
out vec4 fragColor;
void main()
{
fragColor = vec4(0.4, 0.7, 0.6, 1.0);
}
運行效果: