更改cuda内核时，为什么cudaGraphicsGLRegisterBuffer出现段错误？

不列颠

我使用的是 Ubuntu 14.04、CUDA 工具包 6.5 和 NVIDIA 驱动程序 340.29。我的应用程序把一个 OpenGL 像素缓冲区（PBO）注册到 CUDA，并在每次循环中由 CUDA 将图像写入该缓冲区，再用 glTexSubImage2D 把 PBO 复制到纹理，最后绘制纹理。在我修改图像生成内核之前，这一切都能正常工作；修改之后，gdb 报告在 cudaGraphicsGLRegisterBuffer 中发生段错误。我猜这是一个 bug，因为 CUDA 内核与 cudaGraphicsGLRegisterBuffer 完全无关，而且后者在任何处理开始之前就已被调用。

Makefile

# Build configuration for the CUDA / OpenGL interop demo.
# NOTE: recipe lines below are indented with a TAB character, as make requires.
CUDA=nvcc
CPP=g++

OUT=out

INC=-I/usr/local/cuda-6.5/include

LINK=-lcudart -lglfw -lGLEW -lGL

# CPP and FLAGS are not referenced by any rule in this file; they are kept so
# that anything overriding or reading them keeps working.
FLAGS=-std=gnu++11
CUFLAGS=-std=c++11

# These targets are commands, not files, so they must run even if a file with
# the same name happens to exist.
.PHONY: all clean add

all: $(OUT)

# Rebuild the executable when either source file or the shared header changes.
$(OUT): main.cu GLdisplay.cu GLdisplay.h
	$(CUDA) main.cu GLdisplay.cu -o $(OUT) $(CUFLAGS) $(INC) $(LINK)

# -f: do not fail when the executable has not been built yet.
clean:
	rm -f ./$(OUT)

add:
	git add -A
	git status

main.cu

#define  GLEW_STATIC

// C++ headers
#include <cstring>
#include <fstream>
#include <iostream>
#include <vector>

// openGL headers
#include <GL/glew.h>
#include <GLFW/glfw3.h>

// CUDA headers
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>

#include "GLdisplay.h"

// Size of the GLFW window, in pixels (passed to glfwCreateWindow).
#define WINDOW_WIDTH  640
#define WINDOW_HEIGHT 480

// Size of the generated image in pixels; used to size the pixel buffer object,
// the texture and the kernel launch (4 bytes per pixel).
#define TEX_WIDTH     1920
#define TEX_HEIGHT    1080

using std::cout;
using std::cerr;
using std::endl;
using std::string;
using std::ifstream;

// Window handle returned by glfwCreateWindow.
GLFWwindow* window;
// Vertex array object, vertex buffer object and pixel (unpack) buffer object names.
GLuint vao, vbo, pbo;
// Vertex shader, fragment shader and linked program object names.
GLuint vtx, frg, shaders;
// Texture that receives the contents of the pixel buffer object every frame.
GLuint tex;

// Pointer and size reported by cudaGraphicsResourceGetMappedPointer while the
// pixel buffer object is mapped.
uint8_t* cudaPBOptr;
size_t cudaPBOsize;

// CUDA graphics resource handle obtained by registering pbo with CUDA.
cudaGraphicsResource_t cuGfxPBO;

// Reads a text file line by line and returns its contents as one string.
//
// filename: path of the file to read.
// Returns the lines of the file, each terminated by exactly one "\n" (a final
// line without a newline gets one added). When the file cannot be opened, a
// message is written to stderr and an empty string is returned.
std::string loadTxtFileAsString( std::string filename )
{
    std::ifstream file( filename.c_str( ), std::ios::in );

    if( !file.is_open( ) )
    {
        std::cerr << "cannot open " << filename << std::endl;
        return std::string( );
    }

    std::string source;
    std::string line;

    // Loop on the result of getline itself: testing file.good( ) before the
    // read runs one extra iteration after the last line and appends a
    // spurious empty line to the result.
    while( std::getline( file, line ) )
    {
        source += line;
        source += '\n';
    }

    // The stream is closed by its destructor.
    return source;
}

// Reports shader compile failures on stderr.
// Reads the global shader names vtx and frg; for each one whose
// GL_COMPILE_STATUS is not GL_TRUE, prints its info log (up to 512 bytes)
// behind a short prefix. Prints nothing for a shader that compiled.
void shaderCompileCheck( void )
{
    // Vertex shader first, then fragment shader.
    const GLuint handles[ 2 ]  = { vtx, frg };
    const char*  prefixes[ 2 ] = { "vtx err | ", "frg err | " };

    GLint compiled;

    for( int i = 0; i < 2; ++i )
    {
        glGetShaderiv( handles[ i ], GL_COMPILE_STATUS, &compiled );

        if( GL_TRUE == compiled ) continue;

        char infoLog[ 512 ];
        glGetShaderInfoLog( handles[ i ], 512, NULL, infoLog );
        cerr << prefixes[ i ] << infoLog << endl;
    }
}

// Wraps a CUDA runtime call so that a failure is reported together with the
// source file and line of the call.
#define cudaErr(err) cudaError( err, __FILE__, __LINE__ )

// Reports a failed CUDA call.
//
// err:   status returned by the CUDA call.
// file:  source file of the call site.
// line:  source line of the call site.
// abort: when true (the default) the process exits with err as its status.
//
// Does nothing when err is cudaSuccess; otherwise prints
// "[file:line] <error name>" to stderr (the enum name, not the description).
inline void cudaError( cudaError_t err, const char* file, uint32_t line, bool abort=true )
{
    if( cudaSuccess == err ) return;

    cerr << "[" << file << ":" << line << "] ";
    cerr << cudaGetErrorName( err ) << endl;

    if( !abort ) return;

    exit( err );
}

// Fetches one pending OpenGL error flag with glGetError and reports it
// together with the source file and line of the check.
#define glErr( ) glError( glGetError( ), __FILE__, __LINE__ )

// Reports an OpenGL error code.
//
// err:   value returned by glGetError.
// file:  source file of the call site.
// line:  source line of the call site.
// abort: when true the process exits with err as its status (default: false).
//
// Does nothing when err is GL_NO_ERROR; otherwise prints
// "[file:line] <GL error name> (0x<code>)" to stderr.
inline void glError( GLenum err, const char* file, uint32_t line, bool abort=false )
{
    if( GL_NO_ERROR == err ) return;

    // The codes are decoded here because glewGetErrorString only understands
    // GLEW's own status codes (the values returned by glewInit); it reports
    // every glGetError value as an unknown error.
    const char* name;

    switch( err )
    {
        case GL_INVALID_ENUM:                  name = "GL_INVALID_ENUM";                  break;
        case GL_INVALID_VALUE:                 name = "GL_INVALID_VALUE";                 break;
        case GL_INVALID_OPERATION:             name = "GL_INVALID_OPERATION";             break;
        case GL_STACK_OVERFLOW:                name = "GL_STACK_OVERFLOW";                break;
        case GL_STACK_UNDERFLOW:               name = "GL_STACK_UNDERFLOW";               break;
        case GL_OUT_OF_MEMORY:                 name = "GL_OUT_OF_MEMORY";                 break;
        case GL_INVALID_FRAMEBUFFER_OPERATION: name = "GL_INVALID_FRAMEBUFFER_OPERATION"; break;
        default:                               name = "unknown GL error";                 break;
    }

    cerr << "[" << file << ":" << line << "] ";
    cerr << name << " (0x" << std::hex << err << std::dec << ")" << endl;

    if( abort ) exit( err );
}

// main function
//
// Creates a GLFW window with an OpenGL 3.2 core-profile context, registers a
// pixel buffer object (PBO) with CUDA and then, once per frame: maps the PBO,
// lets the makeImage kernel fill it, unmaps it, copies it into a texture with
// glTexSubImage2D and draws that texture on a quad covering the window.
//
// Returns 0 after the window is closed, or 1 when GLFW, the window or GLEW
// cannot be initialised. A failing CUDA call ends the process through cudaErr.
int main( int argc, char* argv[] )
{
    // Command-line arguments are not used.
    ( void )argc;
    ( void )argv;

    string vString = loadTxtFileAsString( "vert.glsl" );
    string fString = loadTxtFileAsString( "frag.glsl" );

    const GLchar* vtxSource = vString.c_str( );
    const GLchar* frgSource = fString.c_str( );

/////////////////////////////////////////////////

    if( !glfwInit( ) )
    {
        cerr << "glfwInit failed" << endl;
        return 1;
    }

    glfwWindowHint( GLFW_CONTEXT_VERSION_MAJOR, 3 );
    glfwWindowHint( GLFW_CONTEXT_VERSION_MINOR, 2 );
    glfwWindowHint( GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE );
    glfwWindowHint( GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE );

    glfwWindowHint( GLFW_RESIZABLE, GL_FALSE );

    window = glfwCreateWindow( WINDOW_WIDTH, WINDOW_HEIGHT, "CUDA-GL", NULL, NULL );

    if( NULL == window )
    {
        cerr << "glfwCreateWindow failed" << endl;
        glfwTerminate( );
        return 1;
    }

    glfwMakeContextCurrent( window );

    glewExperimental = GL_TRUE;
    const GLenum glewStatus = glewInit( );

    if( GLEW_OK != glewStatus )
    {
        cerr << "glewInit failed | " << glewGetErrorString( glewStatus ) << endl;
        glfwDestroyWindow( window );
        glfwTerminate( );
        return 1;
    }
    glErr(); // added GL error checking

    cudaErr( cudaSetDevice( 0 ) );
    glErr(); // added GL error checking

/////////////////////////////////////////////////

    GLfloat vertices[] = {
    //   X      Y     U     V
        -1.0f,  1.0f, 0.0f, 1.0f, // t l
         1.0f,  1.0f, 1.0f, 1.0f, // t r
        -1.0f, -1.0f, 0.0f, 0.0f, // b l

        -1.0f, -1.0f, 0.0f, 0.0f, // b l
         1.0f, -1.0f, 1.0f, 0.0f, // b r
         1.0f,  1.0f, 1.0f, 1.0f  // t r
    };

    const size_t pboBytes = ( size_t )TEX_WIDTH * TEX_HEIGHT * 4;

/////////////////////////////////////////////////

    glGenVertexArrays( 1, &vao );
    glBindVertexArray( vao );
    glErr(); // added GL error checking

    glGenBuffers( 1, &pbo );
    glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo );
    {
        // The initial contents (every byte 127) are held on the heap and freed
        // right after the upload. At 1920x1080x4 this is about 7.9 MiB; as a
        // local array it would leave almost nothing of a commonly used 8 MiB
        // stack for the calls that follow.
        std::vector< GLbyte > pboInit( pboBytes, 127 );
        glBufferData( GL_PIXEL_UNPACK_BUFFER, pboBytes, &pboInit[ 0 ], GL_STREAM_DRAW );
    }
    glBindBuffer( GL_PIXEL_UNPACK_BUFFER, 0 );
    glErr(); // added GL error checking

    glActiveTexture( GL_TEXTURE0 );
    glGenTextures( 1, &tex );
    glBindTexture( GL_TEXTURE_2D, tex );
    glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, TEX_WIDTH, TEX_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL );
    glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
    glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
    glErr(); // added GL error checking

    glGenBuffers( 1, &vbo );
    glBindBuffer( GL_ARRAY_BUFFER, vbo );
    glBufferData( GL_ARRAY_BUFFER, sizeof( vertices ), vertices, GL_STATIC_DRAW );
    glErr(); // added GL error checking

/////////////////////////////////////////////////

    // Registration takes a cudaGraphicsRegisterFlags value (the map-flag enum
    // used previously has the same numeric value, 0, but is the wrong type).
    cudaErr( cudaGraphicsGLRegisterBuffer( &cuGfxPBO, pbo, cudaGraphicsRegisterFlagsNone ) );
    glErr(); // added GL error checking

/////////////////////////////////////////////////

    vtx = glCreateShader( GL_VERTEX_SHADER );
    glShaderSource( vtx, 1, &vtxSource, NULL );
    glCompileShader( vtx );

    frg = glCreateShader( GL_FRAGMENT_SHADER );
    glShaderSource( frg, 1, &frgSource, NULL );
    glCompileShader( frg );

    shaderCompileCheck( );

    shaders = glCreateProgram( );
    glAttachShader( shaders, vtx );
    glAttachShader( shaders, frg );

    glBindFragDataLocation( shaders, 0, "outColor" );

    glLinkProgram( shaders );

    GLint linked = GL_FALSE;
    glGetProgramiv( shaders, GL_LINK_STATUS, &linked );

    if( GL_TRUE != linked )
    {
        char buffer[ 512 ];
        glGetProgramInfoLog( shaders, 512, NULL, buffer );
        cerr << "link err | " << buffer << endl;
    }

    glUseProgram( shaders );

/////////////////////////////////////////////////

    // glGetAttribLocation returns -1 for an attribute the program does not
    // have; such an index must not be passed on to the attribute calls.
    const GLint posAtt = glGetAttribLocation( shaders, "position" );

    if( posAtt < 0 )
    {
        cerr << "attribute not found | position" << endl;
    }
    else
    {
        glEnableVertexAttribArray( ( GLuint )posAtt );
        glVertexAttribPointer( ( GLuint )posAtt, 2, GL_FLOAT, GL_FALSE, 4 * sizeof( GLfloat ), NULL );
    }

    const GLint texAtt = glGetAttribLocation( shaders, "texcoord" );

    if( texAtt < 0 )
    {
        cerr << "attribute not found | texcoord" << endl;
    }
    else
    {
        glEnableVertexAttribArray( ( GLuint )texAtt );
        glVertexAttribPointer( ( GLuint )texAtt, 2, GL_FLOAT, GL_FALSE, 4 * sizeof( GLfloat ), ( void* )( 2 * sizeof( GLfloat ) ) );
    }

/////////////////////////////////////////////////

    glClearColor( 0.0f, 0.0f, 0.0f, 1.0f );

    // One thread per pixel in 16x16 blocks; the grid is rounded up so that it
    // covers the whole image (the kernel discards the surplus threads).
    const dim3 block( 16, 16 );
    const dim3 grid( ( TEX_WIDTH + block.x - 1 ) / block.x, ( TEX_HEIGHT + block.y - 1 ) / block.y );

    while( !glfwWindowShouldClose( window ) )
    {
        cudaErr( cudaGraphicsMapResources( 1, &cuGfxPBO, 0 ) );
        cudaErr( cudaGraphicsResourceGetMappedPointer( ( void** )&cudaPBOptr, &cudaPBOsize, cuGfxPBO ) );
        glErr(); // added GL error checking

        makeImage<<< grid, block >>>( cudaPBOptr, TEX_WIDTH, TEX_HEIGHT );
        cudaErr( cudaGetLastError( ) );      // errors in the launch itself
        cudaErr( cudaDeviceSynchronize( ) ); // errors raised while the kernel ran
        glErr(); // added GL error checking

        cudaErr( cudaGraphicsUnmapResources( 1, &cuGfxPBO, 0 ) );
        glErr(); // added GL error checking

/////////////////////////////////////////////////

        // With a buffer bound to GL_PIXEL_UNPACK_BUFFER the last argument of
        // glTexSubImage2D is an offset into that buffer, so NULL means "from
        // the start of the PBO".
        glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo );
        glErr(); // added GL error checking

        glTexSubImage2D( GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, NULL );
        glErr(); // added GL error checking

        glBindBuffer( GL_PIXEL_UNPACK_BUFFER, 0 );
        glErr(); // added GL error checking

/////////////////////////////////////////////////

        glClear( GL_COLOR_BUFFER_BIT );

        glDrawArrays( GL_TRIANGLES, 0, 6 );
        glErr(); // added GL error checking

        glfwSwapBuffers( window );

        glfwPollEvents( );
    }

/////////////////////////////////////////////////

    // Release the CUDA registration before the buffer it refers to is deleted.
    cudaErr( cudaGraphicsUnregisterResource( cuGfxPBO ) );

    glDeleteTextures( 1, &tex );

    glDeleteProgram( shaders );
    glDeleteShader( frg );
    glDeleteShader( vtx );

    glDeleteBuffers( 1, &pbo );
    glDeleteBuffers( 1, &vbo );

    glDeleteVertexArrays( 1, &vao );

    glfwDestroyWindow( window );
    glfwTerminate( );

    return 0;
}

GLdisplay.cu

#include <cuda_runtime.h>
#include <stdint.h>

#include "GLdisplay.h"

// Fills an RGBA image (4 bytes per pixel, row-major) with a colour gradient.
//
// Launch layout: a 2D grid of 2D blocks with one thread per pixel; threads
// whose coordinates fall outside width x height return without writing, so
// the grid may be larger than the image.
//
// output: buffer the kernel writes to; must hold width * height * 4 bytes.
// width:  image width in pixels.
// height: image height in pixels.
//
// With t = (row-major pixel index) / (width * height):
//   red   rises with t, green falls with t,
//   blue  is 510 * |t - 0.5| (255 at both ends, 0 in the middle),
//   alpha is always 255.
//
// NOTE(review): the original author observed a segfault in
// cudaGraphicsGLRegisterBuffer after removing or zeroing the colour
// computations below; the cause is not visible in this kernel.
__global__ void makeImage( uint8_t* output, uint32_t width, uint32_t height )
{
    const uint32_t xIdx = threadIdx.x + blockIdx.x * blockDim.x;
    const uint32_t yIdx = threadIdx.y + blockIdx.y * blockDim.y;

    if( xIdx >= width || yIdx >= height ) return;

    // The offset is only computed for pixels inside the image.
    const uint32_t pixel = xIdx + yIdx * width;
    uint8_t* texel = output + ( size_t )pixel * 4;

    const float t = ( float )pixel / ( float )( width * height );

    // The subtraction belongs inside the absolute value: outside it the
    // absolute value has no effect and the result reaches about 509, which
    // does not fit into a uint8_t.
    texel[ 0 ] = ( uint8_t )( 255.0f * t );
    texel[ 1 ] = ( uint8_t )( 255.0f * ( 1.0f - t ) );
    texel[ 2 ] = ( uint8_t )( 510.0f * fabsf( t - 0.5f ) );
    texel[ 3 ] = 255;
}

GLdisplay.h

#ifndef GL_DISPLAY_H
#define GL_DISPLAY_H

// The prototype below uses uint8_t and uint32_t; including <stdint.h> here
// makes the header independent of what the including file included before it.
#include <stdint.h>

// Kernel that writes an RGBA image into a buffer.
// Arguments: output buffer, image width in pixels, image height in pixels.
__global__ void makeImage( uint8_t*, uint32_t, uint32_t );

#endif

vert.glsl

#version 150

// Per-vertex inputs: 2D position and texture coordinate.
in vec2 position;
in vec2 texcoord;

// Texture coordinate passed on to the fragment shader.
out vec2 Texcoord;

// Emits the 2D position unchanged (z = 0, w = 1) and forwards the texture
// coordinate.
void main( )
{
    gl_Position = vec4( position.x, position.y, 0.0, 1.0 );
    Texcoord    = texcoord;
}

frag.glsl

#version 150

// Texture coordinate received from the vertex shader.
in vec2 Texcoord;

// Colour written for this fragment.
out vec4 outColor;

// Texture sampled for the output colour.
uniform sampler2D tex;

// Outputs the texture colour found at the fragment's texture coordinate.
void main( )
{
    vec4 texel = texture( tex, Texcoord );
    outColor = texel;
}

不列颠

将 NVIDIA 显示驱动程序更新到 346.47 后，我的问题完全解决了。如果不确定自己的驱动版本，请在终端窗口中运行 nvidia-smi；如果看不到 NVIDIA-SMI 346.xx（xx 为任意数字），请更新到 NVIDIA 提供的最新驱动程序。在撰写本文时，CUDA 工具包 6.5 附带的图形驱动程序已经过时。

本文收集自互联网，转载请注明来源。

如有侵权，请联系 [email protected] 删除。

编辑于 2021-03-27

我来说两句

0 条评论

登录后参与评论

TOP 榜单

文章

更改cuda内核时，为什么cudaGraphicsGLRegisterBuffer出现段错误？

更改cuda内核时，为什么cudaGraphicsGLRegisterBuffer出现段错误？

UITableView的项目向下滚动后更改颜色，然后快速备份

Linux的官方Adobe Flash存储库是否已过时？

用日期数据透视表和日期顺序查询

应用发明者仅从列表中选择一个随机项一次

Mac OS X更新后的GRUB 2问题

验证REST API参数

Java Eclipse中的错误13，如何解决？

带有错误“ where”条件的查询如何返回结果？

ggplot：对齐多个分面图-所有大小不同的分面

尝试反复更改屏幕上按钮的位置 - kotlin android studio

如何从视图一次更新多行（ASP.NET - Core）

计算数据帧中每行的NA

蓝屏死机没有修复解决方案

在 Python 2.7 中。如何从文件中读取特定文本并分配给变量

离子动态工具栏背景色

VB.net将2条特定行导出到DataGridView

通过 Git 在运行 Jenkins 作业时获取 ClassNotFoundException

在Windows 7中无法删除文件（2）

python中的boto3文件上传

当我尝试下载 StanfordNLP en 模型时，出现错误

Node.js中未捕获的异常错误，发生调用