使用DirectX11像素着色器将颜色从DXGI_FORMAT_B8G8R8A8_UNORM转换为GPU中的NV12

内存

我正在编写一段代码,使用桌面复制(Desktop Duplication)捕获桌面,并使用英特尔硬件MFT将其编码为h264。编码器仅接受NV12格式作为输入。我有一个基于DirectX VideoProcessor的DXGI_FORMAT_B8G8R8A8_UNORM到NV12转换器(https://github.com/NVIDIA/video-sdk-samples/blob/master/nvEncDXGIOutputDuplicationSample/Preproc.cpp),它可以正常工作。

问题是某些英特尔图形硬件上的VideoProcessor仅支持从DXGI_FORMAT_B8G8R8A8_UNORM到YUY2的转换,而不支持到NV12的转换,我已经通过GetVideoProcessorOutputFormats枚举支持的格式确认了这一点。尽管VideoProcessor Blt成功完成且没有报告任何错误,但输出视频中的帧有轻微的像素化,仔细观察就能注意到。

我猜想,VideoProcessor只是回退到了下一个受支持的输出格式(YUY2),而我在不知情的情况下把它送入了编码器,编码器则认为输入是按配置的NV12格式。由于NV12和YUY2之间只有字节顺序和子采样等很小的差异,因此帧不会出现明显的故障或严重损坏。另外,在支持NV12转换的硬件上我没有遇到像素化问题。

所以我决定使用基于此代码的像素着色器进行颜色转换(https://github.com/bavulapati/DXGICaptureDXColorSpaceConversionIntelEncode/blob/master/DXGICaptureDXColorSpaceConversionIntelEncode/DuplicationManager.cpp)。我能够使像素着色器正常工作,我也已经在这里上传了我的代码(https://codeshare.io/5PJjxP)以供参考(尽可能简化)。

现在,我得到了两个独立的通道,分别是色度和亮度(均为ID3D11Texture2D纹理)。如何将这两个独立的通道高效地打包到一个ID3D11Texture2D纹理中,以便提供给编码器,我确实感到困惑。有没有一种方法可以在GPU中将Y和UV通道高效地打包到单个ID3D11Texture2D中?我非常厌倦基于CPU的方法,因为它开销很大,并且无法提供最佳的帧速率。实际上,我甚至不愿意将纹理复制到CPU。我正在寻找一种完全在GPU中执行此操作的方法,而无需在CPU和GPU之间来回复制。

我已经对此研究了相当长的时间,但没有任何进展,如能提供任何帮助,我将不胜感激。

/**
* This method is incomplete. It's just a template of what I want to achieve.
*
* Copies the luminance and chrominance render targets into their CPU-readable
* staging surfaces, maps each one, and copies it row by row towards the Y and
* UV planes of an output surface.
*
* The destination pointers (dptrY / dptrUV) are still placeholders and must be
* pointed at the planes of the real output surface before this can be used;
* pOutputTexture is not written yet.
*
* Returns S_OK when both surfaces were mapped and copied, otherwise the failing
* HRESULT from Map (or an HRESULT thrown inside the body).
*/

HRESULT CreateNV12TextureFromLumaAndChromaSurface(ID3D11Texture2D** pOutputTexture)
{
    HRESULT hr = S_OK;

    try
    {
        D3D11_MAPPED_SUBRESOURCE resource;
        const UINT subresource = D3D11CalcSubresource(0, 0, 0);

        //Copying from GPU to CPU. Bad :(
        m_pD3D11DeviceContext->CopyResource(m_CPUAccessibleLuminanceSurf, m_LuminanceSurf);

        // Assign to the function-level hr: declaring a second hr here would
        // shadow it and the caller would always receive S_OK.
        hr = m_pD3D11DeviceContext->Map(m_CPUAccessibleLuminanceSurf, subresource, D3D11_MAP_READ, 0, &resource);

        // HRESULT failure codes are negative (same test as FAILED(hr)).
        // resource is not valid after a failed Map, so stop before touching it.
        if (hr < 0)
        {
            return hr;
        }

        BYTE* sptr = reinterpret_cast<BYTE*>(resource.pData);
        BYTE* dptrY = nullptr; // TODO: point to the address of Y channel in output surface

        // Bytes per mapped row of the staging surface
        int rowPitch = resource.RowPitch;

        const int lumaRows = GetImageHeight();

        for (int i = 0; i < lumaRows; i++)
        {
            memcpy_s(dptrY, rowPitch, sptr, rowPitch);

            sptr += rowPitch;
            dptrY += rowPitch;
        }

        m_pD3D11DeviceContext->Unmap(m_CPUAccessibleLuminanceSurf, subresource);

        //Copying from GPU to CPU. Bad :(
        m_pD3D11DeviceContext->CopyResource(m_CPUAccessibleChrominanceSurf, m_ChrominanceSurf);

        hr = m_pD3D11DeviceContext->Map(m_CPUAccessibleChrominanceSurf, subresource, D3D11_MAP_READ, 0, &resource);

        if (hr < 0)
        {
            return hr;
        }

        sptr = reinterpret_cast<BYTE*>(resource.pData);
        BYTE* dptrUV = nullptr; // TODO: point to the address of UV channel in output surface

        rowPitch = resource.RowPitch;

        // The chrominance surface has half as many rows as the luminance one
        const int chromaRows = lumaRows / 2;

        for (int i = 0; i < chromaRows; i++)
        {
            memcpy_s(dptrUV, rowPitch, sptr, rowPitch);

            sptr += rowPitch;
            dptrUV += rowPitch;
        }

        m_pD3D11DeviceContext->Unmap(m_CPUAccessibleChrominanceSurf, subresource);
    }
    catch (HRESULT thrown)
    {
        // Report a thrown failure code instead of discarding it
        hr = thrown;
    }

    return hr;
}

绘制NV12:

 //
// Draw frame for NV12 texture
//
HRESULT DrawNV12Frame(ID3D11Texture2D* inputTexture)
{
    // Renders inputTexture twice: once with the luma pixel shader into the
    // luma render target and once with the chroma pixel shader into the chroma
    // render target.
    // Returns S_OK, or the HRESULT from InitializeNV12Surfaces when the
    // first-call initialisation fails. A failed shader-resource-view creation
    // is reported through IF_FAILED_THROW.
    HRESULT hr;

    // First call only: build the surfaces, views and viewports
    if (!m_bIntialized)
    {
        const HRESULT initResult = InitializeNV12Surfaces(inputTexture);
        if (!SUCCEEDED(initResult))
        {
            return initResult;
        }

        m_bIntialized = true;
    }

    // Copy the incoming frame into the texture that can be bound as a shader resource
    m_pD3D11DeviceContext->CopyResource(m_ShaderResourceSurf, inputTexture);

    D3D11_TEXTURE2D_DESC sourceDesc;
    m_ShaderResourceSurf->GetDesc(&sourceDesc);

    // Describe a 2D view over that texture, using its own format and mip count
    D3D11_SHADER_RESOURCE_VIEW_DESC viewDesc;
    viewDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
    viewDesc.Format = sourceDesc.Format;
    viewDesc.Texture2D.MipLevels = sourceDesc.MipLevels;
    viewDesc.Texture2D.MostDetailedMip = sourceDesc.MipLevels - 1;

    ID3D11ShaderResourceView* sourceView = nullptr;
    hr = m_pD3D11Device->CreateShaderResourceView(m_ShaderResourceSurf, &viewDesc, &sourceView);

    IF_FAILED_THROW(hr);

    // Both passes sample the same source view
    m_pD3D11DeviceContext->PSSetShaderResources(0, 1, &sourceView);

    // Pass 1: luma shader -> luma render target
    m_pD3D11DeviceContext->RSSetViewports(1, &m_VPLuminance);
    m_pD3D11DeviceContext->PSSetShader(m_pPixelShaderLuma, nullptr, 0);
    m_pD3D11DeviceContext->OMSetRenderTargets(1, &m_pLumaRT, nullptr);
    m_pD3D11DeviceContext->Draw(NUMVERTICES, 0);

    // Pass 2: chroma shader -> chroma render target
    m_pD3D11DeviceContext->RSSetViewports(1, &m_VPChrominance);
    m_pD3D11DeviceContext->PSSetShader(m_pPixelShaderChroma, nullptr, 0);
    m_pD3D11DeviceContext->OMSetRenderTargets(1, &m_pChromaRT, nullptr);
    m_pD3D11DeviceContext->Draw(NUMVERTICES, 0);

    // The per-frame view is no longer needed by this function
    sourceView->Release();

    return S_OK;
}

初始化着色器:

// Fills *VP with a viewport of Width x Height anchored at the top-left corner
// (0, 0) and spanning the depth range 0.0 to 1.0.
void SetViewPort(D3D11_VIEWPORT* VP, UINT Width, UINT Height)
{
    D3D11_VIEWPORT& viewport = *VP;

    // Origin
    viewport.TopLeftX = 0;
    viewport.TopLeftY = 0;

    // Extent
    viewport.Width = static_cast<FLOAT>(Width);
    viewport.Height = static_cast<FLOAT>(Height);

    // Depth range
    viewport.MinDepth = 0.0f;
    viewport.MaxDepth = 1.0f;
}

// Replaces the render target view stored in *pRTV with a new view over pSurf.
// Any view already held in *pRTV is released first. Creation failure is
// reported through IF_FAILED_THROW; otherwise S_OK is returned.
HRESULT MakeRTV(ID3D11RenderTargetView** pRTV, ID3D11Texture2D* pSurf)
{
    // Release the view currently held in the slot, if any
    if (ID3D11RenderTargetView* previous = *pRTV)
    {
        previous->Release();
        *pRTV = nullptr;
    }

    // nullptr descriptor: let the device pick the default view for pSurf
    HRESULT hr = m_pD3D11Device->CreateRenderTargetView(pSurf, nullptr, pRTV);

    IF_FAILED_THROW(hr);

    return S_OK;
}

// Creates every surface used by the conversion, sized from inputTexture:
//   - a shader-readable copy of the input (same format as the input)
//   - a full-size R8 luminance render target plus a CPU-readable staging twin
//   - a half-width, half-height R8G8 chrominance render target plus staging twin
// and sets up the matching viewports and render target views.
// Texture creation failures are reported through IF_FAILED_THROW; otherwise the
// result of the MakeRTV calls is returned.
HRESULT InitializeNV12Surfaces(ID3D11Texture2D* inputTexture)
{
    ReleaseSurfaces();

    D3D11_TEXTURE2D_DESC sourceDesc;
    inputTexture->GetDesc(&sourceDesc);

    const UINT lumaWidth = sourceDesc.Width;
    const UINT lumaHeight = sourceDesc.Height;
    const UINT chromaWidth = sourceDesc.Width / 2;
    const UINT chromaHeight = sourceDesc.Height / 2;

    // Shader-readable copy of the input frame
    D3D11_TEXTURE2D_DESC inputCopyDesc = {};
    inputCopyDesc.Width = lumaWidth;
    inputCopyDesc.Height = lumaHeight;
    inputCopyDesc.MipLevels = 1;
    inputCopyDesc.ArraySize = 1;
    inputCopyDesc.Format = sourceDesc.Format;
    inputCopyDesc.SampleDesc.Count = 1;
    inputCopyDesc.Usage = D3D11_USAGE_DEFAULT;
    inputCopyDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;

    HRESULT hr = m_pD3D11Device->CreateTexture2D(&inputCopyDesc, nullptr, &m_ShaderResourceSurf);
    IF_FAILED_THROW(hr);

    // Luminance: full-size, one 8-bit channel, rendered into by the GPU
    D3D11_TEXTURE2D_DESC lumaDesc = inputCopyDesc;
    lumaDesc.Format = DXGI_FORMAT_R8_UNORM;
    lumaDesc.BindFlags = D3D11_BIND_RENDER_TARGET;

    hr = m_pD3D11Device->CreateTexture2D(&lumaDesc, nullptr, &m_LuminanceSurf);
    IF_FAILED_THROW(hr);

    // Staging twin of the luminance surface, readable by the CPU
    D3D11_TEXTURE2D_DESC lumaStagingDesc = lumaDesc;
    lumaStagingDesc.Usage = D3D11_USAGE_STAGING;
    lumaStagingDesc.BindFlags = 0;
    lumaStagingDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;

    hr = m_pD3D11Device->CreateTexture2D(&lumaStagingDesc, nullptr, &m_CPUAccessibleLuminanceSurf);
    IF_FAILED_THROW(hr);

    SetViewPort(&m_VPLuminance, lumaWidth, lumaHeight);

    const HRESULT lumaViewResult = MakeRTV(&m_pLumaRT, m_LuminanceSurf);
    if (!SUCCEEDED(lumaViewResult))
    {
        return lumaViewResult;
    }

    // Chrominance: half size in each dimension, two 8-bit channels
    D3D11_TEXTURE2D_DESC chromaDesc = lumaDesc;
    chromaDesc.Width = chromaWidth;
    chromaDesc.Height = chromaHeight;
    chromaDesc.Format = DXGI_FORMAT_R8G8_UNORM;

    hr = m_pD3D11Device->CreateTexture2D(&chromaDesc, nullptr, &m_ChrominanceSurf);
    IF_FAILED_THROW(hr);

    // Staging twin of the chrominance surface, readable by the CPU
    D3D11_TEXTURE2D_DESC chromaStagingDesc = chromaDesc;
    chromaStagingDesc.Usage = D3D11_USAGE_STAGING;
    chromaStagingDesc.BindFlags = 0;
    chromaStagingDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;

    hr = m_pD3D11Device->CreateTexture2D(&chromaStagingDesc, nullptr, &m_CPUAccessibleChrominanceSurf);
    IF_FAILED_THROW(hr);

    SetViewPort(&m_VPChrominance, chromaWidth, chromaHeight);
    return MakeRTV(&m_pChromaRT, m_ChrominanceSurf);
}

// Creates the vertex shader from the g_VS blob, binds it, and sets up the
// input-assembler state (vertex buffer, triangle-list topology, input layout)
// for a six-vertex quad that covers the whole render target.
//
// ppID3D11VertexShader receives the created shader.
// Returns S_OK on success, otherwise the HRESULT of the first step that failed.
HRESULT InitVertexShader(ID3D11VertexShader** ppID3D11VertexShader)
{
    HRESULT hr = S_OK;
    UINT Size = ARRAYSIZE(g_VS);

    try
    {
        // Store each result in hr before checking it, so the returned code does
        // not depend on how IF_FAILED_THROW is implemented.
        hr = m_pD3D11Device->CreateVertexShader(g_VS, Size, nullptr, ppID3D11VertexShader);
        IF_FAILED_THROW(hr);

        // Bind the shader that was just created through the out-parameter
        m_pD3D11DeviceContext->VSSetShader(*ppID3D11VertexShader, nullptr, 0);

        // Two triangles spanning clip space from (-1,-1) to (1,1); texture
        // coordinate (0,0) is attached to the top-left corner (-1, 1)
        VERTEX Vertices[NUMVERTICES] =
        {
            { XMFLOAT3(-1.0f, -1.0f, 0), XMFLOAT2(0.0f, 1.0f) },
            { XMFLOAT3(-1.0f, 1.0f, 0), XMFLOAT2(0.0f, 0.0f) },
            { XMFLOAT3(1.0f, -1.0f, 0), XMFLOAT2(1.0f, 1.0f) },
            { XMFLOAT3(1.0f, -1.0f, 0), XMFLOAT2(1.0f, 1.0f) },
            { XMFLOAT3(-1.0f, 1.0f, 0), XMFLOAT2(0.0f, 0.0f) },
            { XMFLOAT3(1.0f, 1.0f, 0), XMFLOAT2(1.0f, 0.0f) },
        };

        UINT Stride = sizeof(VERTEX);
        UINT Offset = 0;

        D3D11_BUFFER_DESC BufferDesc;
        RtlZeroMemory(&BufferDesc, sizeof(BufferDesc));
        BufferDesc.Usage = D3D11_USAGE_DEFAULT;
        BufferDesc.ByteWidth = sizeof(VERTEX) * NUMVERTICES;
        BufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
        BufferDesc.CPUAccessFlags = 0;
        D3D11_SUBRESOURCE_DATA InitData;
        RtlZeroMemory(&InitData, sizeof(InitData));
        InitData.pSysMem = Vertices;

        // Create vertex buffer
        hr = m_pD3D11Device->CreateBuffer(&BufferDesc, &InitData, &m_VertexBuffer);
        IF_FAILED_THROW(hr);

        m_pD3D11DeviceContext->IASetVertexBuffers(0, 1, &m_VertexBuffer, &Stride, &Offset);
        m_pD3D11DeviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

        // TEXCOORD starts at byte 12, right after the three 32-bit floats of POSITION
        D3D11_INPUT_ELEMENT_DESC Layout[] =
        {
            { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
            { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0 }
        };

        UINT NumElements = ARRAYSIZE(Layout);
        hr = m_pD3D11Device->CreateInputLayout(Layout, NumElements, g_VS, Size, &m_pVertexLayout);

        // Do not bind a layout that failed to be created
        IF_FAILED_THROW(hr);

        m_pD3D11DeviceContext->IASetInputLayout(m_pVertexLayout);
    }
    catch (HRESULT thrown)
    {
        // Report the thrown failure code to the caller
        hr = thrown;
    }

    return hr;
}

// Creates the two pixel shaders used by the conversion from the precompiled
// g_PS_Y and g_PS_UV blobs.
// Returns S_OK when both were created, otherwise the HRESULT of the creation
// that failed.
HRESULT InitPixelShaders()
{
    HRESULT hr = S_OK;

    try
    {
        // Refer https://codeshare.io/5PJjxP for g_PS_Y & g_PS_UV blobs
        const UINT yBlobSize = ARRAYSIZE(g_PS_Y);
        const UINT uvBlobSize = ARRAYSIZE(g_PS_UV);

        // NOTE(review): the Y blob is stored in the "Chroma" member and the UV
        // blob in the "Luma" member. By name this looks swapped; confirm
        // against the shader sources before changing it.
        hr = m_pD3D11Device->CreatePixelShader(g_PS_Y, yBlobSize, nullptr, &m_pPixelShaderChroma);

        IF_FAILED_THROW(hr);

        hr = m_pD3D11Device->CreatePixelShader(g_PS_UV, uvBlobSize, nullptr, &m_pPixelShaderLuma);

        IF_FAILED_THROW(hr);
    }
    catch (HRESULT)
    {
        // hr was assigned before the check, so it is returned as-is below
    }

    return hr;
}
mofo77

我正在试验仅在GPU中使用DirectX11将RGBA转换为NV12。

这是一个很好的挑战。我对Directx11不熟悉,所以这是我的第一个实验。

检查此项目以获取更新:D3D11ShaderNV12

在我当前的实现中(可能不是最后一个),这是我要做的:

  • 步骤1:使用DXGI_FORMAT_B8G8R8A8_UNORM作为输入纹理
  • 步骤2:制作1st pass着色器以获取3个纹理(Y:Luma,U:ChromaCb和V:ChromaCr):请参见YCbCrPS2.hlsl
  • 步骤3:Y为DXGI_FORMAT_R8_UNORM,并准备进行最终的NV12纹理
  • 步骤4:需要在第二遍着色器中对UV进行下采样:请参见ScreenPS2.hlsl(使用线性过滤)
  • 步骤5:第三遍着色器以采样Y纹理
  • 步骤6:第四遍着色器使用移位纹理对UV纹理进行采样(我认为可以使用其他技术)

着色器NV12

我的最终纹理不是DXGI_FORMAT_NV12,而是类似的DXGI_FORMAT_R8_UNORM纹理。我的计算机运行的是Windows 7,因此不支持DXGI_FORMAT_NV12。稍后我将在另一台计算机上尝试。

图片处理:

渲染目标

本文收集自互联网,转载请注明来源。

如有侵权,请联系 [email protected] 删除。

编辑于
0

我来说两句

0 条评论
登录 后参与评论

相关文章

当后缓冲区格式为DXGI_FORMAT_B8G8R8A8_UNORM时,为什么像素着色器返回float4?

如何使用IPP将RGB转换为NV12色彩空间

android-Renderscript将NV12 yuv转换为RGB

DirectX 11顶点和像素着色器如何工作

将颜色从RGB转换为NV12

DirectX11 C ++着色器缓冲区在多边形布局描述中为空

如何将bgra8Unorm iOS金属纹理转换为rgba8Unorm纹理?

如何在DirectX11着色器中使用swizzle .rrrg

将MTLTexture从depth32Float转换为bgra8UNorm

如何使用OpenGL将RGBA转换为NV12?

我可以将R8G8B8A8放入UBO中并将其用作vec4吗?

OpenGL使用着色器将NV12转换为RGB24

Oracle 11g:将RAW(8)转换为整数范围0..255的表

随机访问HLSL中具有R8G8B8A8_UNorm格式的D3D11缓冲区

C ++中的着色器和Directx11编译

使用顶点着色器的WebGL高度图,使用32位而不是8位

在DirectX 11中将纹理数组发送到着色器

DirectX11 HLSL着色器未运行

表面格式为B8G8R8A8_UNORM,但vkCmdClearColorImage是否为float?

编译DirectX11着色器文件

将像素缓冲区从16Bit转换为B8G8R8A8_UNorm

将“ uint8 RGB像素”转换为“ uint8灰度像素”值的公式是什么?

将 R8G8B8A8 映像复制到 R8G8B8

编译着色器时出错 - DirectX11

DirectX11 - 几何着色器流输出流未定义

将 12 位单色图像转换为 8 位灰度

像素值在 DirectX 11 HLSL 着色器中的表现如何?

无法从 Godot 的着色器中正确读取 FORMAT_R8 统一的 sampler2D 纹理

AV_PIX_FMT_NV12 表示它具有 12bpp 位像素颜色,但数据为 uint8_t 如何修改帧中的像素?