Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/nvenc/nvenc_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ namespace nvenc {
};

auto buffer_is_yuv444 = [&]() {
return buffer_format == NV_ENC_BUFFER_FORMAT_AYUV || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
return buffer_format == NV_ENC_BUFFER_FORMAT_AYUV || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444 || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
};

{
Expand Down
3 changes: 3 additions & 0 deletions src/nvenc/nvenc_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ namespace nvenc {
case platf::pix_fmt_e::ayuv:
return NV_ENC_BUFFER_FORMAT_AYUV;

case platf::pix_fmt_e::yuv444p:
return NV_ENC_BUFFER_FORMAT_YUV444;

case platf::pix_fmt_e::yuv444p16:
return NV_ENC_BUFFER_FORMAT_YUV444_10BIT;

Expand Down
64 changes: 35 additions & 29 deletions src/nvhttp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,39 @@ namespace nvhttp {
return true;
}

/**
 * @brief Build the codec mode bitmask reported as "ServerCodecModeSupport".
 *
 * The mask always contains SCM_H264. Further bits are added from
 * video::active_hevc_mode, video::active_av1_mode and the per-codec entries
 * of video::last_encoder_probe_supported_yuv444_for_codec
 * (index 0 is used for the H.264 bit, 1 for the HEVC bits, 2 for the AV1 bits).
 *
 * @return Bitwise OR of the SCM_* flags selected by the current state.
 */
uint32_t get_codec_mode_flags() {
  const int hevc_mode = video::active_hevc_mode;
  const int av1_mode = video::active_av1_mode;
  const auto &yuv444_probe = video::last_encoder_probe_supported_yuv444_for_codec;

  const bool h264_yuv444 = yuv444_probe[0];
  const bool hevc_yuv444 = yuv444_probe[1];
  const bool av1_yuv444 = yuv444_probe[2];

  uint32_t flags = SCM_H264;

  // H.264: only the 4:4:4 bit is conditional.
  if (h264_yuv444) {
    flags |= SCM_H264_HIGH8_444;
  }

  // HEVC: any mode of 2 or above adds the base flag, plus the 8-bit 4:4:4
  // flag when the probe reported 4:4:4 support.
  if (hevc_mode >= 2) {
    flags |= SCM_HEVC;
    if (hevc_yuv444) {
      flags |= SCM_HEVC_REXT8_444;
    }
  }
  // Modes 3 and 5 add Main10; modes 4 and 5 add the 10-bit 4:4:4 flag,
  // the latter only together with probed 4:4:4 support.
  const bool hevc_main10 = (hevc_mode == 3) || (hevc_mode == 5);
  const bool hevc_rext10_444 = hevc_yuv444 && ((hevc_mode == 4) || (hevc_mode == 5));
  if (hevc_main10) {
    flags |= SCM_HEVC_MAIN10;
  }
  if (hevc_rext10_444) {
    flags |= SCM_HEVC_REXT10_444;
  }

  // AV1: same decision table as HEVC, using the AV1 mode and probe entry.
  if (av1_mode >= 2) {
    flags |= SCM_AV1_MAIN8;
    if (av1_yuv444) {
      flags |= SCM_AV1_HIGH8_444;
    }
  }
  const bool av1_main10 = (av1_mode == 3) || (av1_mode == 5);
  const bool av1_high10_444 = av1_yuv444 && ((av1_mode == 4) || (av1_mode == 5));
  if (av1_main10) {
    flags |= SCM_AV1_MAIN10;
  }
  if (av1_high10_444) {
    flags |= SCM_AV1_HIGH10_444;
  }

  return flags;
}

template<class T>
void serverinfo(std::shared_ptr<typename SimpleWeb::ServerBase<T>::Response> response, std::shared_ptr<typename SimpleWeb::ServerBase<T>::Request> request) {
print_req<T>(request);
Expand Down Expand Up @@ -731,34 +764,7 @@ namespace nvhttp {
tree.put("root.LocalIP", net::addr_to_normalized_string(local_endpoint.address()));
}

uint32_t codec_mode_flags = SCM_H264;
if (video::last_encoder_probe_supported_yuv444_for_codec[0]) {
codec_mode_flags |= SCM_H264_HIGH8_444;
}
if (video::active_hevc_mode >= 2) {
codec_mode_flags |= SCM_HEVC;
if (video::last_encoder_probe_supported_yuv444_for_codec[1]) {
codec_mode_flags |= SCM_HEVC_REXT8_444;
}
}
if (video::active_hevc_mode >= 3) {
codec_mode_flags |= SCM_HEVC_MAIN10;
if (video::last_encoder_probe_supported_yuv444_for_codec[1]) {
codec_mode_flags |= SCM_HEVC_REXT10_444;
}
}
if (video::active_av1_mode >= 2) {
codec_mode_flags |= SCM_AV1_MAIN8;
if (video::last_encoder_probe_supported_yuv444_for_codec[2]) {
codec_mode_flags |= SCM_AV1_HIGH8_444;
}
}
if (video::active_av1_mode >= 3) {
codec_mode_flags |= SCM_AV1_MAIN10;
if (video::last_encoder_probe_supported_yuv444_for_codec[2]) {
codec_mode_flags |= SCM_AV1_HIGH10_444;
}
}
const uint32_t codec_mode_flags = get_codec_mode_flags();
tree.put("root.ServerCodecModeSupport", codec_mode_flags);

auto current_appid = proc::proc.running();
Expand Down Expand Up @@ -807,7 +813,7 @@ namespace nvhttp {
for (auto &proc : proc::proc.get_apps()) {
pt::ptree app;

app.put("IsHdrSupported"s, video::active_hevc_mode == 3 ? 1 : 0);
app.put("IsHdrSupported"s, video::active_hevc_mode >= 3 ? 1 : 0);
app.put("AppTitle"s, proc.name);
app.put("ID", proc.id);

Expand Down
2 changes: 2 additions & 0 deletions src/platform/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ namespace platf {
p010, ///< P010
ayuv, ///< AYUV
yuv444p16, ///< Planar 10-bit (shifted to 16-bit) YUV 4:4:4
yuv444p, ///< Planar 8-bit YUV 4:4:4
y410, ///< Y410
unknown ///< Unknown
};
Expand All @@ -259,6 +260,7 @@ namespace platf {
_CONVERT(p010);
_CONVERT(ayuv);
_CONVERT(yuv444p16);
_CONVERT(yuv444p);
_CONVERT(y410);
_CONVERT(unknown);
}
Expand Down
131 changes: 97 additions & 34 deletions src/platform/linux/cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,10 @@ namespace cuda {
this->frame = frame;

auto hwframe_ctx = (AVHWFramesContext *) hw_frames_ctx->data;
if (hwframe_ctx->sw_format != AV_PIX_FMT_NV12) {
BOOST_LOG(error) << "cuda::cuda_t doesn't support any format other than AV_PIX_FMT_NV12"sv;

if (hwframe_ctx->sw_format != AV_PIX_FMT_NV12 &&
hwframe_ctx->sw_format != AV_PIX_FMT_YUV444P) {
BOOST_LOG(error) << "cuda::cuda_t doesn't support any format other than AV_PIX_FMT_NV12 and AV_PIX_FMT_YUV444P"sv;
return -1;
}

Expand Down Expand Up @@ -178,7 +180,12 @@ namespace cuda {
return;
}

sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex->texture.linear, stream.get(), {frame->width, frame->height, 0, 0});
//frame->data[2] not null on YUV444 conversion
if (frame->data[2]) {
sws.convert_yuv444(frame->data[0], frame->data[1], frame->data[2], frame->linesize[0], tex->texture.linear, stream.get(), {frame->width, frame->height, 0, 0});
} else {
sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex->texture.linear, stream.get(), {frame->width, frame->height, 0, 0});
}
}

cudaTextureObject_t tex_obj(const tex_t &tex) const {
Expand All @@ -200,6 +207,11 @@ namespace cuda {
class cuda_ram_t: public cuda_t {
public:
/**
 * @brief Load the image through sws.load_ram() and convert it into the current frame.
 *
 * A non-null frame->data[2] selects the YUV444 conversion (three plane
 * pointers); otherwise the two-plane conversion is used.
 *
 * @param img Image passed to sws.load_ram().
 * @return 0 when both the load and the conversion return 0, otherwise 1.
 */
int convert(platf::img_t &img) override {
  // frame->data[2] is only populated for YUV444 frames.
  const bool use_yuv444 = frame->data[2] != nullptr;

  // The conversion is skipped entirely when load_ram reports a non-zero result.
  if (sws.load_ram(img, tex.array)) {
    return 1;
  }

  if (use_yuv444) {
    return sws.convert_yuv444(frame->data[0], frame->data[1], frame->data[2], frame->linesize[0], tex_obj(tex), stream.get()) ? 1 : 0;
  }

  return sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(tex), stream.get()) ? 1 : 0;
}

Expand All @@ -224,6 +236,11 @@ namespace cuda {
class cuda_vram_t: public cuda_t {
public:
/**
 * @brief Convert the texture carried by the image into the current frame.
 *
 * The image is cast to this namespace's img_t to reach its `tex` member.
 * A non-null frame->data[2] selects the YUV444 conversion (three plane
 * pointers); otherwise the two-plane conversion is used.
 *
 * @param img Image whose `tex` member is the conversion source.
 * @return The value returned by the selected sws conversion call.
 */
int convert(platf::img_t &img) override {
  const auto &source_tex = ((img_t *) &img)->tex;

  // Without a third plane pointer the frame takes the two-plane path.
  if (frame->data[2] == nullptr) {
    return sws.convert(frame->data[0], frame->data[1], frame->linesize[0], frame->linesize[1], tex_obj(source_tex), stream.get());
  }

  // frame->data[2] is only populated for YUV444 frames.
  return sws.convert_yuv444(frame->data[0], frame->data[1], frame->data[2], frame->linesize[0], tex_obj(source_tex), stream.get());
}
};
Expand Down Expand Up @@ -345,18 +362,26 @@ namespace cuda {
auto hw_frames_ctx = (AVHWFramesContext *) hw_frames_ctx_buf->data;
sw_format = hw_frames_ctx->sw_format;

auto nv12_opt = egl::create_target(frame->width, frame->height, sw_format);
if (!nv12_opt) {
return -1;
}

auto sws_opt = egl::sws_t::make(width, height, frame->width, frame->height, sw_format);
if (!sws_opt) {
return -1;
}

this->sws = std::move(*sws_opt);
this->nv12 = std::move(*nv12_opt);

if (sw_format == AV_PIX_FMT_YUV444P) {
auto yuv444_opt = egl::create_yuv444_target(frame->width, frame->height, sw_format);
if (!yuv444_opt) {
return -1;
}
this->yuv444 = std::move(*yuv444_opt);
} else {
auto nv12_opt = egl::create_nv12_target(frame->width, frame->height, sw_format);
if (!nv12_opt) {
return -1;
}
this->nv12 = std::move(*nv12_opt);
}

auto cuda_ctx = (AVCUDADeviceContext *) hw_frames_ctx->device_ctx->hwctx;

Expand All @@ -367,9 +392,14 @@ namespace cuda {

cuda_ctx->stream = stream.get();

CU_CHECK(cdf->cuGraphicsGLRegisterImage(&y_res, nv12->tex[0], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register Y plane texture");
CU_CHECK(cdf->cuGraphicsGLRegisterImage(&uv_res, nv12->tex[1], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register UV plane texture");

if (sw_format == AV_PIX_FMT_YUV444P) {
CU_CHECK(cdf->cuGraphicsGLRegisterImage(&y_res,yuv444->tex[0], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register Y texture");
CU_CHECK(cdf->cuGraphicsGLRegisterImage(&u_res,yuv444->tex[1], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register U texture");
CU_CHECK(cdf->cuGraphicsGLRegisterImage(&v_res,yuv444->tex[2], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register V texture");
} else {
CU_CHECK(cdf->cuGraphicsGLRegisterImage(&y_res, nv12->tex[0], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register Y plane texture");
CU_CHECK(cdf->cuGraphicsGLRegisterImage(&uv_res, nv12->tex[1], GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY), "Couldn't register UV plane texture");
}
return 0;
}

Expand Down Expand Up @@ -399,32 +429,62 @@ namespace cuda {
}

// Perform the color conversion and scaling in GL
sws.load_vram(descriptor, offset_x, offset_y, rgb->tex[0]);
sws.convert(nv12->buf);
if (sw_format == AV_PIX_FMT_YUV444P) {
sws.load_vram(descriptor, offset_x, offset_y, rgb->tex[0], true);
sws.convert_yuv444(yuv444->buf);
} else {
sws.load_vram(descriptor, offset_x, offset_y, rgb->tex[0], false);
sws.convert_nv12(nv12->buf);
}

auto fmt_desc = av_pix_fmt_desc_get(sw_format);

if (sw_format == AV_PIX_FMT_YUV444P) {

// Map the GL textures to read for CUDA
std::array<CUgraphicsResource, 3> resources = {{y_res.get(), u_res.get(), v_res.get()}};
CU_CHECK(cdf->cuGraphicsMapResources(resources.size(), resources.data(), stream.get()), "Couldn't map GL textures in CUDA");

// Copy from the GL textures to the target CUDA frame
for (int i = 0; i < 3; i++) {
CUDA_MEMCPY2D cpy = {};
cpy.srcMemoryType = CU_MEMORYTYPE_ARRAY;
CU_CHECK(cdf->cuGraphicsSubResourceGetMappedArray(&cpy.srcArray, resources[i], 0, 0), "Couldn't get mapped plane array");

cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE;
cpy.dstDevice = (CUdeviceptr) frame->data[i];
cpy.dstPitch = frame->linesize[i];
cpy.WidthInBytes = (frame->width * fmt_desc->comp[i].step);
cpy.Height = frame->height;

CU_CHECK_IGNORE(cdf->cuMemcpy2DAsync(&cpy, stream.get()), "Couldn't copy texture to CUDA frame");
}
// Unmap the textures to allow modification from GL again
CU_CHECK(cdf->cuGraphicsUnmapResources(resources.size(), resources.data(), stream.get()), "Couldn't unmap GL textures from CUDA");

} else {
std::array<CUgraphicsResource, 2> resources = {{y_res.get(), uv_res.get()}};
CU_CHECK(cdf->cuGraphicsMapResources(resources.size(), resources.data(), stream.get()), "Couldn't map GL textures in CUDA");

// Copy from the GL textures to the target CUDA frame
for (int i = 0; i < 2; i++) {
CUDA_MEMCPY2D cpy = {};
cpy.srcMemoryType = CU_MEMORYTYPE_ARRAY;
CU_CHECK(cdf->cuGraphicsSubResourceGetMappedArray(&cpy.srcArray, resources[i], 0, 0), "Couldn't get mapped plane array");

cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE;
cpy.dstDevice = (CUdeviceptr) frame->data[i];
cpy.dstPitch = frame->linesize[i];
cpy.WidthInBytes = (frame->width * fmt_desc->comp[i].step) >> (i ? fmt_desc->log2_chroma_w : 0);
cpy.Height = frame->height >> (i ? fmt_desc->log2_chroma_h : 0);

CU_CHECK_IGNORE(cdf->cuMemcpy2DAsync(&cpy, stream.get()), "Couldn't copy texture to CUDA frame");
}
// Unmap the textures to allow modification from GL again
CU_CHECK(cdf->cuGraphicsUnmapResources(resources.size(), resources.data(), stream.get()), "Couldn't unmap GL textures from CUDA");

// Map the GL textures to read for CUDA
CUgraphicsResource resources[2] = {y_res.get(), uv_res.get()};
CU_CHECK(cdf->cuGraphicsMapResources(2, resources, stream.get()), "Couldn't map GL textures in CUDA");

// Copy from the GL textures to the target CUDA frame
for (int i = 0; i < 2; i++) {
CUDA_MEMCPY2D cpy = {};
cpy.srcMemoryType = CU_MEMORYTYPE_ARRAY;
CU_CHECK(cdf->cuGraphicsSubResourceGetMappedArray(&cpy.srcArray, resources[i], 0, 0), "Couldn't get mapped plane array");

cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE;
cpy.dstDevice = (CUdeviceptr) frame->data[i];
cpy.dstPitch = frame->linesize[i];
cpy.WidthInBytes = (frame->width * fmt_desc->comp[i].step) >> (i ? fmt_desc->log2_chroma_w : 0);
cpy.Height = frame->height >> (i ? fmt_desc->log2_chroma_h : 0);

CU_CHECK_IGNORE(cdf->cuMemcpy2DAsync(&cpy, stream.get()), "Couldn't copy texture to CUDA frame");
}

// Unmap the textures to allow modification from GL again
CU_CHECK(cdf->cuGraphicsUnmapResources(2, resources, stream.get()), "Couldn't unmap GL textures from CUDA");

return 0;
}

Expand All @@ -446,6 +506,7 @@ namespace cuda {

egl::sws_t sws;
egl::nv12_t nv12;
egl::yuv444_t yuv444;
AVPixelFormat sw_format;

int height;
Expand All @@ -455,6 +516,8 @@ namespace cuda {
egl::rgb_t rgb;

registered_resource_t y_res;
registered_resource_t u_res;
registered_resource_t v_res;
registered_resource_t uv_res;

int offset_x;
Expand Down
Loading