diff --git a/README.md b/README.md index 1d529fd..5bf5b67 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,32 @@ Plugin akarin ============= +DLVFX +----- +`akarin.DLVFX(clip clip, int op[, float scale=1, float strength=0])` + +There are three operation modes: +- `op=0`: artefact reduction. `int strength` controls the strength. +- `op=1`: super resolution, `scale>1` controls the scale factor. `int strength` controls the enhancement strength. +- `op=2`: denoising. `float strength` controls the strength. (Not working.) + +Only 32-bit floating point RGB clips are supported for now. + +This filter requires appropriate [Video Effects library (v0.6 beta)](https://www.nvidia.com/en-us/geforce/broadcasting/broadcast-sdk/resources/) to be instaled. (This library is too large to be bundled with the plugin.) +This filter also requires RTX-capable Nvidia GPU to run. + +DLISR +----- + +`akarin.DLISR(clip clip, [, int scale=2])` + +This filter will use Nvidia [NGX Technology](https://developer.nvidia.com/rtx/ngx) DLISR DNN to scale up an input clip. +Input clip must be in `vs.RGBS` format. +The `scale` parameter can only be 2/4/8 and note that this filter uses considerable amount of GPU memory (e.g. 2GB for 2x scaling 1080p input) + +This filter requires `nvngx_dlisr.dll` to be present in the same directory as this plugin. +This filter requires RTX-capable Nvidia GPU to run. + Expr ---- diff --git a/ngx/README.md b/ngx/README.md new file mode 100644 index 0000000..ee982ea --- /dev/null +++ b/ngx/README.md @@ -0,0 +1,15 @@ +To build a standalone plugin: +``` +g++ -o akarin2.dll -shared -gdb ngx*.cc -I. -I ../include -static -DSTANDALONE_NGX +``` + +To use the plugin: +Rename the patched `nvngx_dlisr.dll` file as `akarin2.dlisr.dll`, and put in the +same directory as `akarin2.dll`: + +``` +core.std.LoadPlugin(r'/absolute/path/to/akarin2.dll') +c = core.std.BlankClip(format=vs.RGBS) # only support RGBS clips +res = core.akarin2.DLISR(c, scale=2) +res.set_output() +``` diff --git a/ngx/autodll.h b/ngx/autodll.h new file mode 100644 index 0000000..541b788 --- /dev/null +++ b/ngx/autodll.h @@ -0,0 +1,41 @@ +#ifndef AUTODLL_H +#define AUTODLL_H + +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif + +template +struct importDll { +#ifdef _WIN32 + importDll(T &x, const wchar_t *dllName, const char *displayname, std::vector &errors, const char *name) { + HMODULE h = LoadLibraryW(dllName); +#else + importDll(T &x, const char *dllName, const char *name, std::vector &errors) { + const char *displayname = dllName; + void *h = dlopen(dllName, RTLD_GLOBAL | RTLD_LAZY); +#endif + if (h == 0) { + errors.push_back(std::string("unable to load ") + displayname); + return; + } +#ifdef _WIN32 + x = reinterpret_cast(GetProcAddress(h, name)); +#else + x = reinterpret_cast(dlsym(h, name)); +#endif + if (x == nullptr) { + errors.push_back(std::string("unable to find ") + name + " in " + displayname); + } + } +}; + +#define EXT_FN(dll, retty, name, args) \ + static retty (*name) args = nullptr; \ + static importDll _load ## name(name, dll, #name) + +#endif diff --git a/ngx/cuda.h b/ngx/cuda.h new file mode 100644 index 0000000..a1f17e9 --- /dev/null +++ b/ngx/cuda.h @@ -0,0 +1,259 @@ +#ifndef CUDA_DLL +#error "CUDA_DLL not defined." +#endif + +#ifndef CUDA_H +#define CUDA_H + +#include "autodll.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void *CUdeviceptr; +typedef int CUdevice; /**< CUDA device */ +typedef struct CUctx_st *CUcontext; /**< CUDA context */ +typedef struct CUmod_st *CUmodule; /**< CUDA module */ +typedef struct CUfunc_st *CUfunction; /**< CUDA function */ +typedef struct CUstream_st *CUstream; /**< CUDA stream */ +typedef struct CUevent_st *CUevent; /**< CUDA event */ +typedef struct CUarray_st *CUarray; + +typedef enum { + CUDA_SUCCESS = 0, + CUDA_ERROR_INVALID_VALUE = 1, + CUDA_ERROR_OUT_OF_MEMORY = 2, + CUDA_ERROR_NOT_INITIALIZED = 3, + CUDA_ERROR_DEINITIALIZED = 4, + CUDA_ERROR_PROFILER_DISABLED = 5, + CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6, + CUDA_ERROR_PROFILER_ALREADY_STARTED = 7, + CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8, + CUDA_ERROR_NO_DEVICE = 100, + CUDA_ERROR_INVALID_DEVICE = 101, + CUDA_ERROR_INVALID_IMAGE = 200, + CUDA_ERROR_INVALID_CONTEXT = 201, + CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202, + CUDA_ERROR_MAP_FAILED = 205, + CUDA_ERROR_UNMAP_FAILED = 206, + CUDA_ERROR_ARRAY_IS_MAPPED = 207, + CUDA_ERROR_ALREADY_MAPPED = 208, + CUDA_ERROR_NO_BINARY_FOR_GPU = 209, + CUDA_ERROR_ALREADY_ACQUIRED = 210, + CUDA_ERROR_NOT_MAPPED = 211, + CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212, + CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213, + CUDA_ERROR_ECC_UNCORRECTABLE = 214, + CUDA_ERROR_UNSUPPORTED_LIMIT = 215, + CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216, + CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217, + CUDA_ERROR_INVALID_PTX = 218, + CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219, + CUDA_ERROR_NVLINK_UNCORRECTABLE = 220, + CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221, + CUDA_ERROR_INVALID_SOURCE = 300, + CUDA_ERROR_FILE_NOT_FOUND = 301, + CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, + CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303, + CUDA_ERROR_OPERATING_SYSTEM = 304, + CUDA_ERROR_INVALID_HANDLE = 400, + CUDA_ERROR_NOT_FOUND = 500, + CUDA_ERROR_NOT_READY = 600, + CUDA_ERROR_ILLEGAL_ADDRESS = 700, + CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701, + CUDA_ERROR_LAUNCH_TIMEOUT = 702, + CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703, + CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704, + CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705, + CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708, + CUDA_ERROR_CONTEXT_IS_DESTROYED = 709, + CUDA_ERROR_ILLEGAL_INSTRUCTION = 715, + CUDA_ERROR_MISALIGNED_ADDRESS = 716, + CUDA_ERROR_INVALID_ADDRESS_SPACE = 717, + CUDA_ERROR_INVALID_PC = 718, + CUDA_ERROR_LAUNCH_FAILED = 719, + CUDA_ERROR_NOT_PERMITTED = 800, + CUDA_ERROR_NOT_SUPPORTED = 801, + CUDA_ERROR_UNKNOWN = 999 +} CUresult; + +typedef enum { + CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1, /**< Maximum number of threads per block */ + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2, /**< Maximum block dimension X */ + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3, /**< Maximum block dimension Y */ + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4, /**< Maximum block dimension Z */ + CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5, /**< Maximum grid dimension X */ + CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6, /**< Maximum grid dimension Y */ + CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7, /**< Maximum grid dimension Z */ + CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8, /**< Maximum shared memory available per block in bytes */ + CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */ + CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9, /**< Memory available on device for __constant__ variables in a CUDA C kernel in bytes */ + CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10, /**< Warp size in threads */ + CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11, /**< Maximum pitch in bytes allowed by memory copies */ + CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12, /**< Maximum number of 32-bit registers available per block */ + CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */ + CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13, /**< Typical clock frequency in kilohertz */ + CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14, /**< Alignment requirement for textures */ + CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15, /**< Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT. */ + CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16, /**< Number of multiprocessors on device */ + CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17, /**< Specifies whether there is a run time limit on kernels */ + CU_DEVICE_ATTRIBUTE_INTEGRATED = 18, /**< Device is integrated with host memory */ + CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19, /**< Device can map host memory into CUDA address space */ + CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20, /**< Compute mode (See CUcomputemode for details) */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21, /**< Maximum 1D texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22, /**< Maximum 2D texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23, /**< Maximum 2D texture height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24, /**< Maximum 3D texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25, /**< Maximum 3D texture height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26, /**< Maximum 3D texture depth */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27, /**< Maximum 2D layered texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28, /**< Maximum 2D layered texture height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29, /**< Maximum layers in a 2D layered texture */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS */ + CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30, /**< Alignment requirement for surfaces */ + CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31, /**< Device can possibly execute multiple kernels concurrently */ + CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32, /**< Device has ECC support enabled */ + CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33, /**< PCI bus ID of the device */ + CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34, /**< PCI device ID of the device */ + CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35, /**< Device is using TCC driver model */ + CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36, /**< Peak memory clock frequency in kilohertz */ + CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37, /**< Global memory bus width in bits */ + CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38, /**< Size of L2 cache in bytes */ + CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39, /**< Maximum resident threads per multiprocessor */ + CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40, /**< Number of asynchronous engines */ + CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41, /**< Device shares a unified address space with the host */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42, /**< Maximum 1D layered texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43, /**< Maximum layers in a 1D layered texture */ + CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44, /**< Deprecated, do not use. */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45, /**< Maximum 2D texture width if CUDA_ARRAY3D_TEXTURE_GATHER is set */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46, /**< Maximum 2D texture height if CUDA_ARRAY3D_TEXTURE_GATHER is set */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47, /**< Alternate maximum 3D texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48, /**< Alternate maximum 3D texture height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49, /**< Alternate maximum 3D texture depth */ + CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50, /**< PCI domain ID of the device */ + CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51, /**< Pitch alignment requirement for textures */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52, /**< Maximum cubemap texture width/height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53, /**< Maximum cubemap layered texture width/height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54, /**< Maximum layers in a cubemap layered texture */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55, /**< Maximum 1D surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56, /**< Maximum 2D surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57, /**< Maximum 2D surface height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58, /**< Maximum 3D surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59, /**< Maximum 3D surface height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60, /**< Maximum 3D surface depth */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61, /**< Maximum 1D layered surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62, /**< Maximum layers in a 1D layered surface */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63, /**< Maximum 2D layered surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64, /**< Maximum 2D layered surface height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65, /**< Maximum layers in a 2D layered surface */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66, /**< Maximum cubemap surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67, /**< Maximum cubemap layered surface width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68, /**< Maximum layers in a cubemap layered surface */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69, /**< Maximum 1D linear texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70, /**< Maximum 2D linear texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71, /**< Maximum 2D linear texture height */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72, /**< Maximum 2D linear texture pitch in bytes */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73, /**< Maximum mipmapped 2D texture width */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74, /**< Maximum mipmapped 2D texture height */ + CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75, /**< Major compute capability version number */ + CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76, /**< Minor compute capability version number */ + CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77, /**< Maximum mipmapped 1D texture width */ + CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78, /**< Device supports stream priorities */ + CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79, /**< Device supports caching globals in L1 */ + CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80, /**< Device supports caching locals in L1 */ + CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81, /**< Maximum shared memory available per multiprocessor in bytes */ + CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82, /**< Maximum number of 32-bit registers available per multiprocessor */ + CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83, /**< Device can allocate managed memory on this system */ + CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84, /**< Device is on a multi-GPU board */ + CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85, /**< Unique id for a group of devices on the same multi-GPU board */ + CU_DEVICE_ATTRIBUTE_MAX +} CUdevice_attribute; + +typedef enum CUmemorytype_enum { + CU_MEMORYTYPE_HOST = 0x01, + CU_MEMORYTYPE_DEVICE = 0x02, + CU_MEMORYTYPE_ARRAY = 0x03, + CU_MEMORYTYPE_UNIFIED = 0x04 +} CUmemorytype; + +typedef struct CUDA_MEMCPY3D_st { + size_t srcXInBytes; /**< Source X in bytes */ + size_t srcY; /**< Source Y */ + size_t srcZ; /**< Source Z */ + size_t srcLOD; /**< Source LOD */ + CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */ + const void *srcHost; /**< Source host pointer */ + CUdeviceptr srcDevice; /**< Source device pointer */ + CUarray srcArray; /**< Source array reference */ + void *reserved0; /**< Must be NULL */ + size_t srcPitch; /**< Source pitch (ignored when src is array) */ + size_t srcHeight; /**< Source height (ignored when src is array; may be 0 if Depth==1) */ + + size_t dstXInBytes; /**< Destination X in bytes */ + size_t dstY; /**< Destination Y */ + size_t dstZ; /**< Destination Z */ + size_t dstLOD; /**< Destination LOD */ + CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */ + void *dstHost; /**< Destination host pointer */ + CUdeviceptr dstDevice; /**< Destination device pointer */ + CUarray dstArray; /**< Destination array reference */ + void *reserved1; /**< Must be NULL */ + size_t dstPitch; /**< Destination pitch (ignored when dst is array) */ + size_t dstHeight; /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */ + + size_t WidthInBytes; /**< Width of 3D memory copy in bytes */ + size_t Height; /**< Height of 3D memory copy */ + size_t Depth; /**< Depth of 3D memory copy */ +} CUDA_MEMCPY3D; + +typedef int CUjit_option; + +#ifndef CUDA_FN +#define CUDA_FN(ret, fn, args) EXT_FN(CUDA_DLL, ret, fn, args) +#endif +#ifndef CUDA_FN_OPTIONAL +#define CUDA_FN_OPTIONAL(ret, fn, args) +#endif +#ifndef CUDA_FN_3020 +#define CUDA_FN_3020(ret, fn, fn_3020, args) CUDA_FN(ret, fn_3020, args) +#endif +#ifndef CUDA_FN_4000 +#define CUDA_FN_4000(ret, fn, fn_4000, args) CUDA_FN(ret, fn_4000, args) +#endif + +CUDA_FN(CUresult, cuInit, (unsigned int Flags)); +CUDA_FN(CUresult, cuDeviceGetCount, (int *count)); +CUDA_FN(CUresult, cuDeviceGet, (CUdevice * device, int ordinal)); +CUDA_FN(CUresult, cuDeviceGetAttribute, (int *, CUdevice_attribute attrib, CUdevice dev)); +CUDA_FN(CUresult, cuDeviceGetName, (char *, int len, CUdevice dev)); +CUDA_FN(CUresult, cuDeviceTotalMem, (size_t *, CUdevice dev)); +CUDA_FN_3020(CUresult, cuCtxCreate, cuCtxCreate_v2, (CUcontext * pctx, unsigned int flags, CUdevice dev)); +CUDA_FN_4000(CUresult, cuCtxDestroy, cuCtxDestroy_v2, (CUcontext pctx)); +CUDA_FN(CUresult, cuCtxGetFlags, (unsigned int * flags)); +CUDA_FN(CUresult, cuCtxGetCurrent, (CUcontext * pctx)); +CUDA_FN(CUresult, cuCtxSetCurrent, (CUcontext ctx)); +CUDA_FN(CUresult, cuCtxPushCurrent, (CUcontext ctx)); +CUDA_FN(CUresult, cuCtxPopCurrent, (CUcontext * pctx)); +CUDA_FN(CUresult, cuProfilerStop, ()); +CUDA_FN(CUresult, cuCtxGetApiVersion, (CUcontext ctx, unsigned int *version)); +CUDA_FN(CUresult, cuCtxGetDevice, (CUdevice *)); +CUDA_FN(CUresult, cuModuleLoadData, (CUmodule * module, const void *image)); +CUDA_FN(CUresult, cuModuleLoadDataEx, (CUmodule * module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues)); +CUDA_FN(CUresult, cuModuleUnload, (CUmodule module)); +CUDA_FN(CUresult, cuModuleGetFunction, (CUfunction * hfunc, CUmodule hmod, const char *name)); +CUDA_FN_3020(CUresult, cuMemAlloc, cuMemAlloc_v2, (CUdeviceptr * dptr, size_t bytesize)); +CUDA_FN_3020(CUresult, cuMemFree, cuMemFree_v2, (CUdeviceptr dptr)); +CUDA_FN_3020(CUresult, cuMemcpyHtoD, cuMemcpyHtoD_v2, (CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount)); +CUDA_FN_3020(CUresult, cuMemcpyDtoH, cuMemcpyDtoH_v2, (void *dstHost, CUdeviceptr srcDevice, size_t ByteCount)); +CUDA_FN_3020(CUresult, cuMemcpyDtoD, cuMemcpyDtoD_v2, (CUdeviceptr dstHost, CUdeviceptr srcDevice, size_t ByteCount)); + +CUDA_FN(CUresult, cuMemsetD8Async, (CUdeviceptr devPtr, int value, size_t count, CUstream st)); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/ngx/internalfilters.h b/ngx/internalfilters.h new file mode 100644 index 0000000..db8a3a4 --- /dev/null +++ b/ngx/internalfilters.h @@ -0,0 +1,8 @@ +#ifndef NGX_INTERNALFILTERS_H +#define NGX_INTERNALFILTERS_H + +#include "VapourSynth.h" + +void VS_CC ngxInitialize(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin *plugin); + +#endif // INTERNALFILTERS_H diff --git a/ngx/ngx.cc b/ngx/ngx.cc new file mode 100644 index 0000000..86e6461 --- /dev/null +++ b/ngx/ngx.cc @@ -0,0 +1,280 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "VapourSynth.h" +#include "VSHelper.h" + +#ifndef _WIN32 +#error "Unsupported platform" +#else +static std::vector autoDllErrors; +static const wchar_t *dllPath(const wchar_t *suffix); +#define NGX_DLL dllPath(L".dlisr.dll"),".dlisr.dll",autoDllErrors +#define CUDA_DLL L"nvcuda.dll","nvcuda.dll",autoDllErrors +#endif +#include "cuda.h" +#include "ngx.h" + +#define CK_NGX(x) do { \ + int r = (x); \ + if (r != NVSDK_NGX_Result_Success) { \ + fprintf(stderr, "failed NGX call %s: %x\n", #x, r); \ + abort(); \ + } \ +} while (0) +#define CK_CUDA(x) do { \ + int r = (x); \ + if (r != CUDA_SUCCESS) { \ + fprintf(stderr, "failed cuda call %s: %d\n", #x, r); \ + abort(); \ + } \ +} while (0) + +static const wchar_t *dllPath(const wchar_t *suffix) { + static const std::wstring res = [suffix]() -> std::wstring { + HMODULE mod = 0; + if (GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, (char *)dllPath, &mod)) { + std::vector buf; + size_t n = 0; + do { + buf.resize(buf.size() + MAX_PATH); + n = GetModuleFileNameW(mod, buf.data(), buf.size()); + } while (n >= buf.size()); + buf.resize(n); + std::wstring path(buf.begin(), buf.end() - 4); + path += suffix; + return path; + } + throw std::runtime_error("unable to locate myself"); + }(); + return res.c_str(); +} + +static void *cudaMalloc(size_t size) { + void *ptr = nullptr; + CK_CUDA(cuMemAlloc_v2(&ptr, size)); + return ptr; +} + +struct NgxData { + std::mutex lock; + + VSNodeRef *node; + VSVideoInfo vi; + int scale; + + typedef float T; + uint64_t pixel_size() const { return 3 * sizeof(T); } + uint64_t in_image_width() const { return vi.width / scale; } + uint64_t out_image_width() const { return vi.width; } + uint64_t in_image_height() const { return vi.height / scale; } + uint64_t out_image_height() const { return vi.height; } + uint64_t in_image_row_bytes() const { return pixel_size() * in_image_width(); } + uint64_t out_image_row_bytes() const { return pixel_size() * out_image_width(); } + uint64_t in_size() const { return in_image_height() * in_image_row_bytes(); } + uint64_t out_size() const { return out_image_height() * out_image_row_bytes(); } + + NVSDK_NGX_Parameter *param; + NVSDK_NGX_Handle *DUHandle; + CUcontext ctx; + + std::vector in_host, out_host; + CUdeviceptr inp, outp; + void allocate() { + in_host.resize(in_size()); + out_host.resize(out_size()); + inp = cudaMalloc(in_size()); + outp = cudaMalloc(out_size()); + } + + NgxData() : node(nullptr), vi(), scale(0), param(nullptr), DUHandle(nullptr), ctx(nullptr), inp(nullptr), outp(nullptr) {} + ~NgxData() { + if (ctx) { + CK_CUDA(cuCtxPushCurrent(ctx)); + if (inp) CK_CUDA(cuMemFree_v2(inp)); + if (outp) CK_CUDA(cuMemFree_v2(outp)); + if (DUHandle) CK_NGX(NVSDK_NGX_CUDA_ReleaseFeature(DUHandle)); + cuCtxPopCurrent(nullptr); + } + } +}; + +static void VS_CC ngxInit(VSMap *in, VSMap *out, void **instanceData, VSNode *node, VSCore *core, const VSAPI *vsapi) { + NgxData *d = static_cast(*instanceData); + vsapi->setVideoInfo(&d->vi, 1, node); +} + +static const VSFrameRef *VS_CC ngxGetFrame(int n, int activationReason, void **instanceData, void **frameData, VSFrameContext *frameCtx, VSCore *core, const VSAPI *vsapi) { + NgxData *d = static_cast(*instanceData); + + if (activationReason == arInitial) { + vsapi->requestFrameFilter(n, d->node, frameCtx); + } else if (activationReason == arAllFramesReady) { + const VSFrameRef *src = vsapi->getFrameFilter(n, d->node, frameCtx); + + const VSFormat *fi = d->vi.format; + assert(vsapi->getFrameHeight(src, 0) == (int)d->in_image_height()); + assert(vsapi->getFrameWidth(src, 0) == (int)d->in_image_width()); + int planes[3] = { 0, 1, 2 }; + const VSFrameRef *srcf[3] = { nullptr, nullptr, nullptr }; + VSFrameRef *dst = vsapi->newVideoFrame2(fi, d->out_image_width(), d->out_image_height(), srcf, planes, src, core); + + // The NGX API is not thread safe. + std::lock_guard lock(d->lock); + CK_CUDA(cuCtxPushCurrent(d->ctx)); + + auto params = d->param; + params->Set(NVSDK_NGX_Parameter_Width, (uint64_t)d->in_image_width()); + params->Set(NVSDK_NGX_Parameter_Height, (uint64_t)d->in_image_height()); + params->Set(NVSDK_NGX_Parameter_Scale, d->scale); + + // Create the feature + //CK_NGX(NVSDK_NGX_CUDA_CreateFeature(NVSDK_NGX_Feature_ImageSuperResolution, params, &d->DUHandle)); + + void *in_image_dev_ptr = d->inp; + void *out_image_dev_ptr = d->outp;; + + uint8_t *host = d->in_host.data(); + typedef float T; + const T factor = 255.0f; + for (int plane = 0; plane < 3; plane++) { + const size_t stride = vsapi->getStride(src, plane); + const uint8_t *ptr = (uint8_t*)vsapi->getReadPtr(src, plane); + for (size_t i = 0; i < d->in_image_height(); i++) + for (size_t j = 0; j < d->in_image_width(); j++) + *(T*)&host[i * d->in_image_row_bytes() + j * d->pixel_size() + plane * sizeof(T)] = *(T*)&ptr[i * stride + j * sizeof(T)] * factor; + } + CK_CUDA(cuMemcpyHtoD_v2(in_image_dev_ptr, host, d->in_size())); + + // Pass the pointers to the GPU allocations to the + // parameter block along with the format and size. + params->Set(NVSDK_NGX_Parameter_Color_SizeInBytes, d->in_size()); + params->Set(NVSDK_NGX_Parameter_Color_Format, NVSDK_NGX_Buffer_Format_RGB32F); + params->Set(NVSDK_NGX_Parameter_Color, in_image_dev_ptr); + params->Set(NVSDK_NGX_Parameter_Output_SizeInBytes, d->out_size()); + params->Set(NVSDK_NGX_Parameter_Output_Format, NVSDK_NGX_Buffer_Format_RGB32F); + params->Set(NVSDK_NGX_Parameter_Output, out_image_dev_ptr); + + // Execute the feature. + CK_NGX(NVSDK_NGX_CUDA_EvaluateFeature(d->DUHandle, params, nullptr)); + + host = d->out_host.data(); + CK_CUDA(cuMemcpyDtoH_v2(host, out_image_dev_ptr, d->out_size())); + for (int plane = 0; plane < 3; plane++) { + const size_t stride = vsapi->getStride(dst, plane); + uint8_t *ptr = (uint8_t*)vsapi->getWritePtr(dst, plane); + for (size_t i = 0; i < d->out_image_height(); i++) + for (size_t j = 0; j < d->out_image_width(); j++) + *(T*)&ptr[i * stride + j * sizeof(T)] = *(T*)&host[i * d->out_image_row_bytes() + j * d->pixel_size() + plane * sizeof(T)] / factor; + } + + cuCtxPopCurrent(nullptr); + + vsapi->freeFrame(src); + return dst; + } + + return nullptr; +} + +static void VS_CC ngxFree(void *instanceData, VSCore *core, const VSAPI *vsapi) { + NgxData *d = static_cast(instanceData); + vsapi->freeNode(d->node); + + delete d; +} + +static void VS_CC ngxCreate(const VSMap *in, VSMap *out, void *userData, VSCore *core, const VSAPI *vsapi) { + std::unique_ptr d(new NgxData); + int err; + + try { + if (autoDllErrors.size() > 0) { + std::string error, last; + for (const auto &s: autoDllErrors) { + if (error.size()) { + if (last != s) + error += "; " + s; + } else + error = s; + last = s; + } + throw std::runtime_error(error); + } + + d->node = vsapi->propGetNode(in, "clip", 0, &err); + d->vi = *vsapi->getVideoInfo(d->node); + + if (!isConstantFormat(&d->vi)) { + throw std::runtime_error("Only clips with constant format and dimensions allowed"); + } + if (d->vi.format->numPlanes != 3 || d->vi.format->colorFamily != cmRGB) + throw std::runtime_error("input clip must be RGB format"); + if (d->vi.format->sampleType != stFloat || d->vi.format->bitsPerSample != 32) + throw std::runtime_error("input clip must be 32-bit float format"); + + int scale = int64ToIntS(vsapi->propGetInt(in, "scale", 0, &err)); + if (err) scale = 2; + if (scale != 2 && scale != 4 && scale != 8) + throw std::runtime_error("scale must be 2/4/8"); + d->scale = scale; + } catch (std::runtime_error &e) { + if (d->node) + vsapi->freeNode(d->node); + vsapi->setError(out, (std::string{ "DLISR: " } + e.what()).c_str()); + return; + } + + d->vi.width *= d->scale; + d->vi.height *= d->scale; + + static bool inited = []() -> bool { + CUcontext ctx; + bool hasCtx = cuCtxGetCurrent(&ctx) == CUDA_SUCCESS; + CK_NGX(NVSDK_NGX_CUDA_Init(0, L"./", NVSDK_NGX_Version_API)); + // We don't expect NVSDK_NGX_CUDA_Init to create a context, but if it did, we need to + // switch to save a global CUDA context, instead of the pre-filter context. + if (!hasCtx && cuCtxGetCurrent(&ctx) == CUDA_SUCCESS) { + fprintf(stderr, "invariant violated: NVSDK_NGX_CUDA_Init created CUDA context: %p\n", ctx); + abort(); + } + return true; + }(); + (void) inited; + NV_new_Parameter(&d->param); + + d->param->Set(NVSDK_NGX_Parameter_Width, d->in_image_width()); + d->param->Set(NVSDK_NGX_Parameter_Height, d->in_image_height()); + d->param->Set(NVSDK_NGX_Parameter_Scale, d->scale); + + // Get the scratch buffer size and create the scratch allocation. + size_t byteSize{ 0u }; + CK_NGX(NVSDK_NGX_CUDA_GetScratchBufferSize(NVSDK_NGX_Feature_ImageSuperResolution, d->param, &byteSize)); + if (byteSize != 0) // should request none. + abort(); + + // Create the feature + CK_NGX(NVSDK_NGX_CUDA_CreateFeature(NVSDK_NGX_Feature_ImageSuperResolution, d->param, &d->DUHandle)); + CK_CUDA(cuCtxGetCurrent(&d->ctx)); + d->allocate(); + CK_CUDA(cuCtxPopCurrent(nullptr)); + + vsapi->createFilter(in, out, "DLISR", ngxInit, ngxGetFrame, ngxFree, fmParallel, 0, d.release(), core); +} + +////////////////////////////////////////// +// Init + +#ifndef STANDALONE_NGX +void VS_CC ngxInitialize(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin *plugin) { +#else +extern "C" void VS_CC VapourSynthPluginInit(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin *plugin) { + configFunc("info.akarin.plugin", "akarin2", "Experimental Nvidia DLISR plugin", VAPOURSYNTH_API_VERSION, 1, plugin); +#endif + registerFunc("DLISR", "clip:clip;scale:int:opt", ngxCreate, nullptr, plugin); +} diff --git a/ngx/ngx.h b/ngx/ngx.h new file mode 100644 index 0000000..9e1b23c --- /dev/null +++ b/ngx/ngx.h @@ -0,0 +1,228 @@ +#if !defined(NGX_DLL) && !defined(NGX_NO_FUNCS) +#error "NGX_DLL not defined." +#endif + +#ifndef NGX_H +#define NGX_H + +#include + +struct ID3D12Resource; +struct ID3D11Resource; + +enum NVSDK_NGX_Version : int { + NVSDK_NGX_Version_API = 0x12, +}; + +enum NVSDK_NGX_Result : uint32_t { + NVSDK_NGX_Result_Success = 0x1, + NVSDK_NGX_Result_Fail = 0xBAD00000, + NVSDK_NGX_Result_FAIL_FeatureNotSupported = 0xBAD00001, + NVSDK_NGX_Result_FAIL_PlatformError = 0xBAD00002, + NVSDK_NGX_Result_FAIL_FeatureAlreadyExists = 0xBAD00003, + NVSDK_NGX_Result_FAIL_FeatureNotFound = 0xBAD00004, + NVSDK_NGX_Result_FAIL_InvalidParameter = 0xBAD00005, + NVSDK_NGX_Result_FAIL_ScratchBufferTooSmall = 0xBAD00006, + NVSDK_NGX_Result_FAIL_NotInitialized = 0xBAD00007, + NVSDK_NGX_Result_FAIL_UnsupportedInputFormat = 0xBAD00008, + NVSDK_NGX_Result_FAIL_RWFlagMissing = 0xBAD00009, + NVSDK_NGX_Result_FAIL_MissingInput = 0xBAD0000A, + NVSDK_NGX_Result_FAIL_UnableToInitializeFeature = 0xBAD0000B, + NVSDK_NGX_Result_FAIL_OutOfDate = 0xBAD0000C, + NVSDK_NGX_Result_FAIL_OutOfGPUMemory = 0xBAD0000D, + NVSDK_NGX_Result_FAIL_UnsupportedFormat = 0xBAD0000E, + NVSDK_NGX_Result_FAIL_UnableToWriteToAppDataPath = 0xBAD0000F, + NVSDK_NGX_Result_FAIL_UnsupportedParameter = 0xBAD00010, +}; + +enum NVSDK_NGX_Buffer_Format : int { + NVSDK_NGX_Buffer_Format_Unknown = 0x0, + NVSDK_NGX_Buffer_Format_RGB8UI = 0x1, + NVSDK_NGX_Buffer_Format_RGB16F = 0x2, + NVSDK_NGX_Buffer_Format_RGB32F = 0x3, + NVSDK_NGX_Buffer_Format_RGBA8UI = 0x4, + NVSDK_NGX_Buffer_Format_RGBA16F = 0x5, + NVSDK_NGX_Buffer_Format_RGBA32F = 0x6, +}; + +enum NVSDK_NGX_Feature : int { + NVSDK_NGX_Feature_Reserved0 = 0x0, + NVSDK_NGX_Feature_SuperSampling = 0x1, + NVSDK_NGX_Feature_InPainting = 0x2, + NVSDK_NGX_Feature_ImageSuperResolution = 0x3, + NVSDK_NGX_Feature_SlowMotion = 0x4, + NVSDK_NGX_Feature_VideoSuperResolution = 0x5, + NVSDK_NGX_Feature_Reserved6 = 0x6, + NVSDK_NGX_Feature_Reserved7 = 0x7, + NVSDK_NGX_Feature_Reserved8 = 0x8, + NVSDK_NGX_Feature_ImageSignalProcessing = 0x9, + NVSDK_NGX_Feature_Count = 0xA, +}; + +struct NVSDK_NGX_Parameter { + virtual void Set(const char *, void *) = 0; + virtual void Set(const char *, struct ID3D12Resource *) = 0; + virtual void Set(const char *, struct ID3D11Resource *) = 0; + virtual void Set(const char *, int) = 0; + virtual void Set(const char *, unsigned int) = 0; + virtual void Set(const char *, long double) = 0; + virtual void Set(const char *, float) = 0; + virtual void Set(const char *, uint64_t) = 0; + virtual NVSDK_NGX_Result Get(const char *, void **) = 0; + virtual NVSDK_NGX_Result Get(const char *, struct ID3D12Resource **) = 0; + virtual NVSDK_NGX_Result Get(const char *, struct ID3D11Resource **) = 0; + virtual NVSDK_NGX_Result Get(const char *, int *) = 0; + virtual NVSDK_NGX_Result Get(const char *, unsigned int *) = 0; + virtual NVSDK_NGX_Result Get(const char *, long double *) = 0; + virtual NVSDK_NGX_Result Get(const char *, float *) = 0; + virtual NVSDK_NGX_Result Get(const char *, uint64_t *) = 0; + virtual void Reset() = 0; +}; + +void NV_new_Parameter(NVSDK_NGX_Parameter **p); + +struct NVSDK_NGX_Handle { + NVSDK_NGX_Feature Id; +}; + +// Parameters +#define NVSDK_NGX_Parameter_ImageSuperResolution_Available "ImageSuperResolution.Available" +#define NVSDK_NGX_Parameter_Width "Width" +#define NVSDK_NGX_Parameter_Height "Height" +#define NVSDK_NGX_Parameter_Scale "Scale" +#define NVSDK_NGX_Parameter_Scratch "Scratch" +#define NVSDK_NGX_Parameter_Scratch_SizeInBytes "Scratch.SizeInBytes" +#define NVSDK_NGX_Parameter_Color_SizeInBytes "Color.SizeInBytes" +#define NVSDK_NGX_Parameter_Color_Format "Color.Format" +#define NVSDK_NGX_Parameter_Color "Color" +#define NVSDK_NGX_Parameter_Output_SizeInBytes "Output.SizeInBytes" +#define NVSDK_NGX_Parameter_Output_Format "Output.Format" +#define NVSDK_NGX_Parameter_Output "Output" + +#ifndef NGX_NO_FUNCS +#include "autodll.h" + +// NVSDK_NGX_Init +// ------------------------------------- +// +// InApplicationId: +// Unique Id provided by NVIDIA +// +// InApplicationDataPath: +// Folder to store logs and other temporary files (write access required) +// +// InDevice: [d3d11/12 only] +// DirectX device to use +// +// DESCRIPTION: +// Initializes new SDK instance. +// +EXT_FN(NGX_DLL, NVSDK_NGX_Result, NVSDK_NGX_CUDA_Init, (unsigned long long InApplicationId, const wchar_t *InApplicationDataPath, NVSDK_NGX_Version InSDKVersion/* = NVSDK_NGX_Version_API*/)); + +// NVSDK_NGX_GetScratchBufferSize +// ---------------------------------------------------------- +// +// InFeatureId: +// AI feature in question +// +// InParameters: +// Parameters used by the feature to help estimate scratch buffer size +// +// OutSizeInBytes: +// Number of bytes needed for the scratch buffer for the specified feature. +// +// DESCRIPTION: +// SDK needs a buffer of a certain size provided by the client in +// order to initialize AI feature. Once feature is no longer +// needed buffer can be released. It is safe to reuse the same +// scratch buffer for different features as long as minimum size +// requirement is met for all features. Please note that some +// features might not need a scratch buffer so return size of 0 +// is completely valid. +// +EXT_FN(NGX_DLL, NVSDK_NGX_Result, NVSDK_NGX_CUDA_GetScratchBufferSize, (NVSDK_NGX_Feature InFeatureId, const NVSDK_NGX_Parameter *InParameters, size_t *OutSizeInBytes)); + +// NVSDK_NGX_CreateFeature +// ------------------------------------- +// +// InCmdList:[d3d12 only] +// Command list to use to execute GPU commands. Must be: +// - Open and recording +// - With node mask including the device provided in NVSDK_NGX_D3D12_Init +// - Execute on non-copy command queue. +// InDevCtx: [d3d11 only] +// Device context to use to execute GPU commands +// +// InFeatureID: +// AI feature to initialize +// +// InParameters: +// List of parameters +// +// OutHandle: +// Handle which uniquely identifies the feature. If feature with +// provided parameters already exists the "already exists" error code is returned. +// +// DESCRIPTION: +// Each feature needs to be created before it can be used. +// Refer to the sample code to find out which input parameters +// are needed to create specific feature. +// +EXT_FN(NGX_DLL, NVSDK_NGX_Result, NVSDK_NGX_CUDA_CreateFeature, (NVSDK_NGX_Feature InFeatureID, const NVSDK_NGX_Parameter *InParameters, NVSDK_NGX_Handle **OutHandle)); + +// NVSDK_NGX_EvaluateFeature +// ------------------------------------- +// +// InCmdList:[d3d12 only] +// Command list to use to execute GPU commands. Must be: +// - Open and recording +// - With node mask including the device provided in NVSDK_NGX_D3D12_Init +// - Execute on non-copy command queue. +// InDevCtx: [d3d11 only] +// Device context to use to execute GPU commands +// +// InFeatureHandle: +// Handle representing feature to be evaluated +// +// InParameters: +// List of parameters required to evaluate feature +// +// InCallback: +// Optional callback for features which might take longer +// to execute. If specified SDK will call it with progress +// values in range 0.0f - 1.0f. Client application can indicate +// that evaluation should be cancelled by setting OutShouldCancel +// to true. +// +// DESCRIPTION: +// Evaluates given feature using the provided parameters and +// pre-trained NN. Please note that for most features +// it can be beneficial to pass as many input buffers and parameters +// as possible (for example provide all render targets like color, albedo, normals, depth etc) +// +typedef void (*PFN_NVSDK_NGX_ProgressCallback)(float InCurrentProgress, bool &OutShouldCancel); +EXT_FN(NGX_DLL, NVSDK_NGX_Result, NVSDK_NGX_CUDA_EvaluateFeature, (const NVSDK_NGX_Handle *InFeatureHandle, const NVSDK_NGX_Parameter *InParameters, PFN_NVSDK_NGX_ProgressCallback InCallback /* = nullptr */)); + +// NVSDK_NGX_Release +// ------------------------------------- +// +// InHandle: +// Handle to feature to be released +// +// DESCRIPTION: +// Releases feature with a given handle. +// Handles are not reference counted so +// after this call it is invalid to use provided handle. +// +EXT_FN(NGX_DLL, NVSDK_NGX_Result, NVSDK_NGX_CUDA_ReleaseFeature, (NVSDK_NGX_Handle *InHandle)); + +// NVSDK_NGX_Shutdown +// ------------------------------------- +// +// DESCRIPTION: +// Shuts down the current SDK instance and releases all resources. +// +EXT_FN(NGX_DLL, NVSDK_NGX_Result, NVSDK_NGX_CUDA_Shutdown, ()); +#endif // !defined(NGX_NO_FUNCS) + +#endif // defined(NGX_H) diff --git a/ngx/ngximpl.cc b/ngx/ngximpl.cc new file mode 100644 index 0000000..62d3395 --- /dev/null +++ b/ngx/ngximpl.cc @@ -0,0 +1,78 @@ +#include +#include +#include + +#include + +#define NGX_NO_FUNCS +#include "ngx.h" + +struct NVSDK_NGX_Parameter_Impl: public NVSDK_NGX_Parameter { + union u { + void *p; + long double d; + uint64_t u64; + + u() : u64(0) {} + u(void *p) : p(p) {} + u(int i) : u64((int64_t)i) {} + u(unsigned int ui) : u64(ui) {} + u(float f) : d(f) {} + u(long double d) : d(d) {} + u(uint64_t u64) : u64(u64) {} + operator void *() const { return p; } + operator int() const { return (int)u64; } + operator unsigned int() const { return u64; } + operator float() const { return d; } + operator long double() const { return d; } + operator uint64_t() const { return u64; } + }; + std::map map; + template NVSDK_NGX_Result get(const char *name, T* p) { + auto it = map.find(name); + if (it != map.end()) { + *p = it->second; + return NVSDK_NGX_Result_Success; + } + return NVSDK_NGX_Result_Fail; + } + void Set(const char *, void *) override; + void Set(const char *, struct ID3D12Resource *) override; + void Set(const char *, struct ID3D11Resource *) override; + void Set(const char *, int) override; + void Set(const char *, unsigned int) override; + void Set(const char *, long double) override; + void Set(const char *, float) override; + void Set(const char *, uint64_t) override; + NVSDK_NGX_Result Get(const char *, void **) override; + NVSDK_NGX_Result Get(const char *, struct ID3D12Resource **) override; + NVSDK_NGX_Result Get(const char *, struct ID3D11Resource **) override; + NVSDK_NGX_Result Get(const char *, int *) override; + NVSDK_NGX_Result Get(const char *, unsigned int *) override; + NVSDK_NGX_Result Get(const char *, long double *) override; + NVSDK_NGX_Result Get(const char *, float *) override; + NVSDK_NGX_Result Get(const char *, uint64_t *) override; + void Reset() override; +}; + +void NVSDK_NGX_Parameter_Impl::Set(const char *name, void *p) { map.insert({ name, u(p) }); } +void NVSDK_NGX_Parameter_Impl::Set(const char *name, struct ID3D12Resource *p) { map.insert({ name, u((void*)p) }); } +void NVSDK_NGX_Parameter_Impl::Set(const char *name, struct ID3D11Resource *p) { map.insert({ name, u((void*)p) }); } +void NVSDK_NGX_Parameter_Impl::Set(const char *name, int i) { map.insert({ name, u(i) }); } +void NVSDK_NGX_Parameter_Impl::Set(const char *name, unsigned int ui) { map.insert({ name, u(ui) }); } +void NVSDK_NGX_Parameter_Impl::Set(const char *name, long double d) { map.insert({ name, u(d) }); } +void NVSDK_NGX_Parameter_Impl::Set(const char *name, float f) { map.insert({ name, u(f) }); } +void NVSDK_NGX_Parameter_Impl::Set(const char *name, uint64_t u64) { map.insert({ name, u(u64) }); } +NVSDK_NGX_Result NVSDK_NGX_Parameter_Impl::Get(const char *name, void **p) { return get(name, p); } +NVSDK_NGX_Result NVSDK_NGX_Parameter_Impl::Get(const char *name, struct ID3D12Resource **p) { return get(name, (void**)p); } +NVSDK_NGX_Result NVSDK_NGX_Parameter_Impl::Get(const char *name, struct ID3D11Resource **p) { return get(name, (void**)p); } +NVSDK_NGX_Result NVSDK_NGX_Parameter_Impl::Get(const char *name, int *p) { return get(name, p); } +NVSDK_NGX_Result NVSDK_NGX_Parameter_Impl::Get(const char *name, unsigned int *p) { return get(name, p); } +NVSDK_NGX_Result NVSDK_NGX_Parameter_Impl::Get(const char *name, long double *p) { return get(name, p); } +NVSDK_NGX_Result NVSDK_NGX_Parameter_Impl::Get(const char *name, float *p) { return get(name, p); } +NVSDK_NGX_Result NVSDK_NGX_Parameter_Impl::Get(const char *name, uint64_t *p) { return get(name, p); } +void NVSDK_NGX_Parameter_Impl::Reset() { map.clear(); } + +void NV_new_Parameter(NVSDK_NGX_Parameter **p) { + *p = new NVSDK_NGX_Parameter_Impl(); +} diff --git a/plugin.cpp b/plugin.cpp index fe7aa1f..fb40fd4 100644 --- a/plugin.cpp +++ b/plugin.cpp @@ -1,8 +1,12 @@ #include "VapourSynth.h" #include "expr/internalfilters.h" +#include "ngx/internalfilters.h" +#include "vfx/internalfilters.h" VS_EXTERNAL_API(void) VapourSynthPluginInit(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin *plugin) { configFunc("info.akarin.vsplugin", "akarin", "Akarin's Experimental Filters", VAPOURSYNTH_API_VERSION, 1, plugin); exprInitialize(configFunc, registerFunc, plugin); + ngxInitialize(configFunc, registerFunc, plugin); + vfxInitialize(configFunc, registerFunc, plugin); } diff --git a/vfx/README.md b/vfx/README.md new file mode 100644 index 0000000..3d7bae7 --- /dev/null +++ b/vfx/README.md @@ -0,0 +1,35 @@ +First install appropriate Video Effects library (v0.6 beta) from https://www.nvidia.com/en-us/geforce/broadcasting/broadcast-sdk/resources/. +Make sure your environment is setup correctly by downloading +[opencv_world346.dll](https://github.com/NVIDIA/MAXINE-VFX-SDK/blob/master/samples/external/opencv/bin/opencv_world346.dll) and +[VideoEffectsApp.exe](https://github.com/NVIDIA/MAXINE-VFX-SDK/blob/master/samples/VideoEffectsApp/VideoEffectsApp.exe). +Play with VideoEffectsApp.exe to make sure it works before proceeding. + +And then build the plugin like this with mingw: +``` +g++ -DSTANDALONE_VFX -o akarin2.dll -shared -static vfx.cc -I ../include nvvfx/src/*.cpp -I nvvfx/include -Wall -O2 +``` + +Example code: +```python +import os, os.path +import vapoursynth as vs +core = vs.core +import mvsfunc as mvf + +core.std.LoadPlugin(os.path.abspath(os.path.join(os.getcwd(), 'akarin2.dll'))) + +c = core.imwri.Read('input.png') +c = mvf.Depth(c, 32) # only supports vs.RGBS formats + +# OP_AR: Artefact reduction, OP_SUPERRES: super resolution, OP_DENOISE: denoise. +# strength: 0 for weak effect (weaker enhancement), 1 for strong effect (enhancement). +# scale = 2/3/4 for super resolution, otherwise unused. +OP_AR, OP_SUPERRES, OP_DENOISE = range(3) +d = core.akarin2.DLVFX(c, op=OP_SUPERRES, scale=2, strength=0) + +d = core.imwri.Write(d, 'PNG', 'out-%d.png') + +d.set_output() +``` + +This plugin is provided as is, and I haven't been able to test it locally. diff --git a/vfx/internalfilters.h b/vfx/internalfilters.h new file mode 100644 index 0000000..1407114 --- /dev/null +++ b/vfx/internalfilters.h @@ -0,0 +1,8 @@ +#ifndef VFX_INTERNALFILTERS_H +#define VFX_INTERNALFILTERS_H + +#include "VapourSynth.h" + +void VS_CC vfxInitialize(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin *plugin); + +#endif // INTERNALFILTERS_H diff --git a/vfx/nvvfx/NVOSSLicense.txt b/vfx/nvvfx/NVOSSLicense.txt new file mode 100644 index 0000000..ce0ca00 --- /dev/null +++ b/vfx/nvvfx/NVOSSLicense.txt @@ -0,0 +1,25 @@ +The contents of this folder are governed by the MIT license + +Copyright (C) 2019, NVIDIA Corporation, all rights reserved. + + MIT License + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/vfx/nvvfx/include/nvCVImage.h b/vfx/nvvfx/include/nvCVImage.h new file mode 100644 index 0000000..c5ed9ed --- /dev/null +++ b/vfx/nvvfx/include/nvCVImage.h @@ -0,0 +1,674 @@ +/*############################################################################### +# +# Copyright 2020-2021 NVIDIA Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +###############################################################################*/ + +#ifndef __NVCVIMAGE_H__ +#define __NVCVIMAGE_H__ + +#include "nvCVStatus.h" + +#ifdef __cplusplus +extern "C" { +#endif // ___cplusplus + + +#ifndef RTX_CAMERA_IMAGE // Compile with -DRTX_CAMERA_IMAGE=0 to get more functionality and bug fixes. + #define RTX_CAMERA_IMAGE 0 // Set to 1 for RTXCamera, which needs an old version, that avoids new functionality +#endif // RTX_CAMERA_IMAGE + + +struct CUstream_st; // typedef struct CUstream_st *CUstream; + +//! The format of pixels in an image. +typedef enum NvCVImage_PixelFormat { + NVCV_FORMAT_UNKNOWN = 0, //!< Unknown pixel format. + NVCV_Y = 1, //!< Luminance (gray). + NVCV_A = 2, //!< Alpha (opacity) + NVCV_YA = 3, //!< { Luminance, Alpha } + NVCV_RGB = 4, //!< { Red, Green, Blue } + NVCV_BGR = 5, //!< { Red, Green, Blue } + NVCV_RGBA = 6, //!< { Red, Green, Blue, Alpha } + NVCV_BGRA = 7, //!< { Red, Green, Blue, Alpha } +#if RTX_CAMERA_IMAGE + NVCV_YUV420 = 8, //!< Luminance and subsampled Chrominance { Y, Cb, Cr } + NVCV_YUV422 = 9, //!< Luminance and subsampled Chrominance { Y, Cb, Cr } +#else // !RTX_CAMERA_IMAGE + NVCV_ARGB = 8, //!< { Red, Green, Blue, Alpha } + NVCV_ABGR = 9, //!< { Red, Green, Blue, Alpha } + NVCV_YUV420 = 10, //!< Luminance and subsampled Chrominance { Y, Cb, Cr } + NVCV_YUV422 = 11, //!< Luminance and subsampled Chrominance { Y, Cb, Cr } +#endif // !RTX_CAMERA_IMAGE + NVCV_YUV444 = 12, //!< Luminance and full bandwidth Chrominance { Y, Cb, Cr } +} NvCVImage_PixelFormat; + + +//! The data type used to represent each component of an image. +typedef enum NvCVImage_ComponentType { + NVCV_TYPE_UNKNOWN = 0, //!< Unknown type of component. + NVCV_U8 = 1, //!< Unsigned 8-bit integer. + NVCV_U16 = 2, //!< Unsigned 16-bit integer. + NVCV_S16 = 3, //!< Signed 16-bit integer. + NVCV_F16 = 4, //!< 16-bit floating-point. + NVCV_U32 = 5, //!< Unsigned 32-bit integer. + NVCV_S32 = 6, //!< Signed 32-bit integer. + NVCV_F32 = 7, //!< 32-bit floating-point (float). + NVCV_U64 = 8, //!< Unsigned 64-bit integer. + NVCV_S64 = 9, //!< Signed 64-bit integer. + NVCV_F64 = 10, //!< 64-bit floating-point (double). +} NvCVImage_ComponentType; + + +//! Value for the planar field or layout argument. Two values are currently accommodated for RGB: +//! Interleaved or chunky storage locates all components of a pixel adjacent in memory, +//! e.g. RGBRGBRGB... (denoted [RGB]). +//! Planar storage locates the same component of all pixels adjacent in memory, +//! e.g. RRRRR...GGGGG...BBBBB... (denoted [R][G][B]) +//! YUV has many more variants. +//! 4:2:2 can be chunky, planar or semi-planar, with different orderings. +//! 4:2:0 can be planar or semi-planar, with different orderings. +//! Aliases are provided for FOURCCs defined at fourcc.org. +//! Note: the LSB can be used to distinguish between chunky and planar formats. +#define NVCV_INTERLEAVED 0 //!< All components of pixel(x,y) are adjacent (same as chunky) (default for non-YUV). +#define NVCV_CHUNKY 0 //!< All components of pixel(x,y) are adjacent (same as interleaved). +#define NVCV_PLANAR 1 //!< The same component of all pixels are adjacent. +#define NVCV_UYVY 2 //!< [UYVY] Chunky 4:2:2 (default for 4:2:2) +#define NVCV_VYUY 4 //!< [VYUY] Chunky 4:2:2 +#define NVCV_YUYV 6 //!< [YUYV] Chunky 4:2:2 +#define NVCV_YVYU 8 //!< [YVYU] Chunky 4:2:2 +#define NVCV_CYUV 10 //!< [YUV] Chunky 4:4:4 +#define NVCV_CYVU 12 //!< [YVU] Chunky 4:4:4 +#define NVCV_YUV 3 //!< [Y][U][V] Planar 4:2:2 or 4:2:0 or 4:4:4 +#define NVCV_YVU 5 //!< [Y][V][U] Planar 4:2:2 or 4:2:0 or 4:4:4 +#define NVCV_YCUV 7 //!< [Y][UV] Semi-planar 4:2:2 or 4:2:0 (default for 4:2:0) +#define NVCV_YCVU 9 //!< [Y][VU] Semi-planar 4:2:2 or 4:2:0 + +//! The following are FOURCC aliases for specific layouts. Note that it is still required to specify the format as well +//! as the layout, e.g. NVCV_YUV420 and NVCV_NV12, even though the NV12 layout is only associated with YUV420 sampling. +#define NVCV_I420 NVCV_YUV //!< [Y][U][V] Planar 4:2:0 +#define NVCV_IYUV NVCV_YUV //!< [Y][U][V] Planar 4:2:0 +#define NVCV_YV12 NVCV_YVU //!< [Y][V][U] Planar 4:2:0 +#define NVCV_NV12 NVCV_YCUV //!< [Y][UV] Semi-planar 4:2:0 (default for 4:2:0) +#define NVCV_NV21 NVCV_YCVU //!< [Y][VU] Semi-planar 4:2:0 +#define NVCV_YUY2 NVCV_YUYV //!< [YUYV] Chunky 4:2:2 +#define NVCV_I444 NVCV_YUV //!< [Y][U][V] Planar 4:4:4 +#define NVCV_YM24 NVCV_YUV //!< [Y][U][V] Planar 4:4:4 +#define NVCV_YM42 NVCV_YVU //!< [Y][V][U] Planar 4:4:4 +#define NVCV_NV24 NVCV_YCUV //!< [Y][UV] Semi-planar 4:4:4 +#define NVCV_NV42 NVCV_YCVU //!< [Y][VU] Semi-planar 4:4:4 + +//! The following are ORed together for the colorspace field for YUV. +//! NVCV_601 and NVCV_709 describe the color axes of YUV. +//! NVCV_VIDEO_RANGE and NVCV_VIDEO_RANGE describe the range, [16, 235] or [0, 255], respectively. +//! NVCV_CHROMA_COSITED and NVCV_CHROMA_INTSTITIAL describe the location of the chroma samples. +#define NVCV_601 0x00 //!< The Rec.601 YUV colorspace, typically used for SD. +#define NVCV_709 0x01 //!< The Rec.709 YUV colorspace, typically used for HD. +#define NVCV_2020 0x02 //!< The Rec.2020 YUV colorspace. +#define NVCV_VIDEO_RANGE 0x00 //!< The video range is [16, 235]. +#define NVCV_FULL_RANGE 0x04 //!< The video range is [ 0, 255]. +#define NVCV_CHROMA_COSITED 0x00 //!< The chroma is sampled at the same location as the luma samples horizontally. +#define NVCV_CHROMA_INTSTITIAL 0x08 //!< The chroma is sampled between luma samples horizontally. +#define NVCV_CHROMA_TOPLEFT 0x10 //!< The chroma is sampled at the same location as the luma samples horizontally and vertically. +#define NVCV_CHROMA_MPEG2 NVCV_CHROMA_COSITED //!< As is most video. +#define NVCV_CHROMA_MPEG1 NVCV_CHROMA_INTSTITIAL +#define NVCV_CHROMA_JPEG NVCV_CHROMA_INTSTITIAL +#define NVCV_CHROMA_H261 NVCV_CHROMA_INTSTITIAL +#define NVCV_CHROMA_INTERSTITIAL NVCV_CHROMA_INTSTITIAL //!< Correct spelling + +//! This is the value for the gpuMem field or the memSpace argument. +#define NVCV_CPU 0 //!< The buffer is stored in CPU memory. +#define NVCV_GPU 1 //!< The buffer is stored in CUDA memory. +#define NVCV_CUDA 1 //!< The buffer is stored in CUDA memory. +#define NVCV_CPU_PINNED 2 //!< The buffer is stored in pinned CPU memory. +#define NVCV_CUDA_ARRAY 3 //!< A CUDA array is used for storage. + +//! Image descriptor. +typedef struct +#ifdef _MSC_VER +__declspec(dllexport) +#endif // _MSC_VER +NvCVImage { + unsigned int width; //!< The number of pixels horizontally in the image. + unsigned int height; //!< The number of pixels vertically in the image. + signed int pitch; //!< The byte stride between pixels vertically. + NvCVImage_PixelFormat pixelFormat; //!< The format of the pixels in the image. + NvCVImage_ComponentType componentType; //!< The data type used to represent each component of the image. + unsigned char pixelBytes; //!< The number of bytes in a chunky pixel. + unsigned char componentBytes; //!< The number of bytes in each pixel component. + unsigned char numComponents; //!< The number of components in each pixel. + unsigned char planar; //!< NVCV_CHUNKY, NVCV_PLANAR, NVCV_UYVY, .... + unsigned char gpuMem; //!< NVCV_CPU, NVCV_CPU_PINNED, NVCV_CUDA, NVCV_GPU + unsigned char colorspace; //!< An OR of colorspace, range and chroma phase. + unsigned char reserved[2]; //!< For structure padding and future expansion. Set to 0. + void *pixels; //!< Pointer to pixel(0,0) in the image. + void *deletePtr; //!< Buffer memory to be deleted (can be NULL). + void (*deleteProc)(void *p); //!< Delete procedure to call rather than free(). + unsigned long long bufferBytes; //!< The maximum amount of memory available through pixels. + + +#ifdef __cplusplus + + //! Default constructor: fill with 0. + inline NvCVImage(); + + //! Allocation constructor. + //! \param[in] width the number of pixels horizontally. + //! \param[in] height the number of pixels vertically. + //! \param[in] format the format of the pixels. + //! \param[in] type the type of each pixel component. + //! \param[in] layout One of { NVCV_CHUNKY, NVCV_PLANAR } or one of the YUV layouts. + //! \param[in] memSpace One of { NVCV_CPU, NVCV_CPU_PINNED, NVCV_GPU, NVCV_CUDA } + //! \param[in] alignment row byte alignment. Choose 0 or a power of 2. + //! 1: yields no gap whatsoever between scanlines; + //! 0: default alignment: 4 on CPU, and cudaMallocPitch's choice on GPU. + //! Other common values are 16 or 32 for cache line size. + inline NvCVImage(unsigned width, unsigned height, NvCVImage_PixelFormat format, NvCVImage_ComponentType type, + unsigned layout = NVCV_CHUNKY, unsigned memSpace = NVCV_CPU, unsigned alignment = 0); + + //! Subimage constructor. + //! \param[in] fullImg the full image, from which this subImage view is to be created. + //! \param[in] x the left edge of the subImage, in reference to the full image. + //! \param[in] y the top edge of the subImage, in reference to the full image. + //! \param[in] width the width of the subImage, in pixels. + //! \param[in] height the height of the subImage, in pixels. + //! \bug This does not work for planar or semi-planar formats, neither RGB nor YUV. + //! \note This does work for all chunky formats, including UYVY, VYUY, YUYV, YVYU. + inline NvCVImage(NvCVImage *fullImg, int x, int y, unsigned width, unsigned height); + + //! Destructor + inline ~NvCVImage(); + + //! Copy a rectangular subimage. This works for CPU->CPU, CPU->GPU, GPU->GPU, and GPU->CPU. + //! \param[in] src The source image from which to copy. + //! \param[in] srcX The left coordinate of the src rectangle. + //! \param[in] srcY The top coordinate of the src rectangle. + //! \param[in] dstX The left coordinate of the dst rectangle. + //! \param[in] dstY The top coordinate of the dst rectangle. + //! \param[in] width The width of the rectangle to be copied, in pixels. + //! \param[in] height The height of the rectangle to be copied, in pixels. + //! \note NvCVImage_Transfer() can handle more cases. + //! \return NVCV_SUCCESS if successful + //! \return NVCV_ERR_MISMATCH if the formats are different + //! \return NVCV_ERR_CUDA if a CUDA error occurred + //! \return NVCV_ERR_PIXELFORMAT if the pixel format is not yet accommodated. + inline NvCV_Status copyFrom(const NvCVImage *src, int srcX, int srcY, int dstX, int dstY, unsigned width, unsigned height); + + //! Copy from one image to another. This works for CPU->CPU, CPU->GPU, GPU->GPU, and GPU->CPU. + //! \param[in] src The source image from which to copy. + //! \note NvCVImage_Transfer() can handle more cases. + //! \return NVCV_SUCCESS if successful + //! \return NVCV_ERR_MISMATCH if the formats are different + //! \return NVCV_ERR_CUDA if a CUDA error occurred + //! \return NVCV_ERR_PIXELFORMAT if the pixel format is not yet accommodated. + inline NvCV_Status copyFrom(const NvCVImage *src); + +#endif // ___cplusplus +} NvCVImage; + + +//! Integer rectangle. +typedef struct NvCVRect2i { + int x; //!< The left edge of the rectangle. + int y; //!< The top edge of the rectangle. + int width; //!< The width of the rectangle. + int height; //!< The height of the rectangle. +} NvCVRect2i; + + +//! Integer point. +typedef struct NvCVPoint2i { + int x; //!< The horizontal coordinate. + int y; //!< The vertical coordinate +} NvCVPoint2i; + + +//! Initialize an image. The C++ constructors can initialize this appropriately. +//! This is called by the C++ constructor, but C code should call this explicitly. +//! \param[in,out] im the image to initialize. +//! \param[in] width the desired width of the image, in pixels. +//! \param[in] height the desired height of the image, in pixels. +//! \param[in] pitch the byte stride between pixels vertically. +//! \param[in] pixels a pointer to the pixel buffer. +//! \param[in] format the format of the pixels. +//! \param[in] type the type of the components of the pixels. +//! \param[in] layout One of { NVCV_CHUNKY, NVCV_PLANAR } or one of the YUV layouts. +//! \param[in] memSpace Location of the buffer: one of { NVCV_CPU, NVCV_CPU_PINNED, NVCV_GPU, NVCV_CUDA } +//! \return NVCV_SUCCESS if successful +//! \return NVCV_ERR_PIXELFORMAT if the pixel format is not yet accommodated. +NvCV_Status NvCV_API NvCVImage_Init(NvCVImage *im, unsigned width, unsigned height, int pitch, void *pixels, + NvCVImage_PixelFormat format, NvCVImage_ComponentType type, unsigned layout, unsigned memSpace); + + +//! Initialize a view into a subset of an existing image. +//! No memory is allocated -- the fullImg buffer is used. +//! \param[in] subImg the sub-image view into the existing full image. +//! \param[in] fullImg the existing full image. +//! \param[in] x the left edge of the sub-image, as coordinate of the full image. +//! \param[in] y the top edge of the sub-image, as coordinate of the full image. +//! \param[in] width the desired width of the subImage, in pixels. +//! \param[in] height the desired height of the subImage, in pixels. +//! \bug This does not work in general for planar or semi-planar formats, neither RGB nor YUV. +//! However, it does work for all formats with the full image, to make a shallow copy, e.g. +//! NvCVImage_InitView(&subImg, &fullImg, 0, 0, fullImage.width, fullImage.height). +//! Cropping a planar or semi-planar image can be accomplished with NvCVImage_TransferRect(). +//! \note This does work for all chunky formats, including UYVY, VYUY, YUYV, YVYU. +//! \sa { NvCVImage_TransferRect } +void NvCV_API NvCVImage_InitView(NvCVImage *subImg, NvCVImage *fullImg, int x, int y, unsigned width, unsigned height); + + +//! Allocate memory for, and initialize an image. This assumes that the image data structure has nothing meaningful in it. +//! This is called by the C++ constructor, but C code can call this to allocate an image. +//! \param[in,out] im the image to initialize. +//! \param[in] width the desired width of the image, in pixels. +//! \param[in] height the desired height of the image, in pixels. +//! \param[in] format the format of the pixels. +//! \param[in] type the type of the components of the pixels. +//! \param[in] layout One of { NVCV_CHUNKY, NVCV_PLANAR } or one of the YUV layouts. +//! \param[in] memSpace Location of the buffer: one of { NVCV_CPU, NVCV_CPU_PINNED, NVCV_GPU, NVCV_CUDA } +//! \param[in] alignment row byte alignment. Choose 0 or a power of 2. +//! 1: yields no gap whatsoever between scanlines; +//! 0: default alignment: 4 on CPU, and cudaMallocPitch's choice on GPU. +//! Other common values are 16 or 32 for cache line size. +//! \return NVCV_SUCCESS if the operation was successful. +//! \return NVCV_ERR_PIXELFORMAT if the pixel format is not accommodated. +//! \return NVCV_ERR_MEMORY if there is not enough memory to allocate the buffer. +NvCV_Status NvCV_API NvCVImage_Alloc(NvCVImage *im, unsigned width, unsigned height, NvCVImage_PixelFormat format, + NvCVImage_ComponentType type, unsigned layout, unsigned memSpace, unsigned alignment); + + +//! Reallocate memory for, and initialize an image. This assumes that the image is valid. +//! It will check bufferBytes to see if enough memory is already available, and will reshape rather than realloc if true. +//! Otherwise, it will free the previous buffer and reallocate a new one. +//! \param[in,out] im the image to initialize. +//! \param[in] width the desired width of the image, in pixels. +//! \param[in] height the desired height of the image, in pixels. +//! \param[in] format the format of the pixels. +//! \param[in] type the type of the components of the pixels. +//! \param[in] layout One of { NVCV_CHUNKY, NVCV_PLANAR } or one of the YUV layouts. +//! \param[in] memSpace Location of the buffer: one of { NVCV_CPU, NVCV_CPU_PINNED, NVCV_GPU, NVCV_CUDA } +//! \param[in] alignment row byte alignment. Choose 0 or a power of 2. +//! 1: yields no gap whatsoever between scanlines; +//! 0: default alignment: 4 on CPU, and cudaMallocPitch's choice on GPU. +//! Other common values are 16 or 32 for cache line size. +//! \return NVCV_SUCCESS if the operation was successful. +//! \return NVCV_ERR_PIXELFORMAT if the pixel format is not accommodated. +//! \return NVCV_ERR_MEMORY if there is not enough memory to allocate the buffer. +NvCV_Status NvCV_API NvCVImage_Realloc(NvCVImage *im, unsigned width, unsigned height, NvCVImage_PixelFormat format, + NvCVImage_ComponentType type, unsigned layout, unsigned memSpace, unsigned alignment); + + +//! Deallocate the image buffer from the image. The image is not deallocated. +//! param[in,out] im the image whose buffer is to be deallocated. +void NvCV_API NvCVImage_Dealloc(NvCVImage *im); + + +//! Allocate a new image, with storage (C-style constructor). +//! \param[in] width the desired width of the image, in pixels. +//! \param[in] height the desired height of the image, in pixels. +//! \param[in] format the format of the pixels. +//! \param[in] type the type of the components of the pixels. +//! \param[in] layout One of { NVCV_CHUNKY, NVCV_PLANAR } or one of the YUV layouts. +//! \param[in] memSpace Location of the buffer: one of { NVCV_CPU, NVCV_CPU_PINNED, NVCV_GPU, NVCV_CUDA } +//! \param[in] alignment row byte alignment. Choose 0 or a power of 2. +//! 1: yields no gap whatsoever between scanlines; +//! 0: default alignment: 4 on CPU, and cudaMallocPitch's choice on GPU. +//! Other common values are 16 or 32 for cache line size. +//! \param[out] *out will be a pointer to the new image if successful; otherwise NULL. +//! \return NVCV_SUCCESS if the operation was successful. +//! \return NVCV_ERR_PIXELFORMAT if the pixel format is not accommodated. +//! \return NVCV_ERR_MEMORY if there is not enough memory to allocate the buffer. +NvCV_Status NvCV_API NvCVImage_Create(unsigned width, unsigned height, NvCVImage_PixelFormat format, + NvCVImage_ComponentType type, unsigned layout, unsigned memSpace, unsigned alignment, NvCVImage **out); + + +//! Deallocate the image allocated with NvCVImage_Create() (C-style destructor). +void NvCV_API NvCVImage_Destroy(NvCVImage *im); + + +//! Get offsets for the components of a pixel format. +//! These are not byte offsets, but component offsets. +//! \param[in] format the pixel format to be interrogated. +//! \param[out] rOff a place to store the offset for the red channel (can be NULL). +//! \param[out] gOff a place to store the offset for the green channel (can be NULL). +//! \param[out] bOff a place to store the offset for the blue channel (can be NULL). +//! \param[out] aOff a place to store the offset for the alpha channel (can be NULL). +//! \param[out] yOff a place to store the offset for the luminance channel (can be NULL). +void NvCV_API NvCVImage_ComponentOffsets(NvCVImage_PixelFormat format, int *rOff, int *gOff, int *bOff, int *aOff, int *yOff); + + +//! Transfer one image to another, with a limited set of conversions. +//! +//! If any of the images resides on the GPU, it may run asynchronously, +//! so cudaStreamSynchronize() should be called if it is necessary to run synchronously. +//! The following table indicates (with X) the currently-implemented conversions: +//! +-------------------+-------------+-------------+-------------+-------------+ +//! | | u8 --> u8 | u8 --> f32 | f32 --> u8 | f32 --> f32 | +//! +-------------------+-------------+-------------+-------------+-------------+ +//! | Y --> Y | X | | X | X | +//! | Y --> A | X | | X | X | +//! | Y --> RGB | X | X | X | X | +//! | Y --> RGBA | X | X | X | X | +//! | A --> Y | X | | X | X | +//! | A --> A | X | | X | X | +//! | A --> RGB | X | X | X | X | +//! | A --> RGBA | X | | | | +//! | RGB --> Y | X | X | | | +//! | RGB --> A | X | X | | | +//! | RGB --> RGB | X | X | X | X | +//! | RGB --> RGBA | X | X | X | X | +//! | RGBA --> Y | X | X | | | +//! | RGBA --> A | | X | | | +//! | RGBA --> RGB | X | X | X | X | +//! | RGBA --> RGBA | X | X | X | X | +//! | RGB --> YUV420 | X | | X | | +//! | RGBA --> YUV420 | X | | X | | +//! | RGB --> YUV422 | X | | X | | +//! | RGBA --> YUV422 | X | | X | | +//! | RGB --> YUV444 | X | | X | | +//! | RGBA --> YUV444 | X | | X | | +//! | YUV420 --> RGB | X | X | | | +//! | YUV420 --> RGBA | X | X | | | +//! | YUV422 --> RGB | X | X | | | +//! | YUV422 --> RGBA | X | X | | | +//! | YUV444 --> RGB | X | X | | | +//! | YUV444 --> RGBA | X | X | | | +//! +-------------------+-------------+-------------+-------------+-------------+ +//! where +//! * Either source or destination can be CHUNKY or PLANAR. +//! * Either source or destination can reside on the CPU or the GPU. +//! * The RGB components are in any order (i.e. RGB or BGR; RGBA or BGRA). +//! * For RGBA (or BGRA) destinations, most implementations do not change the alpha channel, so it is recommended to +//! set it at initialization time with [cuda]memset(im.pixels, -1, im.pitch * im.height) or +//! [cuda]memset(im.pixels, -1, im.pitch * im.height * im.numComponents) for chunky and planar images respectively. +//! * YUV requires that the colorspace field be set manually prior to Transfer, e.g. typical for layout=NVCV_NV12: +//! image.colorspace = NVCV_709 | NVCV_VIDEO_RANGE | NVCV_CHROMA_INTSTITIAL; +//! * There are also RGBf16-->RGBf32 and RGBf32-->RGBf16 transfers. +//! * Additionally, when the src and dst formats are the same, all formats are accommodated on CPU and GPU, +//! and this can be used as a replacement for cudaMemcpy2DAsync() (which it utilizes). This is also true for YUV, +//! whose src and dst must share the same format, layout and colorspace. +//! +//! When there is some kind of conversion AND the src and dst reside on different processors (CPU, GPU), +//! it is necessary to have a temporary GPU buffer, which is reshaped as needed to match the characteristics +//! of the CPU image. The same temporary image can be used in subsequent calls to NvCVImage_Transfer(), +//! regardless of the shape, format or component type, as it will grow as needed to accommodate +//! the largest memory requirement. The recommended usage for most cases is to supply an empty image +//! as the temporary; if it is not needed, no buffer is allocated. NULL can be supplied as the tmp +//! image, in which case an ephemeral buffer is allocated if needed, with resultant +//! performance degradation for image sequences. +//! +//! \param[in] src the source image. +//! \param[out] dst the destination image. +//! \param[in] scale a scale factor that can be applied when one (but not both) of the images +//! is based on floating-point components; this parameter is ignored when all image components +//! are represented with integer data types, or all image components are represented with +//! floating-point data types. +//! \param[in] stream the stream on which to perform the copy. This is ignored if both images reside on the CPU. +//! \param[in,out] tmp a temporary buffer that is sometimes needed when transferring images +//! between the CPU and GPU in either direction (can be empty or NULL). +//! It has the same characteristics as the CPU image, but it resides on the GPU. +//! \return NVCV_SUCCESS if successful, +//! NVCV_ERR_PIXELFORMAT if one of the pixel formats is not accommodated. +//! NVCV_ERR_CUDA if a CUDA error has occurred. +//! NVCV_ERR_GENERAL if an otherwise unspecified error has occurred. +NvCV_Status NvCV_API NvCVImage_Transfer( + const NvCVImage *src, NvCVImage *dst, float scale, struct CUstream_st *stream, NvCVImage *tmp); + + +//! Transfer a rectangular portion of an image. +//! See NvCVImage_Transfer() for the pixel format combinations that are implemented. +//! \param[in] src the source image. +//! \param[in] srcRect the subRect of the src to be transferred (NULL implies the whole image). +//! \param[out] dst the destination image. +//! \param[in] dstPt location to which the srcRect is to be copied (NULL implies (0,0)). +//! \param[in] scale scale factor applied to the magnitude during transfer, typically 1, 255 or 1/255. +//! \param[in] stream the CUDA stream. +//! \param[in] tmp a staging image. +//! \return NVCV_SUCCESS if the operation was completed successfully. +//! \note The actual transfer region may be smaller, because the rects are clipped against the images. +NvCV_Status NvCV_API NvCVImage_TransferRect( + const NvCVImage *src, const NvCVRect2i *srcRect, NvCVImage *dst, const NvCVPoint2i *dstPt, + float scale, struct CUstream_st *stream, NvCVImage *tmp); + + +//! Transfer from a YUV image. +//! YUVu8 --> RGBu8 and YUVu8 --> RGBf32 are currently available. +//! \param[in] y pointer to pixel(0,0) of the luminance channel. +//! \param[in] yPixBytes the byte stride between y pixels horizontally. +//! \param[in] yPitch the byte stride between y pixels vertically. +//! \param[in] u pointer to pixel(0,0) of the u (Cb) chrominance channel. +//! \param[in] v pointer to pixel(0,0) of the v (Cr) chrominance channel. +//! \param[in] uvPixBytes the byte stride between u or v pixels horizontally. +//! \param[in] uvPitch the byte stride between u or v pixels vertically. +//! \param[in] yuvColorSpace the yuv colorspace, specifying range, chromaticities, and chrominance phase. +//! \param[in] yuvMemSpace the memory space where the pixel buffers reside. +//! \param[out] dst the destination image. +//! \param[in] dstRect the destination rectangle (NULL implies the whole image). +//! \param[in] scale scale factor applied to the magnitude during transfer, typically 1, 255 or 1/255. +//! \param[in] stream the CUDA stream. +//! \param[in] tmp a staging image. +//! \return NVCV_SUCCESS if the operation was completed successfully. +//! \note The actual transfer region may be smaller, because the rects are clipped against the images. +NvCV_Status NvCV_API NvCVImage_TransferFromYUV( + const void *y, int yPixBytes, int yPitch, + const void *u, const void *v, int uvPixBytes, int uvPitch, + NvCVImage_PixelFormat yuvFormat, NvCVImage_ComponentType yuvType, + unsigned yuvColorSpace, unsigned yuvMemSpace, + NvCVImage *dst, const NvCVRect2i *dstRect, float scale, struct CUstream_st *stream, NvCVImage *tmp); + + +//! Transfer to a YUV image. +//! RGBu8 --> YUVu8 and RGBf32 --> YUVu8 are currently available. +//! \param[in] src the source image. +//! \param[in] srcRect the destination rectangle (NULL implies the whole image). +//! \param[out] y pointer to pixel(0,0) of the luminance channel. +//! \param[in] yPixBytes the byte stride between y pixels horizontally. +//! \param[in] yPitch the byte stride between y pixels vertically. +//! \param[out] u pointer to pixel(0,0) of the u (Cb) chrominance channel. +//! \param[out] v pointer to pixel(0,0) of the v (Cr) chrominance channel. +//! \param[in] uvPixBytes the byte stride between u or v pixels horizontally. +//! \param[in] uvPitch the byte stride between u or v pixels vertically. +//! \param[in] yuvColorSpace the yuv colorspace, specifying range, chromaticities, and chrominance phase. +//! \param[in] yuvMemSpace the memory space where the pixel buffers reside. +//! \param[in] scale scale factor applied to the magnitude during transfer, typically 1, 255 or 1/255. +//! \param[in] stream the CUDA stream. +//! \param[in] tmp a staging image. +//! \return NVCV_SUCCESS if the operation was completed successfully. +//! \note The actual transfer region may be smaller, because the rects are clipped against the images. +NvCV_Status NvCV_API NvCVImage_TransferToYUV( + const NvCVImage *src, const NvCVRect2i *srcRect, + const void *y, int yPixBytes, int yPitch, + const void *u, const void *v, int uvPixBytes, int uvPitch, + NvCVImage_PixelFormat yuvFormat, NvCVImage_ComponentType yuvType, + unsigned yuvColorSpace, unsigned yuvMemSpace, + float scale, struct CUstream_st *stream, NvCVImage *tmp); + + +//! Between rendering by a graphics system and Transfer by CUDA, it is necessary to map the texture resource. +//! There is a fair amount of overhead, so its use should be minimized. +//! Every call to NvCVImage_MapResource() should be matched by a subsequent call to NvCVImage_UnmapResource(). +//! \param[in,out] im the image to be mapped. +//! \param[in] stream the stream on which the mapping is to be performed. +//! \return NVCV_SUCCESS is the operation was completed successfully. +NvCV_Status NvCV_API NvCVImage_MapResource(NvCVImage *im, struct CUstream_st *stream); + + +//! After transfer by CUDA, the texture resource must be unmapped in order to be used by the graphics system again. +//! There is a fair amount of overhead, so its use should be minimized. +//! Every call to NvCVImage_UnmapResource() should correspond to a preceding call to NvCVImage_MapResource(). +//! \param[in,out] im the image to be mapped. +//! \param[in] stream the CUDA stream on which the mapping is to be performed. +//! \return NVCV_SUCCESS is the operation was completed successfully. +NvCV_Status NvCV_API NvCVImage_UnmapResource(NvCVImage *im, struct CUstream_st *stream); + + +//! Composite one source image over another using the given matte. +//! This accommodates all RGB and RGBA formats, with u8 and f32 components. +//! \param[in] fg the foreground source image. +//! \param[in] bg the background source image. +//! \param[in] mat the matte Yu8 (or Au8) image, indicating where the src should come through. +//! \param[out] dst the destination image. This can be the same as fg or bg. +//! \param[in] stream the CUDA stream on which the composition is to be performed. +//! \return NVCV_SUCCESS if the operation was successful. +//! \return NVCV_ERR_PIXELFORMAT if the pixel format is not accommodated. +//! \return NVCV_ERR_MISMATCH if either the fg & bg & dst formats do not match, or if fg & bg & dst & mat are not +//! in the same address space (CPU or GPU). +#if RTX_CAMERA_IMAGE == 0 +NvCV_Status NvCV_API NvCVImage_Composite(const NvCVImage *fg, const NvCVImage *bg, const NvCVImage *mat, NvCVImage *dst, + struct CUstream_st *stream); +#else // RTX_CAMERA_IMAGE == 1 // No GPU acceleration +NvCV_Status NvCV_API NvCVImage_Composite(const NvCVImage *fg, const NvCVImage *bg, const NvCVImage *mat, NvCVImage *dst); +#endif // RTX_CAMERA_IMAGE == 1 + +//! Composite one source image over another using the given matte. +//! Not all pixel format combinations are accommodated. +//! \param[in] fg the foreground source image. +//! \param[in] fgOrg the upper-left corner of the fg image to be composited (NULL implies (0,0)). +//! \param[in] bg the background source image. +//! \param[in] bgOrg the upper-left corner of the bg image to be composited (NULL implies (0,0)). +//! \param[in] mat the matte image, indicating where the src should come through. +//! This determines the size of the rectangle to be composited. +//! If this is multi-channel, the alpha channel is used as the matte. +//! \param[in] mode the composition mode. Only 0 (straight alpha over) is implemented at this time. +//! \param[out] dst the destination image. This can be the same as fg or bg. +//! \param[in] dstOrg the upper-left corner of the dst image to be updated (NULL implies (0,0)). +//! \param[in] stream the CUDA stream on which the composition is to be performed. +//! \note If a smaller region of a matte is desired, a window can be created using +//! NvCVImage_InitView() for chunky or NvCVImage_Init() for planar pixels. +//! \return NVCV_SUCCESS if the operation was successful. +//! \return NVCV_ERR_PIXELFORMAT if the pixel format is not accommodated. +//! \return NVCV_ERR_MISMATCH if either the fg & bg & dst formats do not match, or if fg & bg & dst & mat are not +//! in the same address space (CPU or GPU). +//! \bug Though RGBA destinations are accommodated, the A channel is not updated at all. +//! \todo Accommodate premultiplied alpha, either as a flag in NvCVImage or as a different mode. +//! \todo If the destination has an A channel, update it as per Adobe and Pixar. +NvCV_Status NvCV_API NvCVImage_CompositeRect( + const NvCVImage *fg, const NvCVPoint2i *fgOrg, + const NvCVImage *bg, const NvCVPoint2i *bgOrg, + const NvCVImage *mat, unsigned mode, + NvCVImage *dst, const NvCVPoint2i *dstOrg, + struct CUstream_st *stream); + +//! Composite a BGRu8 source image over a constant color field using the given matte. +//! \param[in] src the source BGRu8 (or RGBu8) image. +//! \param[in] mat the matte Yu8 (or Au8) image, indicating where the src should come through. +//! \param[in] bgColor the desired flat background color, with the same component ordering as the src and dst. +//! \param[in,out] dst the destination BGRu8 (or RGBu8) image. May be the same as src. +//! \return NVCV_SUCCESS if the operation was successful. +//! \return NVCV_ERR_PIXELFORMAT if the pixel format is not accommodated. +//! \bug This is only implemented for 3-component u8 src and dst, and 1-component mat, +//! where all images are resident on the CPU. +NvCV_Status NvCV_API NvCVImage_CompositeOverConstant( + const NvCVImage *src, const NvCVImage *mat, const unsigned char bgColor[3], NvCVImage *dst); + + +//! Flip the image vertically. +//! No actual pixels are moved: it is just an accounting procedure. +//! This is extremely low overhead, but requires appropriate interpretation of the pitch. +//! Flipping twice yields the original orientation. +//! \param[in] src the source image (NULL implies src == dst). +//! \param[out] dst the flipped image (can be the same as the src). +//! \return NVCV_SUCCESS if successful. +//! \return NVCV_ERR_PIXELFORMAT for most planar formats. +//! \bug This does not work for planar or semi-planar formats, neither RGB nor YUV. +//! \note This does work for all chunky formats, including UYVY, VYUY, YUYV, YVYU. +NvCV_Status NvCV_API NvCVImage_FlipY(const NvCVImage *src, NvCVImage *dst); + + +//! Get the pointers for YUV, based on the format. +//! \param[in] im The image to be deconstructed. +//! \param[out] y A place to store the pointer to y(0,0). +//! \param[out] u A place to store the pointer to u(0,0). +//! \param[out] v A place to store the pointer to v(0,0). +//! \param[out] yPixBytes A place to store the byte stride between luma samples horizontally. +//! \param[out] cPixBytes A place to store the byte stride between chroma samples horizontally. +//! \param[out] yRowBytes A place to store the byte stride between luma samples vertically. +//! \param[out] cRowBytes A place to store the byte stride between chroma samples vertically. +//! \return NVCV_SUCCESS If the information was gathered successfully. +//! NVCV_ERR_PIXELFORMAT Otherwise. +NvCV_Status NvCV_API NvCVImage_GetYUVPointers(NvCVImage *im, + unsigned char **y, unsigned char **u, unsigned char **v, + int *yPixBytes, int *cPixBytes, int *yRowBytes, int *cRowBytes); + + +#ifdef __cplusplus +} // extern "C" + +/******************************************************************************** + * NvCVImage default constructor + ********************************************************************************/ + +NvCVImage::NvCVImage() { + pixels = nullptr; + (void)NvCVImage_Alloc(this, 0, 0, NVCV_FORMAT_UNKNOWN, NVCV_TYPE_UNKNOWN, 0, 0, 0); +} + +/******************************************************************************** + * NvCVImage allocation constructor + ********************************************************************************/ + +NvCVImage::NvCVImage(unsigned width, unsigned height, NvCVImage_PixelFormat format, NvCVImage_ComponentType type, + unsigned layout, unsigned memSpace, unsigned alignment) { + pixels = nullptr; + (void)NvCVImage_Alloc(this, width, height, format, type, layout, memSpace, alignment); +} + +/******************************************************************************** + * Subimage constructor + ********************************************************************************/ + +NvCVImage::NvCVImage(NvCVImage *fullImg, int x, int y, unsigned width, unsigned height) { + NvCVImage_InitView(this, fullImg, x, y, width, height); +} + +/******************************************************************************** + * NvCVImage destructor + ********************************************************************************/ + +NvCVImage::~NvCVImage() { NvCVImage_Dealloc(this); } + +/******************************************************************************** + * copy subimage + ********************************************************************************/ + +NvCV_Status NvCVImage::copyFrom(const NvCVImage *src, int srcX, int srcY, int dstX, int dstY, unsigned wd, + unsigned ht) { +#if RTX_CAMERA_IMAGE // This only works for chunky images + NvCVImage srcView, dstView; + NvCVImage_InitView(&srcView, const_cast(src), srcX, srcY, wd, ht); + NvCVImage_InitView(&dstView, this, dstX, dstY, wd, ht); + return NvCVImage_Transfer(&srcView, &dstView, 1.f, 0, nullptr); +#else // !RTX_CAMERA_IMAGE bug fix for non-chunky images + NvCVRect2i srcRect = { (int)srcX, (int)srcY, (int)wd, (int)ht }; + NvCVPoint2i dstPt = { (int)dstX, (int)dstY }; + return NvCVImage_TransferRect(src, &srcRect, this, &dstPt, 1.f, 0, nullptr); +#endif // RTX_CAMERA_IMAGE +} + +/******************************************************************************** + * copy image + ********************************************************************************/ + +NvCV_Status NvCVImage::copyFrom(const NvCVImage *src) { return NvCVImage_Transfer(src, this, 1.f, 0, nullptr); } + + +#endif // ___cplusplus + +#endif // __NVCVIMAGE_H__ diff --git a/vfx/nvvfx/include/nvCVStatus.h b/vfx/nvvfx/include/nvCVStatus.h new file mode 100644 index 0000000..dd47ba3 --- /dev/null +++ b/vfx/nvvfx/include/nvCVStatus.h @@ -0,0 +1,113 @@ +/*############################################################################### +# +# Copyright 2020 NVIDIA Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +###############################################################################*/ + +#ifndef __NVCVSTATUS_H__ +#define __NVCVSTATUS_H__ + +#ifndef NvCV_API + #ifdef _WIN32 + #ifdef NVCV_API_EXPORT + #define NvCV_API __declspec(dllexport) __cdecl + #else + #define NvCV_API + #endif + #else //if linux + #define NvCV_API // TODO: Linux code goes here + #endif // _WIN32 or linux +#endif //NvCV_API + + +#ifdef __cplusplus +extern "C" { +#endif // ___cplusplus + + +//! Status codes returned from APIs. +typedef enum NvCV_Status { + NVCV_SUCCESS = 0, //!< The procedure returned successfully. + NVCV_ERR_GENERAL = -1, //!< An otherwise unspecified error has occurred. + NVCV_ERR_UNIMPLEMENTED = -2, //!< The requested feature is not yet implemented. + NVCV_ERR_MEMORY = -3, //!< There is not enough memory for the requested operation. + NVCV_ERR_EFFECT = -4, //!< An invalid effect handle has been supplied. + NVCV_ERR_SELECTOR = -5, //!< The given parameter selector is not valid in this effect filter. + NVCV_ERR_BUFFER = -6, //!< An image buffer has not been specified. + NVCV_ERR_PARAMETER = -7, //!< An invalid parameter value has been supplied for this effect+selector. + NVCV_ERR_MISMATCH = -8, //!< Some parameters are not appropriately matched. + NVCV_ERR_PIXELFORMAT = -9, //!< The specified pixel format is not accommodated. + NVCV_ERR_MODEL = -10, //!< Error while loading the TRT model. + NVCV_ERR_LIBRARY = -11, //!< Error loading the dynamic library. + NVCV_ERR_INITIALIZATION = -12, //!< The effect has not been properly initialized. + NVCV_ERR_FILE = -13, //!< The file could not be found. + NVCV_ERR_FEATURENOTFOUND = -14, //!< The requested feature was not found + NVCV_ERR_MISSINGINPUT = -15, //!< A required parameter was not set + NVCV_ERR_RESOLUTION = -16, //!< The specified image resolution is not supported. + NVCV_ERR_UNSUPPORTEDGPU = -17, //!< The GPU is not supported + NVCV_ERR_WRONGGPU = -18, //!< The current GPU is not the one selected. + NVCV_ERR_UNSUPPORTEDDRIVER = -19, //!< The currently installed graphics driver is not supported + NVCV_ERR_MODELDEPENDENCIES = -20, //!< There is no model with dependencies that match this system + NVCV_ERR_PARSE = -21, //!< There has been a parsing or syntax error while reading a file + NVCV_ERR_MODELSUBSTITUTION = -22, //!< The specified model does not exist and has been substituted. + NVCV_ERR_READ = -23, //!< An error occurred while reading a file. + NVCV_ERR_WRITE = -24, //!< An error occurred while writing a file. + NVCV_ERR_PARAMREADONLY = -25, //!< The selected parameter is read-only. + NVCV_ERR_TRT_ENQUEUE = -26, //!< TensorRT enqueue failed. + NVCV_ERR_TRT_BINDINGS = -27, //!< Unexpected TensorRT bindings. + NVCV_ERR_TRT_CONTEXT = -28, //!< An error occurred while creating a TensorRT context. + NVCV_ERR_TRT_INFER = -29, ///< The was a problem creating the inference engine. + NVCV_ERR_TRT_ENGINE = -30, ///< There was a problem deserializing the inference runtime engine. + NVCV_ERR_NPP = -31, //!< An error has occurred in the NPP library. + NVCV_ERR_CONFIG = -32, //!< No suitable model exists for the specified parameter configuration. + + NVCV_ERR_DIRECT3D = -99, //!< A Direct3D error has occurred. + + NVCV_ERR_CUDA_BASE = -100, //!< CUDA errors are offset from this value. + NVCV_ERR_CUDA_VALUE = -101, //!< A CUDA parameter is not within the acceptable range. + NVCV_ERR_CUDA_MEMORY = -102, //!< There is not enough CUDA memory for the requested operation. + NVCV_ERR_CUDA_PITCH = -112, //!< A CUDA pitch is not within the acceptable range. + NVCV_ERR_CUDA_INIT = -127, //!< The CUDA driver and runtime could not be initialized. + NVCV_ERR_CUDA_LAUNCH = -819, //!< The CUDA kernel launch has failed. + NVCV_ERR_CUDA_KERNEL = -309, //!< No suitable kernel image is available for the device. + NVCV_ERR_CUDA_DRIVER = -135, //!< The installed NVIDIA CUDA driver is older than the CUDA runtime library. + NVCV_ERR_CUDA_UNSUPPORTED = -901, //!< The CUDA operation is not supported on the current system or device. + NVCV_ERR_CUDA_ILLEGAL_ADDRESS = -800, //!< CUDA tried to load or store on an invalid memory address. + NVCV_ERR_CUDA = -1099, //!< An otherwise unspecified CUDA error has been reported. +} NvCV_Status; + + +//! Get an error string corresponding to the given status code. +//! \param[in] code the NvCV status code. +//! \return the corresponding string. +//! \todo Find a cleaner way to do this, because NvCV_API doesn't work. +#ifdef _WIN32 + __declspec(dllexport) const char* __cdecl +#else + const char* +#endif // _WIN32 or linux +NvCV_GetErrorStringFromCode(NvCV_Status code); + + +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif // __NVCVSTATUS_H__ diff --git a/vfx/nvvfx/include/nvTransferD3D.h b/vfx/nvvfx/include/nvTransferD3D.h new file mode 100644 index 0000000..e914eb5 --- /dev/null +++ b/vfx/nvvfx/include/nvTransferD3D.h @@ -0,0 +1,72 @@ +/*############################################################################### +# +# Copyright(c) 2021 NVIDIA CORPORATION.All Rights Reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto.Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. +# +###############################################################################*/ + +#ifndef __NVTRANSFER_D3D_H__ +#define __NVTRANSFER_D3D_H__ + +#ifndef _WINDOWS_ + #define WIN32_LEAN_AND_MEAN + #include +#endif // _WINDOWS_ +#include +#include "nvCVImage.h" + +#ifdef __cplusplus +extern "C" { +#endif // ___cplusplus + + + +//! Utility to determine the D3D format from the NvCVImage format, type and layout. +//! \param[in] format the pixel format. +//! \param[in] type the component type. +//! \param[in] layout the layout. +//! \param[out] d3dFormat a place to store the corresponding D3D format. +//! \return NVCV_SUCCESS if successful. +NvCV_Status NvCV_API NvCVImage_ToD3DFormat(NvCVImage_PixelFormat format, NvCVImage_ComponentType type, unsigned layout, DXGI_FORMAT *d3dFormat); + + +//! Utility to determine the NvCVImage format, component type and layout from a D3D format. +//! \param[in] d3dFormat the D3D format to translate. +//! \param[out] format a place to store the NvCVImage pixel format. +//! \param[out] type a place to store the NvCVImage component type. +//! \param[out] layout a place to store the NvCVImage layout. +//! \return NVCV_SUCCESS if successful. +NvCV_Status NvCV_API NvCVImage_FromD3DFormat(DXGI_FORMAT d3dFormat, NvCVImage_PixelFormat *format, NvCVImage_ComponentType *type, unsigned char *layout); + + +#ifdef __dxgicommon_h__ + +//! Utility to determine the D3D color space from the NvCVImage color space. +//! \param[in] nvcvColorSpace the NvCVImage colro space. +//! \param[out] pD3dColorSpace a place to store the resultant D3D color space. +//! \return NVCV_SUCCESS if successful. +//! \return NVCV_ERR_PIXELFORMAT if there is no equivalent color space. +NvCV_Status NvCV_API NvCVImage_ToD3DColorSpace(unsigned char nvcvColorSpace, DXGI_COLOR_SPACE_TYPE *pD3dColorSpace); + + +//! Utility to determine the NvCVImage color space from the D3D color space. +//! \param[in] d3dColorSpace the D3D color space. +//! \param[out] pNvcvColorSpace a place to store the resultant NvCVImage color space. +//! \return NVCV_SUCCESS if successful. +//! \return NVCV_ERR_PIXELFORMAT if there is no equivalent color space. +NvCV_Status NvCV_API NvCVImage_FromD3DColorSpace(DXGI_COLOR_SPACE_TYPE d3dColorSpace, unsigned char *pNvcvColorSpace); + +#endif // __dxgicommon_h__ + + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // __NVTRANSFER_D3D_H__ + diff --git a/vfx/nvvfx/include/nvTransferD3D11.h b/vfx/nvvfx/include/nvTransferD3D11.h new file mode 100644 index 0000000..fabf067 --- /dev/null +++ b/vfx/nvvfx/include/nvTransferD3D11.h @@ -0,0 +1,44 @@ +/*############################################################################### +# +# Copyright(c) 2021 NVIDIA CORPORATION.All Rights Reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto.Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. +# +###############################################################################*/ + +#ifndef __NVTRANSFER_D3D11_H__ +#define __NVTRANSFER_D3D11_H__ + +#include +#include "nvCVImage.h" +#include "nvTransferD3D.h" // for NvCVImage_ToD3DFormat() and NvCVImage_FromD3DFormat() + +#ifdef __cplusplus +extern "C" { +#endif // ___cplusplus + + + +//! Initialize an NvCVImage from a D3D11 texture. +//! The pixelFormat and component types with be transferred over, and a cudaGraphicsResource will be registered; +//! the NvCVImage destructor will unregister the resource. +//! This is designed to work with NvCVImage_TransferFromArray() (and eventually NvCVImage_Transfer()); +//! however it is necessary to call NvCVImage_MapResource beforehand, and NvCVImage_UnmapResource +//! before allowing D3D to render into it. +//! \param[in,out] im the image to be initialized. +//! \param[in] tx the texture to be used for initialization. +//! \return NVCV_SUCCESS if successful. +NvCV_Status NvCV_API NvCVImage_InitFromD3D11Texture(NvCVImage *im, struct ID3D11Texture2D *tx); + + + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // __NVTRANSFER_D3D11_H__ + diff --git a/vfx/nvvfx/include/nvVideoEffects.h b/vfx/nvvfx/include/nvVideoEffects.h new file mode 100644 index 0000000..9d461bc --- /dev/null +++ b/vfx/nvvfx/include/nvVideoEffects.h @@ -0,0 +1,228 @@ +/*############################################################################### +# +# Copyright (c) 2020 NVIDIA Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +###############################################################################*/ + +#ifndef __NVVIDEO_EFFECTS_H__ +#define __NVVIDEO_EFFECTS_H__ + +#include "nvCVImage.h" + +#ifndef NvVFX_API + #ifdef _WIN32 + #ifdef NVVFX_API_EXPORT + #define NvVFX_API __declspec(dllexport) __cdecl + #else + #define NvVFX_API + #endif + #else //if linux + #define NvVFX_API // TODO: Linux code goes here + #endif // _WIN32 or linux +#endif //NvVFX_API + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// Forward declaration for CUDA API +typedef struct CUstream_st* CUstream; + +//! We use strings as effect selectors. +typedef const char* NvVFX_EffectSelector; + +//! We use strings as parameter selectors. +typedef const char* NvVFX_ParameterSelector; + +//! Each effect instantiation is associated with an opaque handle. +struct NvVFX_Object; +typedef struct NvVFX_Object NvVFX_Object, *NvVFX_Handle; + +//! Get the SDK version +//! \param[in,out] version Pointer to an unsigned int set to +//! (major << 24) | (minor << 16) | (build << 8) | 0 +//! \return NVCV_SUCCESS if the version was set +//! \return NVCV_ERR_PARAMETER if version was NULL +NvCV_Status NvVFX_API NvVFX_GetVersion(unsigned int *version); + +//! Create an new instantiation of a video effect. +//! \param[in] code the selector code for the desired video effect. +//! \param[out] effect a handle to the Video Effect instantiation. +//! \return NVCV_SUCCESS if the operation was successful. +NvCV_Status NvVFX_API NvVFX_CreateEffect(NvVFX_EffectSelector code, NvVFX_Handle *effect); + + +//! Delete a previously allocated video effect. +//! \param[in] effect a handle to the video effect to be deleted. +void NvVFX_API NvVFX_DestroyEffect(NvVFX_Handle effect); + + +//! Set the value of the selected parameter (unsigned int, int, float double, unsigned long long, void*, CUstream). +//! \param[in,out] effect The effect to configure. +//! \param[in] paramName The selector of the effect parameter to configure. +//! \param[in] val The value to be assigned to the selected effect parameter. +//! \return NVCV_SUCCESS if the operation was successful. +//! \return NVCV_ERR_EFFECT if an invalid effect handle was supplied. +//! \return NVCV_ERR_SELECTOR if the chosen effect does not understand the specified selector and data type. +//! \return NVCV_ERR_PARAMETER if the value was out of range. +NvCV_Status NvVFX_API NvVFX_SetU32(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, unsigned int val); +NvCV_Status NvVFX_API NvVFX_SetS32(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, int val); +NvCV_Status NvVFX_API NvVFX_SetF32(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, float val); +NvCV_Status NvVFX_API NvVFX_SetF64(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, double val); +NvCV_Status NvVFX_API NvVFX_SetU64(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, unsigned long long val); +NvCV_Status NvVFX_API NvVFX_SetObject(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, void *ptr); +NvCV_Status NvVFX_API NvVFX_SetCudaStream(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, CUstream stream); + +//! Set the selected image descriptor. +//! A shallow copy of the descriptor is made (preserving the pixel pointer), so that an ephemeral NvVFXImage_Init() +//! wrapper may be used in the call to NvVFX_SetImage() if desired, without having to preserve it for the lifetime +//! of the effect. The effect does not take ownership of the pixel buffer. +//! \param[in,out] effect The effect to configure. +//! \param[in] paramName The selector of the effect image to configure. +//! \param[in] im Pointer to the image descriptor to be used for the selected effect image. +//! NULL clears the selected internal image descriptor. +//! \return NVCV_SUCCESS if the operation was successful. +//! \return NVCV_ERR_EFFECT if an invalid effect handle was supplied. +//! \return NVCV_ERR_SELECTOR if the chosen effect does not understand the specified image selector. +//! \return NVCV_ERR_PARAMETER if an unexpected NULL pointer was supplied. +NvCV_Status NvVFX_API NvVFX_SetImage(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, NvCVImage *im); + +//! Set the value of the selected string, by making a copy in the effect handle. +//! \param[in,out] effect The effect to configure. +//! \param[in] paramName The selector of the effect string to configure. +//! \param[in] str The value to be assigned to the selected effect string. NULL clears the selected string. +//! \return NVCV_SUCCESS if the operation was successful. +//! \return NVCV_ERR_EFFECT if an invalid effect handle was supplied. +//! \return NVCV_ERR_SELECTOR if the chosen effect does not understand the specified string selector. +//! \return NVCV_ERR_PARAMETER if an unexpected NULL pointer was supplied. +NvCV_Status NvVFX_API NvVFX_SetString(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, const char *str); + + +//! Get the value of the selected parameter (unsigned int, int, float double, unsigned long long, void*, CUstream). +//! These are not typically used except for testing. +//! \param[in] effect the effect to be queried. +//! \param[in] paramName the selector of the effect parameter to retrieve. +//! \param[out] val a place to store the retrieved parameter. +//! \return NVCV_SUCCESS if the operation was successful. +//! \return NVCV_ERR_EFFECT if an invalid effect handle was supplied. +//! \return NVCV_ERR_SELECTOR if the chosen effect does not understand the specified selector and data type. +//! \return NVCV_ERR_PARAMETER if an unexpected NULL pointer was supplied. +//! \note Typically, these are not used outside of testing. +NvCV_Status NvVFX_API NvVFX_GetU32(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, unsigned int *val); +NvCV_Status NvVFX_API NvVFX_GetS32(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, int *val); +NvCV_Status NvVFX_API NvVFX_GetF32(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, float *val); +NvCV_Status NvVFX_API NvVFX_GetF64(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, double *val); +NvCV_Status NvVFX_API NvVFX_GetU64(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, unsigned long long *val); +NvCV_Status NvVFX_API NvVFX_GetObject(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, void **ptr); +NvCV_Status NvVFX_API NvVFX_GetCudaStream(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, CUstream *stream); + +//! Get a copy of the selected image descriptor. +//! If GetImage() is called before SetImage(), the returned descriptor will be filled with zeros. +//! Otherwise, the values will be identical to that in the previous SetImage() call, +//! with the exception of deletePtr, deleteProc and bufferBytes, which will be 0. +//! \param[in] effect the effect to be queried. +//! \param[in] paramName the selector of the effect image to retrieve. +//! \param[out] val a place to store the selected image descriptor. +//! A pointer to an empty NvCVImage (deletePtr==NULL) should be supplied to avoid memory leaks. +//! \return NVCV_SUCCESS if the operation was successful. +//! \return NVCV_ERR_EFFECT if an invalid effect handle was supplied. +//! \return NVCV_ERR_SELECTOR if the chosen effect does not understand the specified image selector. +//! \return NVCV_ERR_PARAMETER if an unexpected NULL pointer was supplied. +//! \note Typically, this is not used outside of testing. +NvCV_Status NvVFX_API NvVFX_GetImage(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, NvCVImage *im); + +//! Get the specified string. +//! If GetString() is called before SetString(), the returned string will be empty. +//! Otherwise, the string will be identical to that in the previous SetString() call, +//! though it will be stored in a different location. +//! \param[in] effect the effect to be queried. +//! \param[in] paramName the selector of the effect string to retrieve. +//! \param[out] val a place to store a pointer to the selected string. +//! \return NVCV_SUCCESS if the operation was successful. +//! \return NVCV_ERR_EFFECT if an invalid effect handle was supplied. +//! \return NVCV_ERR_SELECTOR if the chosen effect does not understand the specified string selector. +//! \return NVCV_ERR_PARAMETER if an unexpected NULL pointer was supplied. +//! \note Typically, this is not used outside of testing. +NvCV_Status NvVFX_API NvVFX_GetString(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, const char **str); + +//! Run the selected effect. +//! \param[in] effect the effect object handle. +//! \param[in] async run the effect asynchronously if nonzero; otherwise run synchronously. +//! \todo Should async instead be a pointer to a place to store a token that can be useful +//! for synchronizing two streams alter? +//! \return NVFVX_SUCCESS if the operation was successful. +//! \return NVCV_ERR_EFFECT if an invalid effect handle was supplied. +NvCV_Status NvVFX_API NvVFX_Run(NvVFX_Handle effect, int async); + +//! Load the model based on the set params. +//! \param[in] effect the effect object handle. +//! \return NVFVX_SUCCESS if the operation was successful. +//! \return NVCV_ERR_EFFECT if an invalid effect handle was supplied. +NvCV_Status NvVFX_API NvVFX_Load(NvVFX_Handle effect); + +//! Wrapper for cudaStreamCreate(), if it is desired to avoid linking with the cuda lib. +//! \param[out] stream A place to store the newly allocated stream. +//! \return NVFVX_SUCCESS if the operation was successful, +//! NVCV_ERR_CUDA_VALUE if not. +NvCV_Status NvVFX_API NvVFX_CudaStreamCreate(CUstream *stream); + +//! Wrapper for cudaStreamDestroy(), if it is desired to avoid linking with the cuda lib. +//! \param[in] stream The stream to destroy. +//! \return NVFVX_SUCCESS if the operation was successful, +//! NVCV_ERR_CUDA_VALUE if not. +NvCV_Status NvVFX_API NvVFX_CudaStreamDestroy(CUstream stream); + + +// Filter selectors +#define NVVFX_FX_TRANSFER "Transfer" +#define NVVFX_FX_GREEN_SCREEN "GreenScreen" // Green Screen +#define NVVFX_FX_BGBLUR "BackgroundBlur" // Background blur +#define NVVFX_FX_ARTIFACT_REDUCTION "ArtifactReduction" // Artifact Reduction +#define NVVFX_FX_SUPER_RES "SuperRes" // Super Res +#define NVVFX_FX_SR_UPSCALE "Upscale" // Super Res Upscale +#define NVVFX_FX_DENOISING "Denoising" // Denoising + +// Parameter selectors +#define NVVFX_INPUT_IMAGE_0 "SrcImage0" //!< There may be multiple input images +#define NVVFX_INPUT_IMAGE NVVFX_INPUT_IMAGE_0 //!< but there is usually only one input image +#define NVVFX_INPUT_IMAGE_1 "SrcImage1" //!< Source Image 1 +#define NVVFX_OUTPUT_IMAGE_0 "DstImage0" //!< There may be multiple output images +#define NVVFX_OUTPUT_IMAGE NVVFX_OUTPUT_IMAGE_0 //!< but there is usually only one output image +#define NVVFX_MODEL_DIRECTORY "ModelDir" //!< The directory where the model may be found +#define NVVFX_CUDA_STREAM "CudaStream" //!< The CUDA stream to use +#define NVVFX_INFO "Info" //!< Get info about the effects +#define NVVFX_SCALE "Scale" //!< Scale factor +#define NVVFX_STRENGTH "Strength" //!< Strength for different filters +#define NVVFX_STRENGTH_LEVELS "StrengthLevels" //!< Number of strength levels +#define NVVFX_MODE "Mode" //!< Mode for different filters +#define NVVFX_TEMPORAL "Temporal" //!< Temporal mode: 0=image, 1=video +#define NVVFX_GPU "GPU" //!< Preferred GPU (optional) +#define NVVFX_BATCH_SIZE "BatchSize" //!< Batch Size (default 1) +#define NVVFX_MODEL_BATCH "ModelBatch" //!< The preferred batching model to use (default 1) +#define NVVFX_STATE "State" //!< State variable +#define NVVFX_STATE_SIZE "StateSize" //!< Number of bytes needed to store state + + + +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif // __NVVIDEO_EFFECTS_H__ diff --git a/vfx/nvvfx/src/NVVideoEffectsProxy.cpp b/vfx/nvvfx/src/NVVideoEffectsProxy.cpp new file mode 100644 index 0000000..c4770b9 --- /dev/null +++ b/vfx/nvvfx/src/NVVideoEffectsProxy.cpp @@ -0,0 +1,258 @@ +/*############################################################################### +# +# Copyright (c) 2020 NVIDIA Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +###############################################################################*/ +#include + +#include "nvVideoEffects.h" + +#ifdef _WIN32 + #define _WINSOCKAPI_ + #include + #include +#else // !_WIN32 + #include + typedef void* HMODULE; + typedef void* HANDLE; + typedef void* HINSTANCE; +#endif // _WIN32 + +// Parameter string does not include the file extension +#ifdef _WIN32 +#define nvLoadLibrary(library) LoadLibrary(TEXT(library ".dll")) +#else // !_WIN32 +#define nvLoadLibrary(library) dlopen("lib" library ".so", RTLD_LAZY) +#endif // _WIN32 + + +inline void* nvGetProcAddress(HINSTANCE handle, const char* proc) { + if (nullptr == handle) return nullptr; +#ifdef _WIN32 + return (void *)GetProcAddress(handle, proc); +#else // !_WIN32 + return dlsym(handle, proc); +#endif // _WIN32 +} + +inline int nvFreeLibrary(HINSTANCE handle) { +#ifdef _WIN32 + return FreeLibrary(handle); +#else + return dlclose(handle); +#endif +} + +HINSTANCE getNvVfxLib() { + + TCHAR path[MAX_PATH], fullPath[MAX_PATH]; + + // There can be multiple apps on the system, + // some might include the SDK in the app package and + // others might expect the SDK to be installed in Program Files + GetEnvironmentVariable(TEXT("NV_VIDEO_EFFECTS_PATH"), path, MAX_PATH); + if (_tcscmp(path, TEXT("USE_APP_PATH"))) { + // App has not set environment variable to "USE_APP_PATH" + // So pick up the SDK dll and dependencies from Program Files + GetEnvironmentVariable(TEXT("ProgramFiles"), path, MAX_PATH); + size_t max_len = sizeof(fullPath)/sizeof(TCHAR); + _stprintf_s(fullPath, max_len, TEXT("%s\\NVIDIA Corporation\\NVIDIA Video Effects\\"), path); + SetDllDirectory(fullPath); + } + static const HINSTANCE NvVfxLib = nvLoadLibrary("NVVideoEffects"); + return NvVfxLib; +} + +NvCV_Status NvVFX_API NvVFX_GetVersion(unsigned int* version) { + static const auto funcPtr = (decltype(NvVFX_GetVersion)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetVersion"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(version); +} + +NvCV_Status NvVFX_API NvVFX_CreateEffect(NvVFX_EffectSelector code, NvVFX_Handle* obj) { + static const auto funcPtr = (decltype(NvVFX_CreateEffect)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_CreateEffect"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(code, obj); +} + +void NvVFX_API NvVFX_DestroyEffect(NvVFX_Handle obj) { + static const auto funcPtr = (decltype(NvVFX_DestroyEffect)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_DestroyEffect"); + + if (nullptr != funcPtr) funcPtr(obj); +} + +NvCV_Status NvVFX_API NvVFX_SetU32(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, unsigned int val) { + static const auto funcPtr = (decltype(NvVFX_SetU32)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_SetU32"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, paramName, val); +} + +NvCV_Status NvVFX_API NvVFX_SetS32(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, int val) { + static const auto funcPtr = (decltype(NvVFX_SetS32)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_SetS32"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, paramName, val); +} + +NvCV_Status NvVFX_API NvVFX_SetF32(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, float val) { + static const auto funcPtr = (decltype(NvVFX_SetF32)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_SetF32"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, paramName, val); +} + +NvCV_Status NvVFX_API NvVFX_SetF64(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, double val) { + static const auto funcPtr = (decltype(NvVFX_SetF64)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_SetF64"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, paramName, val); +} + +NvCV_Status NvVFX_API NvVFX_SetU64(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, unsigned long long val) { + static const auto funcPtr = (decltype(NvVFX_SetU64)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_SetU64"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, paramName, val); +} + +NvCV_Status NvVFX_API NvVFX_SetImage(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, NvCVImage* im) { + static const auto funcPtr = (decltype(NvVFX_SetImage)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_SetImage"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, paramName, im); +} + +NvCV_Status NvVFX_API NvVFX_SetObject(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, void* ptr) { + static const auto funcPtr = (decltype(NvVFX_SetObject)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_SetObject"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, paramName, ptr); +} + +NvCV_Status NvVFX_API NvVFX_SetString(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, const char* str) { + static const auto funcPtr = (decltype(NvVFX_SetString)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_SetString"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, paramName, str); +} + +NvCV_Status NvVFX_API NvVFX_SetCudaStream(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, CUstream stream) { + static const auto funcPtr = (decltype(NvVFX_SetCudaStream)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_SetCudaStream"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, paramName, stream); +} + +NvCV_Status NvVFX_API NvVFX_GetU32(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, unsigned int* val) { + static const auto funcPtr = (decltype(NvVFX_GetU32)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetU32"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, paramName, val); +} + +NvCV_Status NvVFX_API NvVFX_GetS32(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, int* val) { + static const auto funcPtr = (decltype(NvVFX_GetS32)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetS32"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, paramName, val); +} + +NvCV_Status NvVFX_API NvVFX_GetF32(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, float* val) { + static const auto funcPtr = (decltype(NvVFX_GetF32)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetF32"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, paramName, val); +} + +NvCV_Status NvVFX_API NvVFX_GetF64(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, double* val) { + static const auto funcPtr = (decltype(NvVFX_GetF64)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetF64"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, paramName, val); +} + +NvCV_Status NvVFX_API NvVFX_GetU64(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, unsigned long long* val) { + static const auto funcPtr = (decltype(NvVFX_GetU64)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetU64"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, paramName, val); +} + +NvCV_Status NvVFX_API NvVFX_GetImage(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, NvCVImage* im) { + static const auto funcPtr = (decltype(NvVFX_GetImage)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetImage"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, paramName, im); +} + +NvCV_Status NvVFX_API NvVFX_GetObject(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, void** ptr) { + static const auto funcPtr = (decltype(NvVFX_GetObject)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetObject"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, paramName, ptr); +} + +NvCV_Status NvVFX_API NvVFX_GetString(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, const char** str) { + static const auto funcPtr = (decltype(NvVFX_GetString)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetString"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, paramName, str); +} + +NvCV_Status NvVFX_API NvVFX_GetCudaStream(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, CUstream* stream) { + static const auto funcPtr = (decltype(NvVFX_GetCudaStream)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetCudaStream"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, paramName, stream); +} + +NvCV_Status NvVFX_API NvVFX_Run(NvVFX_Handle obj, int async) { + static const auto funcPtr = (decltype(NvVFX_Run)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_Run"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj, async); +} + +NvCV_Status NvVFX_API NvVFX_Load(NvVFX_Handle obj) { + static const auto funcPtr = (decltype(NvVFX_Load)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_Load"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(obj); +} + +NvCV_Status NvVFX_API NvVFX_CudaStreamCreate(CUstream* stream) { + static const auto funcPtr = + (decltype(NvVFX_CudaStreamCreate)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_CudaStreamCreate"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(stream); +} + +NvCV_Status NvVFX_API NvVFX_CudaStreamDestroy(CUstream stream) { + static const auto funcPtr = + (decltype(NvVFX_CudaStreamDestroy)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_CudaStreamDestroy"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(stream); +} + diff --git a/vfx/nvvfx/src/nvCVImageProxy.cpp b/vfx/nvvfx/src/nvCVImageProxy.cpp new file mode 100644 index 0000000..a6694da --- /dev/null +++ b/vfx/nvvfx/src/nvCVImageProxy.cpp @@ -0,0 +1,311 @@ +#if defined(linux) || defined(unix) || defined(__linux) +#warning nvCVImageProxy.cpp not ported +#else +/*############################################################################### +# +# Copyright 2020 NVIDIA Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +###############################################################################*/ +#include +#include "nvCVImage.h" + +#ifdef _WIN32 + #define _WINSOCKAPI_ + #include + #include + #include "nvTransferD3D.h" + #include "nvTransferD3D11.h" +#else // !_WIN32 + #include + typedef void* HMODULE; + typedef void* HANDLE; + typedef void* HINSTANCE; +#endif // _WIN32 + +// Parameter string does not include the file extension +#ifdef _WIN32 +#define nvLoadLibrary(library) LoadLibrary(TEXT(library ".dll")) +#else // !_WIN32 +#define nvLoadLibrary(library) dlopen("lib" library ".so", RTLD_LAZY) +#endif // _WIN32 + + +inline void* nvGetProcAddress(HINSTANCE handle, const char* proc) { + if (nullptr == handle) return nullptr; +#ifdef _WIN32 + return (void *)GetProcAddress(handle, proc); +#else // !_WIN32 + return dlsym(handle, proc); +#endif // _WIN32 +} + +inline int nvFreeLibrary(HINSTANCE handle) { +#ifdef _WIN32 + return FreeLibrary(handle); +#else + return dlclose(handle); +#endif +} + +HINSTANCE getNvCVImageLib() { + TCHAR path[MAX_PATH], tmpPath[MAX_PATH], fullPath[MAX_PATH]; + static HINSTANCE nvCVImageLib = NULL; + static bool bSDKPathSet = false; + if (!bSDKPathSet) { + // There can be multiple apps on the system, + // some might include the SDK in the app package and + // others might expect the SDK to be installed in Program Files + GetEnvironmentVariable(TEXT("NV_VIDEO_EFFECTS_PATH"), path, MAX_PATH); + GetEnvironmentVariable(TEXT("NV_AR_SDK_PATH"), tmpPath, MAX_PATH); + if (_tcscmp(path, TEXT("USE_APP_PATH")) && _tcscmp(tmpPath, TEXT("USE_APP_PATH"))) { + // App has not set environment variable to "USE_APP_PATH" + // So pick up the SDK dll and dependencies from Program Files + GetEnvironmentVariable(TEXT("ProgramFiles"), path, MAX_PATH); + size_t max_len = sizeof(fullPath) / sizeof(TCHAR); + _stprintf_s(fullPath, max_len, TEXT("%s\\NVIDIA Corporation\\NVIDIA Video Effects\\"), path); + SetDllDirectory(fullPath); + nvCVImageLib = nvLoadLibrary("NVCVImage"); + if (!nvCVImageLib) { + _stprintf_s(fullPath, max_len, TEXT("%s\\NVIDIA Corporation\\NVIDIA AR SDK\\"), path); + SetDllDirectory(fullPath); + nvCVImageLib = nvLoadLibrary("NVCVImage"); + } + } + bSDKPathSet = true; + } + return nvCVImageLib; +} + +NvCV_Status NvCV_API NvCVImage_Init(NvCVImage* im, unsigned width, unsigned height, int pitch, void* pixels, + NvCVImage_PixelFormat format, NvCVImage_ComponentType type, unsigned isPlanar, + unsigned onGPU) { + static const auto funcPtr = (decltype(NvCVImage_Init)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_Init"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(im, width, height, pitch, pixels, format, type, isPlanar, onGPU); +} + +void NvCV_API NvCVImage_InitView(NvCVImage* subImg, NvCVImage* fullImg, int x, int y, unsigned width, + unsigned height) { + static const auto funcPtr = (decltype(NvCVImage_InitView)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_InitView"); + + if (nullptr != funcPtr) funcPtr(subImg, fullImg, x, y, width, height); +} + +NvCV_Status NvCV_API NvCVImage_Alloc(NvCVImage* im, unsigned width, unsigned height, NvCVImage_PixelFormat format, + NvCVImage_ComponentType type, unsigned isPlanar, unsigned onGPU, unsigned alignment) { + static const auto funcPtr = (decltype(NvCVImage_Alloc)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_Alloc"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(im, width, height, format, type, isPlanar, onGPU, alignment); +} + +NvCV_Status NvCV_API NvCVImage_Realloc(NvCVImage* im, unsigned width, unsigned height, + NvCVImage_PixelFormat format, NvCVImage_ComponentType type, + unsigned isPlanar, unsigned onGPU, unsigned alignment) { + static const auto funcPtr = (decltype(NvCVImage_Realloc)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_Realloc"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(im, width, height, format, type, isPlanar, onGPU, alignment); +} + +void NvCV_API NvCVImage_Dealloc(NvCVImage* im) { + static const auto funcPtr = (decltype(NvCVImage_Dealloc)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_Dealloc"); + + if (nullptr != funcPtr) funcPtr(im); +} + +NvCV_Status NvCV_API NvCVImage_Create(unsigned width, unsigned height, NvCVImage_PixelFormat format, + NvCVImage_ComponentType type, unsigned isPlanar, unsigned onGPU, + unsigned alignment, NvCVImage** out) { + static const auto funcPtr = (decltype(NvCVImage_Create)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_Create"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(width, height, format, type, isPlanar, onGPU, alignment, out); +} + +void NvCV_API NvCVImage_Destroy(NvCVImage* im) { + static const auto funcPtr = (decltype(NvCVImage_Destroy)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_Destroy"); + + if (nullptr != funcPtr) funcPtr(im); +} + +void NvCV_API NvCVImage_ComponentOffsets(NvCVImage_PixelFormat format, int* rOff, int* gOff, int* bOff, int* aOff, + int* yOff) { + static const auto funcPtr = + (decltype(NvCVImage_ComponentOffsets)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_ComponentOffsets"); + + if (nullptr != funcPtr) funcPtr(format, rOff, gOff, bOff, aOff, yOff); +} + +NvCV_Status NvCV_API NvCVImage_Transfer(const NvCVImage* src, NvCVImage* dst, float scale, CUstream_st* stream, + NvCVImage* tmp) { + static const auto funcPtr = (decltype(NvCVImage_Transfer)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_Transfer"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(src, dst, scale, stream, tmp); +} + +NvCV_Status NvCV_API NvCVImage_TransferRect(const NvCVImage *src, const NvCVRect2i *srcRect, NvCVImage *dst, + const NvCVPoint2i *dstPt, float scale, struct CUstream_st *stream, NvCVImage *tmp) { + static const auto funcPtr = (decltype(NvCVImage_TransferRect)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_TransferRect"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(src, srcRect, dst, dstPt, scale, stream, tmp); +} + +NvCV_Status NvCV_API NvCVImage_TransferFromYUV(const void *y, int yPixBytes, int yPitch, const void *u, const void *v, + int uvPixBytes, int uvPitch, NvCVImage_PixelFormat yuvFormat, NvCVImage_ComponentType yuvType, unsigned yuvColorSpace, + unsigned yuvMemSpace, NvCVImage *dst, const NvCVRect2i *dstRect, float scale, struct CUstream_st *stream, NvCVImage *tmp) { + static const auto funcPtr = (decltype(NvCVImage_TransferFromYUV)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_TransferFromYUV"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(y, yPixBytes, yPitch, u, v, uvPixBytes, uvPitch, yuvFormat, yuvType, yuvColorSpace, yuvMemSpace, dst, + dstRect, scale, stream, tmp); +} + +NvCV_Status NvCV_API NvCVImage_TransferToYUV(const NvCVImage *src, const NvCVRect2i *srcRect, + const void *y, int yPixBytes, int yPitch, const void *u, const void *v, int uvPixBytes, int uvPitch, + NvCVImage_PixelFormat yuvFormat, NvCVImage_ComponentType yuvType, unsigned yuvColorSpace, unsigned yuvMemSpace, + float scale, struct CUstream_st *stream, NvCVImage *tmp) { + static const auto funcPtr = (decltype(NvCVImage_TransferToYUV)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_TransferToYUV"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(src, srcRect, y, yPixBytes, yPitch, u, v, uvPixBytes, uvPitch, yuvFormat, yuvType, yuvColorSpace, yuvMemSpace, scale, stream, tmp); +} + +NvCV_Status NvCV_API NvCVImage_MapResource(NvCVImage *im, struct CUstream_st *stream) { + static const auto funcPtr = (decltype(NvCVImage_MapResource)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_MapResource"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(im, stream); +} + +NvCV_Status NvCV_API NvCVImage_UnmapResource(NvCVImage *im, struct CUstream_st *stream) { + static const auto funcPtr = (decltype(NvCVImage_UnmapResource)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_UnmapResource"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(im, stream); +} + +#if RTX_CAMERA_IMAGE == 0 +NvCV_Status NvCV_API NvCVImage_Composite(const NvCVImage* fg, const NvCVImage* bg, const NvCVImage* mat, NvCVImage* dst, + struct CUstream_st *stream) { + static const auto funcPtr = (decltype(NvCVImage_Composite)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_Composite"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(fg, bg, mat, dst, stream); +} +#else // RTX_CAMERA_IMAGE == 1 +NvCV_Status NvCV_API NvCVImage_Composite(const NvCVImage* fg, const NvCVImage* bg, const NvCVImage* mat, NvCVImage* dst) { + static const auto funcPtr = (decltype(NvCVImage_Composite)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_Composite"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(fg, bg, mat, dst); +} +#endif // RTX_CAMERA_IMAGE + +NvCV_Status NvCV_API NvCVImage_CompositeRect( + const NvCVImage *fg, const NvCVPoint2i *fgOrg, + const NvCVImage *bg, const NvCVPoint2i *bgOrg, + const NvCVImage *mat, unsigned mode, + NvCVImage *dst, const NvCVPoint2i *dstOrg, + struct CUstream_st *stream) { + static const auto funcPtr = (decltype(NvCVImage_CompositeRect)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_CompositeRect"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(fg, fgOrg, bg, bgOrg, mat, mode, dst, dstOrg, stream); +} + +NvCV_Status NvCV_API NvCVImage_CompositeOverConstant(const NvCVImage* src, const NvCVImage* mat, + const unsigned char bgColor[3], NvCVImage* dst) { + static const auto funcPtr = + (decltype(NvCVImage_CompositeOverConstant)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_CompositeOverConstant"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(src, mat, bgColor, dst); +} + +NvCV_Status NvCV_API NvCVImage_FlipY(const NvCVImage* src, NvCVImage* dst) { + static const auto funcPtr = (decltype(NvCVImage_FlipY)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_FlipY"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(src, dst); +} + +#ifdef _WIN32 +__declspec(dllexport) const char* __cdecl +#else +const char* +#endif // _WIN32 or linux + NvCV_GetErrorStringFromCode(NvCV_Status code) { + static const auto funcPtr = + (decltype(NvCV_GetErrorStringFromCode)*)nvGetProcAddress(getNvCVImageLib(), "NvCV_GetErrorStringFromCode"); + + if (nullptr == funcPtr) return "Cannot find nvCVImage DLL or its dependencies"; + return funcPtr(code); +} + + + +#ifdef _WIN32 // Direct 3D + +NvCV_Status NvCV_API NvCVImage_InitFromD3D11Texture(NvCVImage *im, struct ID3D11Texture2D *tx) { + static const auto funcPtr = (decltype(NvCVImage_InitFromD3D11Texture)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_InitFromD3D11Texture"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(im, tx); +} + +NvCV_Status NvCV_API NvCVImage_ToD3DFormat(NvCVImage_PixelFormat format, NvCVImage_ComponentType type, unsigned layout, DXGI_FORMAT *d3dFormat) { + static const auto funcPtr = (decltype(NvCVImage_ToD3DFormat)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_ToD3DFormat"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(format, type, layout, d3dFormat); +} + +NvCV_Status NvCV_API NvCVImage_FromD3DFormat(DXGI_FORMAT d3dFormat, NvCVImage_PixelFormat *format, NvCVImage_ComponentType *type, unsigned char *layout) { + static const auto funcPtr = (decltype(NvCVImage_FromD3DFormat)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_FromD3DFormat"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(d3dFormat, format, type, layout); +} + +#ifdef __dxgicommon_h__ + +NvCV_Status NvCV_API NvCVImage_ToD3DColorSpace(unsigned char nvcvColorSpace, DXGI_COLOR_SPACE_TYPE *pD3dColorSpace) { + static const auto funcPtr = (decltype(NvCVImage_ToD3DColorSpace)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_ToD3DColorSpace"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(nvcvColorSpace, pD3dColorSpace); +} + +NvCV_Status NvCV_API NvCVImage_FromD3DColorSpace(DXGI_COLOR_SPACE_TYPE d3dColorSpace, unsigned char *pNvcvColorSpace) { + static const auto funcPtr = (decltype(NvCVImage_FromD3DColorSpace)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_FromD3DColorSpace"); + + if (nullptr == funcPtr) return NVCV_ERR_LIBRARY; + return funcPtr(d3dColorSpace, pNvcvColorSpace); +} + +#endif // __dxgicommon_h__ + +#endif // _WIN32 Direct 3D + +#endif // enabling for this file diff --git a/vfx/vfx.cc b/vfx/vfx.cc new file mode 100644 index 0000000..0dd0ed2 --- /dev/null +++ b/vfx/vfx.cc @@ -0,0 +1,265 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "VapourSynth.h" +#include "VSHelper.h" + +#ifndef _WIN32 +#error "Unsupported platform" +#else +static std::vector autoDllErrors; +#define CUDA_DLL L"nvcuda.dll","nvcuda.dll",autoDllErrors +#endif +#include "../ngx/cuda.h" + +#include "nvvfx/include/nvCVStatus.h" +#include "nvvfx/include/nvCVImage.h" +#include "nvvfx/include/nvVideoEffects.h" + +#define CK_VFX(x) do { \ + NvCV_Status r = (x); \ + if (r != NVCV_SUCCESS) { \ + fprintf(stderr, "failed VFX call %s: %x (%s)\n", #x, r, NvCV_GetErrorStringFromCode(r)); \ + abort(); \ + } \ +} while (0) +#define CK_CUDA(x) do { \ + int r = (x); \ + if (r != CUDA_SUCCESS) { \ + fprintf(stderr, "failed cuda call %s: %d\n", #x, r); \ + abort(); \ + } \ +} while (0) + +struct VfxData { + std::mutex lock; + + VSNodeRef *node; + VSVideoInfo vi; + double scale; + double strength; + + int in_width, in_height; + + NvVFX_Handle vfx; + CUstream stream; + CUdeviceptr state; + + NvCVImage srcCpuImg, srcGpuImg; + NvCVImage dstCpuImg, dstGpuImg; + + typedef float T; + uint64_t in_image_width() const { return in_width; } + uint64_t out_image_width() const { return vi.width; } + uint64_t in_image_height() const { return in_height; } + uint64_t out_image_height() const { return vi.height; } + + VfxData() : node(nullptr), vi(), scale(0), strength(0), vfx(nullptr), stream(nullptr), state(nullptr) {} + ~VfxData() { + if (vfx) NvVFX_DestroyEffect(vfx); + if (stream) NvVFX_CudaStreamDestroy(stream); + if (state) cuMemFree_v2(state); + NvCVImage_Dealloc(&srcCpuImg); + NvCVImage_Dealloc(&srcGpuImg); + NvCVImage_Dealloc(&dstCpuImg); + NvCVImage_Dealloc(&dstGpuImg); + } +}; + +static void VS_CC vfxInit(VSMap *in, VSMap *out, void **instanceData, VSNode *node, VSCore *core, const VSAPI *vsapi) { + VfxData *d = static_cast(*instanceData); + vsapi->setVideoInfo(&d->vi, 1, node); +} + +static const VSFrameRef *VS_CC vfxGetFrame(int n, int activationReason, void **instanceData, void **frameData, VSFrameContext *frameCtx, VSCore *core, const VSAPI *vsapi) { + VfxData *d = static_cast(*instanceData); + + if (activationReason == arInitial) { + vsapi->requestFrameFilter(n, d->node, frameCtx); + } else if (activationReason == arAllFramesReady) { + const VSFrameRef *src = vsapi->getFrameFilter(n, d->node, frameCtx); + + const VSFormat *fi = d->vi.format; + assert(vsapi->getFrameHeight(src, 0) == (int)d->in_image_height()); + assert(vsapi->getFrameWidth(src, 0) == (int)d->in_image_width()); + int planes[3] = { 0, 1, 2 }; + const VSFrameRef *srcf[3] = { nullptr, nullptr, nullptr }; + VSFrameRef *dst = vsapi->newVideoFrame2(fi, d->out_image_width(), d->out_image_height(), srcf, planes, src, core); + + std::lock_guard lock(d->lock); + + typedef VfxData::T T; + T *host = (T *)d->srcCpuImg.pixels; + for (int plane = 0; plane < 3; plane++) { + const size_t stride = vsapi->getStride(src, plane); + const uint8_t *ptr = (uint8_t*)vsapi->getReadPtr(src, plane); + const size_t w = d->in_image_width(), h = d->in_image_height(); + for (size_t i = 0; i < h; i++) + for (size_t j = 0; j < w; j++) + host[plane * h * w + i * w + j] = *(T*)&ptr[i * stride + j * sizeof(T)]; + } + + CK_VFX(NvCVImage_Transfer(&d->srcCpuImg, &d->srcGpuImg, 1.0f, d->stream, nullptr)); + + CK_VFX(NvVFX_Run(d->vfx, 0)); + + CK_VFX(NvCVImage_Transfer(&d->dstGpuImg, &d->dstCpuImg, 1.0f, d->stream, nullptr)); + + host = (T *)d->dstCpuImg.pixels; + for (int plane = 0; plane < 3; plane++) { + const size_t stride = vsapi->getStride(dst, plane); + uint8_t *ptr = (uint8_t*)vsapi->getWritePtr(dst, plane); + const size_t w = d->out_image_width(), h = d->out_image_height(); + for (size_t i = 0; i < h; i++) + for (size_t j = 0; j < d->out_image_width(); j++) + *(T*)&ptr[i * stride + j * sizeof(T)] = host[plane * h * w + i * w + j]; + } + + vsapi->freeFrame(src); + return dst; + } + + return nullptr; +} + +static void VS_CC vfxFree(void *instanceData, VSCore *core, const VSAPI *vsapi) { + VfxData *d = static_cast(instanceData); + vsapi->freeNode(d->node); + + delete d; +} + +static void VS_CC vfxCreate(const VSMap *in, VSMap *out, void *userData, VSCore *core, const VSAPI *vsapi) { + std::unique_ptr d(new VfxData); + int err; + + try { + if (autoDllErrors.size() > 0) { + std::string error, last; + for (const auto &s: autoDllErrors) { + if (error.size()) { + if (last != s) + error += "; " + s; + } else + error = s; + last = s; + } + throw std::runtime_error(error); + } + + d->node = vsapi->propGetNode(in, "clip", 0, &err); + d->vi = *vsapi->getVideoInfo(d->node); + + if (!isConstantFormat(&d->vi)) { + throw std::runtime_error("Only clips with constant format and dimensions allowed"); + } + if (d->vi.format->numPlanes != 3 || d->vi.format->colorFamily != cmRGB) + throw std::runtime_error("input clip must be RGB format"); + if (d->vi.format->sampleType != stFloat || d->vi.format->bitsPerSample != 32) + throw std::runtime_error("input clip must be 32-bit float format"); + + enum { OP_AR, OP_SUPERRES, OP_DENOISE }; + const NvVFX_EffectSelector selectors[] = { NVVFX_FX_ARTIFACT_REDUCTION, NVVFX_FX_SUPER_RES, NVVFX_FX_DENOISING }; + size_t op = int64ToIntS(vsapi->propGetInt(in, "op", 0, &err)); + if (err) throw std::runtime_error("op is required argument"); + if (op >= sizeof selectors / sizeof selectors[0]) + throw std::runtime_error("op is out of range."); + + if (op != OP_SUPERRES) + d->scale = 1; + else { + double scale = vsapi->propGetFloat(in, "scale", 0, &err); + if (err) scale = 1; + if (scale < 1) + throw std::runtime_error("invalid scale parameter"); + d->scale = scale; + } + + double strength = vsapi->propGetFloat(in, "strength", 0, &err); + if (err) strength = 0; + d->strength = strength; + + const char *modelDir = getenv("MODEL_DIR"); // TODO: configurable model directory? + if (modelDir == nullptr) + modelDir = "C:\\Program Files\\NVIDIA Corporation\\NVIDIA Video Effects\\models"; + fprintf(stderr, "MODEL_DIR = %s\n", modelDir); + + NvCV_Status r = NvVFX_CreateEffect(selectors[op], &d->vfx); + if (r != NVCV_SUCCESS) { + const char *err = NvCV_GetErrorStringFromCode(r); + fprintf(stderr, "NvVFX_CreateEffect failed: %x (%s)\n", r, err); + throw std::runtime_error("unable to create effect: " + std::string(err)); + } + + CK_VFX(NvVFX_CudaStreamCreate(&d->stream)); + CK_VFX(NvVFX_SetCudaStream(d->vfx, NVVFX_CUDA_STREAM, d->stream)); + + if (op == OP_AR || op == OP_SUPERRES) + r = NvVFX_SetU32(d->vfx, NVVFX_STRENGTH, int(d->strength)); + else if (op == OP_DENOISE) + r = NvVFX_SetF32(d->vfx, NVVFX_STRENGTH, d->strength); + else assert(false); + if (r != NVCV_SUCCESS) { + const char *err = NvCV_GetErrorStringFromCode(r); + fprintf(stderr, "NvVFX set strength failed: %x (%s)\n", r, err); + throw std::runtime_error("failed to set strength: " + std::string(err)); + } + + r = NvVFX_SetString(d->vfx, NVVFX_MODEL_DIRECTORY, modelDir); + if (r != NVCV_SUCCESS) { + fprintf(stderr, "NvVFX set model directory to %s failed: %x (%s)\n", modelDir, r, NvCV_GetErrorStringFromCode(r)); + throw std::runtime_error("unable to set model directory " + std::string(modelDir)); + } + + if (op == OP_DENOISE) { + unsigned int stateSizeInBytes = 0; + CK_VFX(NvVFX_GetU32(d->vfx, NVVFX_STATE_SIZE, &stateSizeInBytes)); + CK_CUDA(cuMemAlloc_v2(&d->state, stateSizeInBytes)); + CK_CUDA(cuMemsetD8Async(d->state, 0, stateSizeInBytes, d->stream)); + void *stateArray[1] = { d->state }; + CK_VFX(NvVFX_SetObject(d->vfx, NVVFX_STATE, (void*)stateArray)); + } + } catch (std::runtime_error &e) { + if (d->node) + vsapi->freeNode(d->node); + vsapi->setError(out, (std::string{ "DLVFX: " } + e.what()).c_str()); + return; + } + + d->in_width = d->vi.width; + d->in_height = d->vi.height; + d->vi.width *= d->scale; + d->vi.height *= d->scale; + + CK_VFX(NvCVImage_Alloc(&d->srcCpuImg, d->in_image_width(), d->in_image_height(), NVCV_RGB, NVCV_F32, NVCV_PLANAR, NVCV_CPU, 1)); + CK_VFX(NvCVImage_Alloc(&d->srcGpuImg, d->in_image_width(), d->in_image_height(), NVCV_BGR, NVCV_F32, NVCV_PLANAR, NVCV_GPU, 1)); + CK_VFX(NvCVImage_Alloc(&d->dstCpuImg, d->out_image_width(), d->out_image_height(), NVCV_RGB, NVCV_F32, NVCV_PLANAR, NVCV_CPU, 1)); + CK_VFX(NvCVImage_Alloc(&d->dstGpuImg, d->out_image_width(), d->out_image_height(), NVCV_BGR, NVCV_F32, NVCV_PLANAR, NVCV_GPU, 1)); + + CK_VFX(NvVFX_SetImage(d->vfx, NVVFX_INPUT_IMAGE, &d->srcGpuImg)); + CK_VFX(NvVFX_SetImage(d->vfx, NVVFX_OUTPUT_IMAGE, &d->dstGpuImg)); + + CK_VFX(NvVFX_Load(d->vfx)); + + vsapi->createFilter(in, out, "DLVFX", vfxInit, vfxGetFrame, vfxFree, fmParallel, 0, d.release(), core); +} + +////////////////////////////////////////// +// Init + +#ifndef STANDALONE_VFX +void VS_CC vfxInitialize(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin *plugin) { +#else +VS_EXTERNAL_API(void) VapourSynthPluginInit(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin *plugin) { + configFunc("info.akarin.plugin", "akarin2", "Experimental Nvidia Maxine plugin", VAPOURSYNTH_API_VERSION, 1, plugin); +#endif + unsigned int version = 0; + if (NvVFX_GetVersion(&version) == NVCV_SUCCESS) + registerFunc("DLVFX", "clip:clip;op:int;scale:float:opt;strength:float:opt", vfxCreate, nullptr, plugin); +}