diff --git a/README.md b/README.md
index 1d529fd..5bf5b67 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,32 @@
 Plugin akarin
 =============
 
+DLVFX
+-----
+`akarin.DLVFX(clip clip, int op[, float scale=1, float strength=0])`
+
+There are three operation modes:
+- `op=0`: artefact reduction. `int strength` controls the strength.
+- `op=1`: super resolution, `scale>1` controls the scale factor. `int strength` controls the enhancement strength.
+- `op=2`: denoising. `float strength` controls the strength. (Not working.)
+
+Only 32-bit floating point RGB clips are supported for now.
+
+This filter requires appropriate [Video Effects library (v0.6 beta)](https://www.nvidia.com/en-us/geforce/broadcasting/broadcast-sdk/resources/) to be instaled. (This library is too large to be bundled with the plugin.)
+This filter also requires RTX-capable Nvidia GPU to run.
+
+DLISR
+-----
+
+`akarin.DLISR(clip clip, [, int scale=2])`
+
+This filter will use Nvidia [NGX Technology](https://developer.nvidia.com/rtx/ngx) DLISR DNN to scale up an input clip.
+Input clip must be in `vs.RGBS` format.
+The `scale` parameter can only be 2/4/8 and note that this filter uses considerable amount of GPU memory (e.g. 2GB for 2x scaling 1080p input)
+
+This filter requires `nvngx_dlisr.dll` to be present in the same directory as this plugin.
+This filter requires RTX-capable Nvidia GPU to run.
+
 Expr
 ----
 
diff --git a/ngx/README.md b/ngx/README.md
new file mode 100644
index 0000000..ee982ea
--- /dev/null
+++ b/ngx/README.md
@@ -0,0 +1,15 @@
+To build a standalone plugin:
+```
+g++ -o akarin2.dll -shared -gdb ngx*.cc -I. -I ../include -static -DSTANDALONE_NGX
+```
+
+To use the plugin:
+Rename the patched `nvngx_dlisr.dll` file as `akarin2.dlisr.dll`, and put in the
+same directory as `akarin2.dll`:
+
+```
+core.std.LoadPlugin(r'/absolute/path/to/akarin2.dll')
+c = core.std.BlankClip(format=vs.RGBS)   # only support RGBS clips
+res = core.akarin2.DLISR(c, scale=2)
+res.set_output()
+```
diff --git a/ngx/autodll.h b/ngx/autodll.h
new file mode 100644
index 0000000..541b788
--- /dev/null
+++ b/ngx/autodll.h
@@ -0,0 +1,41 @@
+#ifndef AUTODLL_H
+#define AUTODLL_H
+
+#include <string>
+#include <vector>
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <dlfcn.h>
+#endif
+
+template<typename T>
+struct importDll {
+#ifdef _WIN32
+	importDll(T &x, const wchar_t *dllName, const char *displayname, std::vector<std::string> &errors, const char *name) {
+		HMODULE h = LoadLibraryW(dllName);
+#else
+	importDll(T &x, const char *dllName, const char *name, std::vector<std::string> &errors) {
+		const char *displayname = dllName;
+		void *h = dlopen(dllName, RTLD_GLOBAL | RTLD_LAZY);
+#endif
+		if (h == 0) {
+			errors.push_back(std::string("unable to load ") + displayname);
+			return;
+		}
+#ifdef _WIN32
+		x = reinterpret_cast<T>(GetProcAddress(h, name));
+#else
+		x = reinterpret_cast<T>(dlsym(h, name));
+#endif
+		if (x == nullptr) {
+			errors.push_back(std::string("unable to find ") + name + " in " + displayname);
+		}
+	}
+};
+
+#define EXT_FN(dll, retty, name, args) \
+	static retty (*name) args = nullptr; \
+	static importDll<retty (*)args> _load ## name(name, dll, #name)
+
+#endif
diff --git a/ngx/cuda.h b/ngx/cuda.h
new file mode 100644
index 0000000..a1f17e9
--- /dev/null
+++ b/ngx/cuda.h
@@ -0,0 +1,259 @@
+#ifndef CUDA_DLL
+#error "CUDA_DLL not defined."
+#endif
+
+#ifndef CUDA_H
+#define CUDA_H
+
+#include "autodll.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void *CUdeviceptr;
+typedef int CUdevice;                 /**< CUDA device */
+typedef struct CUctx_st *CUcontext;   /**< CUDA context */
+typedef struct CUmod_st *CUmodule;    /**< CUDA module */
+typedef struct CUfunc_st *CUfunction; /**< CUDA function */
+typedef struct CUstream_st *CUstream; /**< CUDA stream */
+typedef struct CUevent_st *CUevent;   /**< CUDA event */
+typedef struct CUarray_st *CUarray;
+
+typedef enum {
+    CUDA_SUCCESS = 0,
+    CUDA_ERROR_INVALID_VALUE = 1,
+    CUDA_ERROR_OUT_OF_MEMORY = 2,
+    CUDA_ERROR_NOT_INITIALIZED = 3,
+    CUDA_ERROR_DEINITIALIZED = 4,
+    CUDA_ERROR_PROFILER_DISABLED = 5,
+    CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6,
+    CUDA_ERROR_PROFILER_ALREADY_STARTED = 7,
+    CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8,
+    CUDA_ERROR_NO_DEVICE = 100,
+    CUDA_ERROR_INVALID_DEVICE = 101,
+    CUDA_ERROR_INVALID_IMAGE = 200,
+    CUDA_ERROR_INVALID_CONTEXT = 201,
+    CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202,
+    CUDA_ERROR_MAP_FAILED = 205,
+    CUDA_ERROR_UNMAP_FAILED = 206,
+    CUDA_ERROR_ARRAY_IS_MAPPED = 207,
+    CUDA_ERROR_ALREADY_MAPPED = 208,
+    CUDA_ERROR_NO_BINARY_FOR_GPU = 209,
+    CUDA_ERROR_ALREADY_ACQUIRED = 210,
+    CUDA_ERROR_NOT_MAPPED = 211,
+    CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212,
+    CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213,
+    CUDA_ERROR_ECC_UNCORRECTABLE = 214,
+    CUDA_ERROR_UNSUPPORTED_LIMIT = 215,
+    CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216,
+    CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217,
+    CUDA_ERROR_INVALID_PTX = 218,
+    CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219,
+    CUDA_ERROR_NVLINK_UNCORRECTABLE = 220,
+    CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221,
+    CUDA_ERROR_INVALID_SOURCE = 300,
+    CUDA_ERROR_FILE_NOT_FOUND = 301,
+    CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
+    CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303,
+    CUDA_ERROR_OPERATING_SYSTEM = 304,
+    CUDA_ERROR_INVALID_HANDLE = 400,
+    CUDA_ERROR_NOT_FOUND = 500,
+    CUDA_ERROR_NOT_READY = 600,
+    CUDA_ERROR_ILLEGAL_ADDRESS = 700,
+    CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701,
+    CUDA_ERROR_LAUNCH_TIMEOUT = 702,
+    CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703,
+    CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704,
+    CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705,
+    CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708,
+    CUDA_ERROR_CONTEXT_IS_DESTROYED = 709,
+    CUDA_ERROR_ILLEGAL_INSTRUCTION = 715,
+    CUDA_ERROR_MISALIGNED_ADDRESS = 716,
+    CUDA_ERROR_INVALID_ADDRESS_SPACE = 717,
+    CUDA_ERROR_INVALID_PC = 718,
+    CUDA_ERROR_LAUNCH_FAILED = 719,
+    CUDA_ERROR_NOT_PERMITTED = 800,
+    CUDA_ERROR_NOT_SUPPORTED = 801,
+    CUDA_ERROR_UNKNOWN = 999
+} CUresult;
+
+typedef enum {
+    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,                  /**< Maximum number of threads per block */
+    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,                        /**< Maximum block dimension X */
+    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,                        /**< Maximum block dimension Y */
+    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,                        /**< Maximum block dimension Z */
+    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,                         /**< Maximum grid dimension X */
+    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,                         /**< Maximum grid dimension Y */
+    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,                         /**< Maximum grid dimension Z */
+    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,            /**< Maximum shared memory available per block in bytes */
+    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,                /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */
+    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,                  /**< Memory available on device for __constant__ variables in a CUDA C kernel in bytes */
+    CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,                             /**< Warp size in threads */
+    CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,                             /**< Maximum pitch in bytes allowed by memory copies */
+    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,               /**< Maximum number of 32-bit registers available per block */
+    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,                   /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */
+    CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,                            /**< Typical clock frequency in kilohertz */
+    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,                     /**< Alignment requirement for textures */
+    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,                           /**< Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT. */
+    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,                  /**< Number of multiprocessors on device */
+    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,                   /**< Specifies whether there is a run time limit on kernels */
+    CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,                            /**< Device is integrated with host memory */
+    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,                   /**< Device can map host memory into CUDA address space */
+    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,                          /**< Compute mode (See CUcomputemode for details) */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,               /**< Maximum 1D texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,               /**< Maximum 2D texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,              /**< Maximum 2D texture height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,               /**< Maximum 3D texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,              /**< Maximum 3D texture height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,               /**< Maximum 3D texture depth */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,       /**< Maximum 2D layered texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,      /**< Maximum 2D layered texture height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,      /**< Maximum layers in a 2D layered texture */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,         /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,        /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,     /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS */
+    CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,                     /**< Alignment requirement for surfaces */
+    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,                    /**< Device can possibly execute multiple kernels concurrently */
+    CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,                           /**< Device has ECC support enabled */
+    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,                            /**< PCI bus ID of the device */
+    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,                         /**< PCI device ID of the device */
+    CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35,                            /**< Device is using TCC driver model */
+    CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,                     /**< Peak memory clock frequency in kilohertz */
+    CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,               /**< Global memory bus width in bits */
+    CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,                         /**< Size of L2 cache in bytes */
+    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,        /**< Maximum resident threads per multiprocessor */
+    CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,                    /**< Number of asynchronous engines */
+    CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,                    /**< Device shares a unified address space with the host */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,       /**< Maximum 1D layered texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,      /**< Maximum layers in a 1D layered texture */
+    CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44,                      /**< Deprecated, do not use. */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,        /**< Maximum 2D texture width if CUDA_ARRAY3D_TEXTURE_GATHER is set */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,       /**< Maximum 2D texture height if CUDA_ARRAY3D_TEXTURE_GATHER is set */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47,     /**< Alternate maximum 3D texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,    /**< Alternate maximum 3D texture height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49,     /**< Alternate maximum 3D texture depth */
+    CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50,                         /**< PCI domain ID of the device */
+    CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51,               /**< Pitch alignment requirement for textures */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,          /**< Maximum cubemap texture width/height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,  /**< Maximum cubemap layered texture width/height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54, /**< Maximum layers in a cubemap layered texture */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55,               /**< Maximum 1D surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56,               /**< Maximum 2D surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57,              /**< Maximum 2D surface height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58,               /**< Maximum 3D surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59,              /**< Maximum 3D surface height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60,               /**< Maximum 3D surface depth */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,       /**< Maximum 1D layered surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,      /**< Maximum layers in a 1D layered surface */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,       /**< Maximum 2D layered surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,      /**< Maximum 2D layered surface height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,      /**< Maximum layers in a 2D layered surface */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66,          /**< Maximum cubemap surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,  /**< Maximum cubemap layered surface width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68, /**< Maximum layers in a cubemap layered surface */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,        /**< Maximum 1D linear texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,        /**< Maximum 2D linear texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,       /**< Maximum 2D linear texture height */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,        /**< Maximum 2D linear texture pitch in bytes */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73,     /**< Maximum mipmapped 2D texture width */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,    /**< Maximum mipmapped 2D texture height */
+    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,              /**< Major compute capability version number */
+    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,              /**< Minor compute capability version number */
+    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77,     /**< Maximum mipmapped 1D texture width */
+    CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78,           /**< Device supports stream priorities */
+    CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79,             /**< Device supports caching globals in L1 */
+    CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80,              /**< Device supports caching locals in L1 */
+    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,  /**< Maximum shared memory available per multiprocessor in bytes */
+    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,      /**< Maximum number of 32-bit registers available per multiprocessor */
+    CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,                        /**< Device can allocate managed memory on this system */
+    CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,                       /**< Device is on a multi-GPU board */
+    CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,              /**< Unique id for a group of devices on the same multi-GPU board */
+    CU_DEVICE_ATTRIBUTE_MAX
+} CUdevice_attribute;
+
+typedef enum CUmemorytype_enum {
+    CU_MEMORYTYPE_HOST = 0x01,
+    CU_MEMORYTYPE_DEVICE = 0x02,
+    CU_MEMORYTYPE_ARRAY = 0x03,
+    CU_MEMORYTYPE_UNIFIED = 0x04
+} CUmemorytype;
+
+typedef struct CUDA_MEMCPY3D_st {
+    size_t srcXInBytes;         /**< Source X in bytes */
+    size_t srcY;                /**< Source Y */
+    size_t srcZ;                /**< Source Z */
+    size_t srcLOD;              /**< Source LOD */
+    CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
+    const void *srcHost;        /**< Source host pointer */
+    CUdeviceptr srcDevice;      /**< Source device pointer */
+    CUarray srcArray;           /**< Source array reference */
+    void *reserved0;            /**< Must be NULL */
+    size_t srcPitch;            /**< Source pitch (ignored when src is array) */
+    size_t srcHeight;           /**< Source height (ignored when src is array; may be 0 if Depth==1) */
+
+    size_t dstXInBytes;         /**< Destination X in bytes */
+    size_t dstY;                /**< Destination Y */
+    size_t dstZ;                /**< Destination Z */
+    size_t dstLOD;              /**< Destination LOD */
+    CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
+    void *dstHost;              /**< Destination host pointer */
+    CUdeviceptr dstDevice;      /**< Destination device pointer */
+    CUarray dstArray;           /**< Destination array reference */
+    void *reserved1;            /**< Must be NULL */
+    size_t dstPitch;            /**< Destination pitch (ignored when dst is array) */
+    size_t dstHeight;           /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */
+
+    size_t WidthInBytes; /**< Width of 3D memory copy in bytes */
+    size_t Height;       /**< Height of 3D memory copy */
+    size_t Depth;        /**< Depth of 3D memory copy */
+} CUDA_MEMCPY3D;
+
+typedef int CUjit_option;
+
+#ifndef CUDA_FN
+#define CUDA_FN(ret, fn, args) EXT_FN(CUDA_DLL, ret, fn, args)
+#endif
+#ifndef CUDA_FN_OPTIONAL
+#define CUDA_FN_OPTIONAL(ret, fn, args)
+#endif
+#ifndef CUDA_FN_3020
+#define CUDA_FN_3020(ret, fn, fn_3020, args) CUDA_FN(ret, fn_3020, args)
+#endif
+#ifndef CUDA_FN_4000
+#define CUDA_FN_4000(ret, fn, fn_4000, args) CUDA_FN(ret, fn_4000, args)
+#endif
+
+CUDA_FN(CUresult, cuInit, (unsigned int Flags));
+CUDA_FN(CUresult, cuDeviceGetCount, (int *count));
+CUDA_FN(CUresult, cuDeviceGet, (CUdevice * device, int ordinal));
+CUDA_FN(CUresult, cuDeviceGetAttribute, (int *, CUdevice_attribute attrib, CUdevice dev));
+CUDA_FN(CUresult, cuDeviceGetName, (char *, int len, CUdevice dev));
+CUDA_FN(CUresult, cuDeviceTotalMem, (size_t *, CUdevice dev));
+CUDA_FN_3020(CUresult, cuCtxCreate, cuCtxCreate_v2, (CUcontext * pctx, unsigned int flags, CUdevice dev));
+CUDA_FN_4000(CUresult, cuCtxDestroy, cuCtxDestroy_v2, (CUcontext pctx));
+CUDA_FN(CUresult, cuCtxGetFlags, (unsigned int * flags));
+CUDA_FN(CUresult, cuCtxGetCurrent, (CUcontext * pctx));
+CUDA_FN(CUresult, cuCtxSetCurrent, (CUcontext ctx));
+CUDA_FN(CUresult, cuCtxPushCurrent, (CUcontext ctx));
+CUDA_FN(CUresult, cuCtxPopCurrent, (CUcontext * pctx));
+CUDA_FN(CUresult, cuProfilerStop, ());
+CUDA_FN(CUresult, cuCtxGetApiVersion, (CUcontext ctx, unsigned int *version));
+CUDA_FN(CUresult, cuCtxGetDevice, (CUdevice *));
+CUDA_FN(CUresult, cuModuleLoadData, (CUmodule * module, const void *image));
+CUDA_FN(CUresult, cuModuleLoadDataEx, (CUmodule * module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues));
+CUDA_FN(CUresult, cuModuleUnload, (CUmodule module));
+CUDA_FN(CUresult, cuModuleGetFunction, (CUfunction * hfunc, CUmodule hmod, const char *name));
+CUDA_FN_3020(CUresult, cuMemAlloc, cuMemAlloc_v2, (CUdeviceptr * dptr, size_t bytesize));
+CUDA_FN_3020(CUresult, cuMemFree, cuMemFree_v2, (CUdeviceptr dptr));
+CUDA_FN_3020(CUresult, cuMemcpyHtoD, cuMemcpyHtoD_v2, (CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount));
+CUDA_FN_3020(CUresult, cuMemcpyDtoH, cuMemcpyDtoH_v2, (void *dstHost, CUdeviceptr srcDevice, size_t ByteCount));
+CUDA_FN_3020(CUresult, cuMemcpyDtoD, cuMemcpyDtoD_v2, (CUdeviceptr dstHost, CUdeviceptr srcDevice, size_t ByteCount));
+
+CUDA_FN(CUresult, cuMemsetD8Async, (CUdeviceptr devPtr, int value, size_t count, CUstream st));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/ngx/internalfilters.h b/ngx/internalfilters.h
new file mode 100644
index 0000000..db8a3a4
--- /dev/null
+++ b/ngx/internalfilters.h
@@ -0,0 +1,8 @@
+#ifndef NGX_INTERNALFILTERS_H
+#define NGX_INTERNALFILTERS_H
+
+#include "VapourSynth.h"
+
+void VS_CC ngxInitialize(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin *plugin);
+
+#endif // INTERNALFILTERS_H
diff --git a/ngx/ngx.cc b/ngx/ngx.cc
new file mode 100644
index 0000000..86e6461
--- /dev/null
+++ b/ngx/ngx.cc
@@ -0,0 +1,280 @@
+#include <memory>
+#include <mutex>
+#include <utility>
+#include <string>
+#include <stdexcept>
+#include <vector>
+#include <stdio.h>
+
+#include "VapourSynth.h"
+#include "VSHelper.h"
+
+#ifndef _WIN32
+#error "Unsupported platform"
+#else
+static std::vector<std::string> autoDllErrors;
+static const wchar_t *dllPath(const wchar_t *suffix);
+#define NGX_DLL dllPath(L".dlisr.dll"),".dlisr.dll",autoDllErrors
+#define CUDA_DLL L"nvcuda.dll","nvcuda.dll",autoDllErrors
+#endif
+#include "cuda.h"
+#include "ngx.h"
+
+#define CK_NGX(x) do { \
+    int r = (x); \
+    if (r != NVSDK_NGX_Result_Success) { \
+        fprintf(stderr, "failed NGX call %s: %x\n", #x, r); \
+        abort(); \
+    } \
+} while (0)
+#define CK_CUDA(x) do { \
+    int r = (x); \
+    if (r != CUDA_SUCCESS) { \
+        fprintf(stderr, "failed cuda call %s: %d\n", #x, r); \
+        abort(); \
+    } \
+} while (0)
+
+static const wchar_t *dllPath(const wchar_t *suffix) {
+    static const std::wstring res = [suffix]() -> std::wstring {
+        HMODULE mod = 0;
+        if (GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, (char *)dllPath, &mod)) {
+            std::vector<wchar_t> buf;
+            size_t n = 0;
+            do {
+                buf.resize(buf.size() + MAX_PATH);
+                n = GetModuleFileNameW(mod, buf.data(), buf.size());
+            } while (n >= buf.size());
+            buf.resize(n);
+            std::wstring path(buf.begin(), buf.end() - 4);
+            path += suffix;
+            return path;
+        }
+        throw std::runtime_error("unable to locate myself");
+    }();
+    return res.c_str();
+}
+
+static void *cudaMalloc(size_t size) {
+    void *ptr = nullptr;
+    CK_CUDA(cuMemAlloc_v2(&ptr, size));
+    return ptr;
+}
+
+struct NgxData {
+    std::mutex lock;
+
+    VSNodeRef *node;
+    VSVideoInfo vi;
+    int scale;
+
+    typedef float T;
+    uint64_t pixel_size() const { return 3 * sizeof(T); }
+    uint64_t in_image_width() const   { return vi.width / scale; }
+    uint64_t out_image_width() const  { return vi.width; }
+    uint64_t in_image_height() const  { return vi.height / scale; }
+    uint64_t out_image_height() const { return vi.height; }
+    uint64_t in_image_row_bytes() const  { return pixel_size() * in_image_width(); }
+    uint64_t out_image_row_bytes() const { return pixel_size() * out_image_width(); }
+    uint64_t in_size() const  { return in_image_height()  * in_image_row_bytes(); }
+    uint64_t out_size() const { return out_image_height() * out_image_row_bytes(); }
+
+    NVSDK_NGX_Parameter *param;
+    NVSDK_NGX_Handle *DUHandle;
+    CUcontext ctx;
+
+    std::vector<uint8_t> in_host, out_host;
+    CUdeviceptr inp, outp;
+    void allocate() {
+        in_host.resize(in_size());
+        out_host.resize(out_size());
+        inp = cudaMalloc(in_size());
+        outp = cudaMalloc(out_size());
+    }
+
+    NgxData() : node(nullptr), vi(), scale(0), param(nullptr), DUHandle(nullptr), ctx(nullptr), inp(nullptr), outp(nullptr) {}
+    ~NgxData() {
+        if (ctx) {
+            CK_CUDA(cuCtxPushCurrent(ctx));
+            if (inp) CK_CUDA(cuMemFree_v2(inp));
+            if (outp) CK_CUDA(cuMemFree_v2(outp));
+            if (DUHandle) CK_NGX(NVSDK_NGX_CUDA_ReleaseFeature(DUHandle));
+            cuCtxPopCurrent(nullptr);
+        }
+    }
+};
+
+static void VS_CC ngxInit(VSMap *in, VSMap *out, void **instanceData, VSNode *node, VSCore *core, const VSAPI *vsapi) {
+    NgxData *d = static_cast<NgxData *>(*instanceData);
+    vsapi->setVideoInfo(&d->vi, 1, node);
+}
+
+static const VSFrameRef *VS_CC ngxGetFrame(int n, int activationReason, void **instanceData, void **frameData, VSFrameContext *frameCtx, VSCore *core, const VSAPI *vsapi) {
+    NgxData *d = static_cast<NgxData *>(*instanceData);
+
+    if (activationReason == arInitial) {
+        vsapi->requestFrameFilter(n, d->node, frameCtx);
+    } else if (activationReason == arAllFramesReady) {
+        const VSFrameRef *src = vsapi->getFrameFilter(n, d->node, frameCtx);
+
+        const VSFormat *fi = d->vi.format;
+        assert(vsapi->getFrameHeight(src, 0) == (int)d->in_image_height());
+        assert(vsapi->getFrameWidth(src, 0) == (int)d->in_image_width());
+        int planes[3] = { 0, 1, 2 };
+        const VSFrameRef *srcf[3] = { nullptr, nullptr, nullptr };
+        VSFrameRef *dst = vsapi->newVideoFrame2(fi, d->out_image_width(), d->out_image_height(), srcf, planes, src, core);
+
+        // The NGX API is not thread safe.
+        std::lock_guard<std::mutex> lock(d->lock);
+        CK_CUDA(cuCtxPushCurrent(d->ctx));
+
+        auto params = d->param;
+        params->Set(NVSDK_NGX_Parameter_Width, (uint64_t)d->in_image_width());
+        params->Set(NVSDK_NGX_Parameter_Height, (uint64_t)d->in_image_height());
+        params->Set(NVSDK_NGX_Parameter_Scale, d->scale);
+
+        // Create the feature
+        //CK_NGX(NVSDK_NGX_CUDA_CreateFeature(NVSDK_NGX_Feature_ImageSuperResolution, params, &d->DUHandle));
+
+        void *in_image_dev_ptr = d->inp;
+        void *out_image_dev_ptr = d->outp;;
+
+        uint8_t *host = d->in_host.data();
+        typedef float T;
+        const T factor = 255.0f;
+        for (int plane = 0; plane < 3; plane++) {
+            const size_t stride = vsapi->getStride(src, plane);
+            const uint8_t *ptr = (uint8_t*)vsapi->getReadPtr(src, plane);
+            for (size_t i = 0; i < d->in_image_height(); i++)
+                for (size_t j = 0; j < d->in_image_width(); j++)
+                    *(T*)&host[i * d->in_image_row_bytes() + j * d->pixel_size() + plane * sizeof(T)] = *(T*)&ptr[i * stride + j * sizeof(T)] * factor;
+        }
+        CK_CUDA(cuMemcpyHtoD_v2(in_image_dev_ptr, host, d->in_size()));
+
+        // Pass the pointers to the GPU allocations to the
+        // parameter block along with the format and size.
+        params->Set(NVSDK_NGX_Parameter_Color_SizeInBytes, d->in_size());
+        params->Set(NVSDK_NGX_Parameter_Color_Format, NVSDK_NGX_Buffer_Format_RGB32F);
+        params->Set(NVSDK_NGX_Parameter_Color, in_image_dev_ptr);
+        params->Set(NVSDK_NGX_Parameter_Output_SizeInBytes, d->out_size());
+        params->Set(NVSDK_NGX_Parameter_Output_Format, NVSDK_NGX_Buffer_Format_RGB32F);
+        params->Set(NVSDK_NGX_Parameter_Output, out_image_dev_ptr);
+
+        // Execute the feature.
+        CK_NGX(NVSDK_NGX_CUDA_EvaluateFeature(d->DUHandle, params, nullptr));
+
+        host = d->out_host.data();
+        CK_CUDA(cuMemcpyDtoH_v2(host, out_image_dev_ptr, d->out_size()));
+        for (int plane = 0; plane < 3; plane++) {
+            const size_t stride = vsapi->getStride(dst, plane);
+            uint8_t *ptr = (uint8_t*)vsapi->getWritePtr(dst, plane);
+            for (size_t i = 0; i < d->out_image_height(); i++)
+                for (size_t j = 0; j < d->out_image_width(); j++)
+                    *(T*)&ptr[i * stride + j * sizeof(T)] = *(T*)&host[i * d->out_image_row_bytes() + j * d->pixel_size() + plane * sizeof(T)] / factor;
+        }
+
+        cuCtxPopCurrent(nullptr);
+
+        vsapi->freeFrame(src);
+        return dst;
+    }
+
+    return nullptr;
+}
+
+static void VS_CC ngxFree(void *instanceData, VSCore *core, const VSAPI *vsapi) {
+    NgxData *d = static_cast<NgxData *>(instanceData);
+    vsapi->freeNode(d->node);
+
+    delete d;
+}
+
+static void VS_CC ngxCreate(const VSMap *in, VSMap *out, void *userData, VSCore *core, const VSAPI *vsapi) {
+    std::unique_ptr<NgxData> d(new NgxData);
+    int err;
+
+    try {
+        if (autoDllErrors.size() > 0) {
+                std::string error, last;
+                for (const auto &s: autoDllErrors) {
+                    if (error.size()) {
+                        if (last != s)
+                            error += "; " + s;
+                    } else
+                        error = s;
+                    last = s;
+                }
+                throw std::runtime_error(error);
+        }
+
+        d->node = vsapi->propGetNode(in, "clip", 0, &err);
+        d->vi = *vsapi->getVideoInfo(d->node);
+
+        if (!isConstantFormat(&d->vi)) {
+            throw std::runtime_error("Only clips with constant format and dimensions allowed");
+        }
+        if (d->vi.format->numPlanes != 3 || d->vi.format->colorFamily != cmRGB)
+            throw std::runtime_error("input clip must be RGB format");
+        if (d->vi.format->sampleType != stFloat || d->vi.format->bitsPerSample != 32)
+            throw std::runtime_error("input clip must be 32-bit float format");
+
+        int scale = int64ToIntS(vsapi->propGetInt(in, "scale", 0, &err));
+        if (err) scale = 2;
+        if (scale != 2 && scale != 4 && scale != 8)
+            throw std::runtime_error("scale must be 2/4/8");
+        d->scale = scale;
+    } catch (std::runtime_error &e) {
+        if (d->node)
+            vsapi->freeNode(d->node);
+        vsapi->setError(out, (std::string{ "DLISR: " } + e.what()).c_str());
+        return;
+    }
+
+    d->vi.width *= d->scale;
+    d->vi.height *= d->scale;
+
+    static bool inited = []() -> bool {
+        CUcontext ctx;
+        bool hasCtx = cuCtxGetCurrent(&ctx) == CUDA_SUCCESS;
+        CK_NGX(NVSDK_NGX_CUDA_Init(0, L"./", NVSDK_NGX_Version_API));
+        // We don't expect NVSDK_NGX_CUDA_Init to create a context, but if it did, we need to
+        // switch to save a global CUDA context, instead of the pre-filter context.
+        if (!hasCtx && cuCtxGetCurrent(&ctx) == CUDA_SUCCESS) {
+            fprintf(stderr, "invariant violated: NVSDK_NGX_CUDA_Init created CUDA context: %p\n", ctx);
+            abort();
+        }
+        return true;
+    }();
+    (void) inited;
+    NV_new_Parameter(&d->param);
+
+    d->param->Set(NVSDK_NGX_Parameter_Width, d->in_image_width());
+    d->param->Set(NVSDK_NGX_Parameter_Height, d->in_image_height());
+    d->param->Set(NVSDK_NGX_Parameter_Scale, d->scale);
+
+    // Get the scratch buffer size and create the scratch allocation.
+    size_t byteSize{ 0u };
+    CK_NGX(NVSDK_NGX_CUDA_GetScratchBufferSize(NVSDK_NGX_Feature_ImageSuperResolution, d->param, &byteSize));
+    if (byteSize != 0) // should request none.
+        abort();
+
+    // Create the feature
+    CK_NGX(NVSDK_NGX_CUDA_CreateFeature(NVSDK_NGX_Feature_ImageSuperResolution, d->param, &d->DUHandle));
+    CK_CUDA(cuCtxGetCurrent(&d->ctx));
+    d->allocate();
+    CK_CUDA(cuCtxPopCurrent(nullptr));
+
+    vsapi->createFilter(in, out, "DLISR", ngxInit, ngxGetFrame, ngxFree, fmParallel, 0, d.release(), core);
+}
+
+//////////////////////////////////////////
+// Init
+
+#ifndef STANDALONE_NGX
+void VS_CC ngxInitialize(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin *plugin) {
+#else
+extern "C" void VS_CC VapourSynthPluginInit(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin *plugin) {
+    configFunc("info.akarin.plugin", "akarin2", "Experimental Nvidia DLISR plugin", VAPOURSYNTH_API_VERSION, 1, plugin);
+#endif
+    registerFunc("DLISR", "clip:clip;scale:int:opt", ngxCreate, nullptr, plugin);
+}
diff --git a/ngx/ngx.h b/ngx/ngx.h
new file mode 100644
index 0000000..9e1b23c
--- /dev/null
+++ b/ngx/ngx.h
@@ -0,0 +1,228 @@
+#if !defined(NGX_DLL) && !defined(NGX_NO_FUNCS)
+#error "NGX_DLL not defined."
+#endif
+
+#ifndef NGX_H
+#define NGX_H
+
+#include <stdint.h>
+
+struct ID3D12Resource;
+struct ID3D11Resource;
+
+enum NVSDK_NGX_Version : int {
+	NVSDK_NGX_Version_API = 0x12,
+};
+
+enum NVSDK_NGX_Result : uint32_t {
+	NVSDK_NGX_Result_Success = 0x1,
+	NVSDK_NGX_Result_Fail = 0xBAD00000,
+	NVSDK_NGX_Result_FAIL_FeatureNotSupported = 0xBAD00001,
+	NVSDK_NGX_Result_FAIL_PlatformError = 0xBAD00002,
+	NVSDK_NGX_Result_FAIL_FeatureAlreadyExists = 0xBAD00003,
+	NVSDK_NGX_Result_FAIL_FeatureNotFound = 0xBAD00004,
+	NVSDK_NGX_Result_FAIL_InvalidParameter = 0xBAD00005,
+	NVSDK_NGX_Result_FAIL_ScratchBufferTooSmall = 0xBAD00006,
+	NVSDK_NGX_Result_FAIL_NotInitialized = 0xBAD00007,
+	NVSDK_NGX_Result_FAIL_UnsupportedInputFormat = 0xBAD00008,
+	NVSDK_NGX_Result_FAIL_RWFlagMissing = 0xBAD00009,
+	NVSDK_NGX_Result_FAIL_MissingInput = 0xBAD0000A,
+	NVSDK_NGX_Result_FAIL_UnableToInitializeFeature = 0xBAD0000B,
+	NVSDK_NGX_Result_FAIL_OutOfDate = 0xBAD0000C,
+	NVSDK_NGX_Result_FAIL_OutOfGPUMemory = 0xBAD0000D,
+	NVSDK_NGX_Result_FAIL_UnsupportedFormat = 0xBAD0000E,
+	NVSDK_NGX_Result_FAIL_UnableToWriteToAppDataPath = 0xBAD0000F,
+	NVSDK_NGX_Result_FAIL_UnsupportedParameter = 0xBAD00010,
+};
+
+enum NVSDK_NGX_Buffer_Format : int {
+	NVSDK_NGX_Buffer_Format_Unknown = 0x0,
+	NVSDK_NGX_Buffer_Format_RGB8UI = 0x1,
+	NVSDK_NGX_Buffer_Format_RGB16F = 0x2,
+	NVSDK_NGX_Buffer_Format_RGB32F = 0x3,
+	NVSDK_NGX_Buffer_Format_RGBA8UI = 0x4,
+	NVSDK_NGX_Buffer_Format_RGBA16F = 0x5,
+	NVSDK_NGX_Buffer_Format_RGBA32F = 0x6,
+};
+
+enum NVSDK_NGX_Feature : int {
+	NVSDK_NGX_Feature_Reserved0 = 0x0,
+	NVSDK_NGX_Feature_SuperSampling = 0x1,
+	NVSDK_NGX_Feature_InPainting = 0x2,
+	NVSDK_NGX_Feature_ImageSuperResolution = 0x3,
+	NVSDK_NGX_Feature_SlowMotion = 0x4,
+	NVSDK_NGX_Feature_VideoSuperResolution = 0x5,
+	NVSDK_NGX_Feature_Reserved6 = 0x6,
+	NVSDK_NGX_Feature_Reserved7 = 0x7,
+	NVSDK_NGX_Feature_Reserved8 = 0x8,
+	NVSDK_NGX_Feature_ImageSignalProcessing = 0x9,
+	NVSDK_NGX_Feature_Count = 0xA,
+};
+
+struct NVSDK_NGX_Parameter {
+	virtual void Set(const char *, void *) = 0;
+	virtual void Set(const char *, struct ID3D12Resource *) = 0;
+	virtual void Set(const char *, struct ID3D11Resource *) = 0;
+	virtual void Set(const char *, int) = 0;
+	virtual void Set(const char *, unsigned int) = 0;
+	virtual void Set(const char *, long double) = 0;
+	virtual void Set(const char *, float) = 0;
+	virtual void Set(const char *, uint64_t) = 0;
+	virtual NVSDK_NGX_Result Get(const char *, void **) = 0;
+	virtual NVSDK_NGX_Result Get(const char *, struct ID3D12Resource **) = 0;
+	virtual NVSDK_NGX_Result Get(const char *, struct ID3D11Resource **) = 0;
+	virtual NVSDK_NGX_Result Get(const char *, int *) = 0;
+	virtual NVSDK_NGX_Result Get(const char *, unsigned int *) = 0;
+	virtual NVSDK_NGX_Result Get(const char *, long double *) = 0;
+	virtual NVSDK_NGX_Result Get(const char *, float *) = 0;
+	virtual NVSDK_NGX_Result Get(const char *, uint64_t *) = 0;
+	virtual void Reset() = 0;
+};
+
+void NV_new_Parameter(NVSDK_NGX_Parameter **p);
+
+struct NVSDK_NGX_Handle {
+	NVSDK_NGX_Feature Id;
+};
+
+// Parameters
+#define NVSDK_NGX_Parameter_ImageSuperResolution_Available "ImageSuperResolution.Available"
+#define NVSDK_NGX_Parameter_Width "Width"
+#define NVSDK_NGX_Parameter_Height "Height"
+#define NVSDK_NGX_Parameter_Scale "Scale"
+#define NVSDK_NGX_Parameter_Scratch "Scratch"
+#define NVSDK_NGX_Parameter_Scratch_SizeInBytes "Scratch.SizeInBytes"
+#define NVSDK_NGX_Parameter_Color_SizeInBytes "Color.SizeInBytes"
+#define NVSDK_NGX_Parameter_Color_Format "Color.Format"
+#define NVSDK_NGX_Parameter_Color "Color"
+#define NVSDK_NGX_Parameter_Output_SizeInBytes "Output.SizeInBytes"
+#define NVSDK_NGX_Parameter_Output_Format "Output.Format"
+#define NVSDK_NGX_Parameter_Output "Output"
+
+#ifndef NGX_NO_FUNCS
+#include "autodll.h"
+
+// NVSDK_NGX_Init
+// -------------------------------------
+// 
+// InApplicationId:
+//      Unique Id provided by NVIDIA
+//
+// InApplicationDataPath:
+//      Folder to store logs and other temporary files (write access required)
+//
+// InDevice: [d3d11/12 only]
+//      DirectX device to use
+//
+// DESCRIPTION:
+//      Initializes new SDK instance.
+//
+EXT_FN(NGX_DLL, NVSDK_NGX_Result, NVSDK_NGX_CUDA_Init, (unsigned long long InApplicationId, const wchar_t *InApplicationDataPath, NVSDK_NGX_Version InSDKVersion/* = NVSDK_NGX_Version_API*/));
+
+// NVSDK_NGX_GetScratchBufferSize
+// ----------------------------------------------------------
+//
+// InFeatureId:
+//      AI feature in question
+//
+// InParameters:
+//      Parameters used by the feature to help estimate scratch buffer size
+//
+// OutSizeInBytes:
+//      Number of bytes needed for the scratch buffer for the specified feature.
+//
+// DESCRIPTION:
+//      SDK needs a buffer of a certain size provided by the client in
+//      order to initialize AI feature. Once feature is no longer
+//      needed buffer can be released. It is safe to reuse the same
+//      scratch buffer for different features as long as minimum size
+//      requirement is met for all features. Please note that some
+//      features might not need a scratch buffer so return size of 0
+//      is completely valid.
+//
+EXT_FN(NGX_DLL, NVSDK_NGX_Result, NVSDK_NGX_CUDA_GetScratchBufferSize, (NVSDK_NGX_Feature InFeatureId, const NVSDK_NGX_Parameter *InParameters, size_t *OutSizeInBytes));
+
+// NVSDK_NGX_CreateFeature
+// -------------------------------------
+//
+// InCmdList:[d3d12 only]
+//      Command list to use to execute GPU commands. Must be:
+//      - Open and recording 
+//      - With node mask including the device provided in NVSDK_NGX_D3D12_Init
+//      - Execute on non-copy command queue.
+// InDevCtx: [d3d11 only]
+//      Device context to use to execute GPU commands
+//
+// InFeatureID:
+//      AI feature to initialize
+//
+// InParameters:
+//      List of parameters 
+// 
+// OutHandle:
+//      Handle which uniquely identifies the feature. If feature with
+//      provided parameters already exists the "already exists" error code is returned.
+//
+// DESCRIPTION:
+//      Each feature needs to be created before it can be used. 
+//      Refer to the sample code to find out which input parameters
+//      are needed to create specific feature.
+//
+EXT_FN(NGX_DLL, NVSDK_NGX_Result, NVSDK_NGX_CUDA_CreateFeature, (NVSDK_NGX_Feature InFeatureID, const NVSDK_NGX_Parameter *InParameters, NVSDK_NGX_Handle **OutHandle));
+
+// NVSDK_NGX_EvaluateFeature
+// -------------------------------------
+//
+// InCmdList:[d3d12 only]
+//      Command list to use to execute GPU commands. Must be:
+//      - Open and recording 
+//      - With node mask including the device provided in NVSDK_NGX_D3D12_Init
+//      - Execute on non-copy command queue.
+// InDevCtx: [d3d11 only]
+//      Device context to use to execute GPU commands
+//
+// InFeatureHandle:
+//      Handle representing feature to be evaluated
+// 
+// InParameters:
+//      List of parameters required to evaluate feature
+//
+// InCallback:
+//      Optional callback for features which might take longer
+//      to execute. If specified SDK will call it with progress
+//      values in range 0.0f - 1.0f. Client application can indicate
+//      that evaluation should be cancelled by setting OutShouldCancel
+//      to true.
+//
+// DESCRIPTION:
+//      Evaluates given feature using the provided parameters and
+//      pre-trained NN. Please note that for most features
+//      it can be beneficial to pass as many input buffers and parameters
+//      as possible (for example provide all render targets like color, albedo, normals, depth etc)
+//
+typedef void (*PFN_NVSDK_NGX_ProgressCallback)(float InCurrentProgress, bool &OutShouldCancel);
+EXT_FN(NGX_DLL, NVSDK_NGX_Result, NVSDK_NGX_CUDA_EvaluateFeature, (const NVSDK_NGX_Handle *InFeatureHandle, const NVSDK_NGX_Parameter *InParameters, PFN_NVSDK_NGX_ProgressCallback InCallback /* = nullptr */));
+
+// NVSDK_NGX_Release
+// -------------------------------------
+// 
+// InHandle:
+//      Handle to feature to be released
+//
+// DESCRIPTION:
+//      Releases feature with a given handle.
+//      Handles are not reference counted so
+//      after this call it is invalid to use provided handle.
+//
+EXT_FN(NGX_DLL, NVSDK_NGX_Result, NVSDK_NGX_CUDA_ReleaseFeature, (NVSDK_NGX_Handle *InHandle));
+
+// NVSDK_NGX_Shutdown
+// -------------------------------------
+// 
+// DESCRIPTION:
+//      Shuts down the current SDK instance and releases all resources.
+//
+EXT_FN(NGX_DLL, NVSDK_NGX_Result, NVSDK_NGX_CUDA_Shutdown, ());
+#endif // !defined(NGX_NO_FUNCS)
+
+#endif // defined(NGX_H)
diff --git a/ngx/ngximpl.cc b/ngx/ngximpl.cc
new file mode 100644
index 0000000..62d3395
--- /dev/null
+++ b/ngx/ngximpl.cc
@@ -0,0 +1,78 @@
+#include <stdint.h>
+#include <string>
+#include <map>
+
+#include <stdio.h>
+
+#define NGX_NO_FUNCS
+#include "ngx.h"
+
+struct NVSDK_NGX_Parameter_Impl: public NVSDK_NGX_Parameter {
+	union u {
+		void *p;
+		long double d;
+		uint64_t u64;
+
+		u() : u64(0) {}
+		u(void *p) : p(p) {}
+		u(int i) : u64((int64_t)i) {}
+		u(unsigned int ui) : u64(ui) {}
+		u(float f) : d(f) {}
+		u(long double d) : d(d) {}
+		u(uint64_t u64) : u64(u64) {}
+		operator void *() const { return p; }
+		operator int() const { return (int)u64; }
+		operator unsigned int() const { return u64; }
+		operator float() const { return d; }
+		operator long double() const { return d; }
+		operator uint64_t() const { return u64; }
+	};
+	std::map<std::string, u> map;
+	template<typename T> NVSDK_NGX_Result get(const char *name, T* p) {
+		auto it = map.find(name);
+		if (it != map.end()) {
+			*p = it->second;
+			return NVSDK_NGX_Result_Success;
+		}
+		return NVSDK_NGX_Result_Fail;
+	}
+	void Set(const char *, void *) override;
+	void Set(const char *, struct ID3D12Resource *) override;
+	void Set(const char *, struct ID3D11Resource *) override;
+	void Set(const char *, int) override;
+	void Set(const char *, unsigned int) override;
+	void Set(const char *, long double) override;
+	void Set(const char *, float) override;
+	void Set(const char *, uint64_t) override;
+	NVSDK_NGX_Result Get(const char *, void **) override;
+	NVSDK_NGX_Result Get(const char *, struct ID3D12Resource **) override;
+	NVSDK_NGX_Result Get(const char *, struct ID3D11Resource **) override;
+	NVSDK_NGX_Result Get(const char *, int *) override;
+	NVSDK_NGX_Result Get(const char *, unsigned int *) override;
+	NVSDK_NGX_Result Get(const char *, long double *) override;
+	NVSDK_NGX_Result Get(const char *, float *) override;
+	NVSDK_NGX_Result Get(const char *, uint64_t *) override;
+	void Reset() override;
+};
+
+void NVSDK_NGX_Parameter_Impl::Set(const char *name, void *p) { map.insert({ name, u(p) }); }
+void NVSDK_NGX_Parameter_Impl::Set(const char *name, struct ID3D12Resource *p) { map.insert({ name, u((void*)p) }); }
+void NVSDK_NGX_Parameter_Impl::Set(const char *name, struct ID3D11Resource *p) { map.insert({ name, u((void*)p) }); }
+void NVSDK_NGX_Parameter_Impl::Set(const char *name, int i) { map.insert({ name, u(i) }); }
+void NVSDK_NGX_Parameter_Impl::Set(const char *name, unsigned int ui) { map.insert({ name, u(ui) }); }
+void NVSDK_NGX_Parameter_Impl::Set(const char *name, long double d) { map.insert({ name, u(d) }); }
+void NVSDK_NGX_Parameter_Impl::Set(const char *name, float f) { map.insert({ name, u(f) }); }
+void NVSDK_NGX_Parameter_Impl::Set(const char *name, uint64_t u64) { map.insert({ name, u(u64) }); }
+NVSDK_NGX_Result NVSDK_NGX_Parameter_Impl::Get(const char *name, void **p) { return get<void*>(name, p); }
+NVSDK_NGX_Result NVSDK_NGX_Parameter_Impl::Get(const char *name, struct ID3D12Resource **p) { return get<void*>(name, (void**)p); }
+NVSDK_NGX_Result NVSDK_NGX_Parameter_Impl::Get(const char *name, struct ID3D11Resource **p) { return get<void*>(name, (void**)p); }
+NVSDK_NGX_Result NVSDK_NGX_Parameter_Impl::Get(const char *name, int *p) { return get<int>(name, p); }
+NVSDK_NGX_Result NVSDK_NGX_Parameter_Impl::Get(const char *name, unsigned int *p) { return get<unsigned int>(name, p); }
+NVSDK_NGX_Result NVSDK_NGX_Parameter_Impl::Get(const char *name, long double *p) { return get<long double>(name, p); }
+NVSDK_NGX_Result NVSDK_NGX_Parameter_Impl::Get(const char *name, float *p) { return get<float>(name, p); }
+NVSDK_NGX_Result NVSDK_NGX_Parameter_Impl::Get(const char *name, uint64_t *p) { return get<uint64_t>(name, p); }
+void NVSDK_NGX_Parameter_Impl::Reset() { map.clear(); }
+
+void NV_new_Parameter(NVSDK_NGX_Parameter **p) {
+	*p = new NVSDK_NGX_Parameter_Impl();
+}
diff --git a/plugin.cpp b/plugin.cpp
index fe7aa1f..fb40fd4 100644
--- a/plugin.cpp
+++ b/plugin.cpp
@@ -1,8 +1,12 @@
 #include "VapourSynth.h"
 
 #include "expr/internalfilters.h"
+#include "ngx/internalfilters.h"
+#include "vfx/internalfilters.h"
 
 VS_EXTERNAL_API(void) VapourSynthPluginInit(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin *plugin) {
     configFunc("info.akarin.vsplugin", "akarin", "Akarin's Experimental Filters", VAPOURSYNTH_API_VERSION, 1, plugin);
     exprInitialize(configFunc, registerFunc, plugin);
+    ngxInitialize(configFunc, registerFunc, plugin);
+    vfxInitialize(configFunc, registerFunc, plugin);
 }
diff --git a/vfx/README.md b/vfx/README.md
new file mode 100644
index 0000000..3d7bae7
--- /dev/null
+++ b/vfx/README.md
@@ -0,0 +1,35 @@
+First install appropriate Video Effects library (v0.6 beta) from https://www.nvidia.com/en-us/geforce/broadcasting/broadcast-sdk/resources/.
+Make sure your environment is setup correctly by downloading
+[opencv_world346.dll](https://github.com/NVIDIA/MAXINE-VFX-SDK/blob/master/samples/external/opencv/bin/opencv_world346.dll) and
+[VideoEffectsApp.exe](https://github.com/NVIDIA/MAXINE-VFX-SDK/blob/master/samples/VideoEffectsApp/VideoEffectsApp.exe).
+Play with VideoEffectsApp.exe to make sure it works before proceeding.
+
+And then build the plugin like this with mingw:
+```
+g++ -DSTANDALONE_VFX -o akarin2.dll -shared -static vfx.cc -I ../include nvvfx/src/*.cpp -I nvvfx/include -Wall -O2
+```
+
+Example code:
+```python
+import os, os.path
+import vapoursynth as vs
+core = vs.core
+import mvsfunc as mvf
+
+core.std.LoadPlugin(os.path.abspath(os.path.join(os.getcwd(), 'akarin2.dll')))
+
+c = core.imwri.Read('input.png')
+c = mvf.Depth(c, 32) # only supports vs.RGBS formats
+
+# OP_AR: Artefact reduction, OP_SUPERRES: super resolution, OP_DENOISE: denoise.
+# strength: 0 for weak effect (weaker enhancement), 1 for strong effect (enhancement).
+# scale = 2/3/4 for super resolution, otherwise unused.
+OP_AR, OP_SUPERRES, OP_DENOISE = range(3)
+d = core.akarin2.DLVFX(c, op=OP_SUPERRES, scale=2, strength=0)
+
+d = core.imwri.Write(d, 'PNG', 'out-%d.png')
+
+d.set_output()
+```
+
+This plugin is provided as is, and I haven't been able to test it locally.
diff --git a/vfx/internalfilters.h b/vfx/internalfilters.h
new file mode 100644
index 0000000..1407114
--- /dev/null
+++ b/vfx/internalfilters.h
@@ -0,0 +1,8 @@
+#ifndef VFX_INTERNALFILTERS_H
+#define VFX_INTERNALFILTERS_H
+
+#include "VapourSynth.h"
+
+void VS_CC vfxInitialize(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin *plugin);
+
+#endif // INTERNALFILTERS_H
diff --git a/vfx/nvvfx/NVOSSLicense.txt b/vfx/nvvfx/NVOSSLicense.txt
new file mode 100644
index 0000000..ce0ca00
--- /dev/null
+++ b/vfx/nvvfx/NVOSSLicense.txt
@@ -0,0 +1,25 @@
+The contents of this folder are governed by the MIT license
+
+Copyright (C) 2019, NVIDIA Corporation, all rights reserved.
+
+                       MIT License
+					   
+Permission is hereby granted, free of charge, to any person 
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+ 
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+ 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
diff --git a/vfx/nvvfx/include/nvCVImage.h b/vfx/nvvfx/include/nvCVImage.h
new file mode 100644
index 0000000..c5ed9ed
--- /dev/null
+++ b/vfx/nvvfx/include/nvCVImage.h
@@ -0,0 +1,674 @@
+/*###############################################################################
+#
+# Copyright 2020-2021 NVIDIA Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+###############################################################################*/
+
+#ifndef __NVCVIMAGE_H__
+#define __NVCVIMAGE_H__
+
+#include "nvCVStatus.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // ___cplusplus
+
+
+#ifndef   RTX_CAMERA_IMAGE    // Compile with -DRTX_CAMERA_IMAGE=0 to get more functionality and bug fixes.
+  #define RTX_CAMERA_IMAGE  0 // Set to 1 for RTXCamera, which needs an old version, that avoids new functionality
+#endif // RTX_CAMERA_IMAGE
+
+
+struct CUstream_st;   // typedef struct CUstream_st *CUstream;
+
+//! The format of pixels in an image.
+typedef enum NvCVImage_PixelFormat {
+  NVCV_FORMAT_UNKNOWN  = 0,    //!< Unknown pixel format.
+  NVCV_Y               = 1,    //!< Luminance (gray).
+  NVCV_A               = 2,    //!< Alpha (opacity)
+  NVCV_YA              = 3,    //!< { Luminance, Alpha }
+  NVCV_RGB             = 4,    //!< { Red, Green, Blue }
+  NVCV_BGR             = 5,    //!< { Red, Green, Blue }
+  NVCV_RGBA            = 6,    //!< { Red, Green, Blue, Alpha }
+  NVCV_BGRA            = 7,    //!< { Red, Green, Blue, Alpha }
+#if RTX_CAMERA_IMAGE
+  NVCV_YUV420          = 8,    //!< Luminance and subsampled Chrominance { Y, Cb, Cr }
+  NVCV_YUV422          = 9,    //!< Luminance and subsampled Chrominance { Y, Cb, Cr }
+#else // !RTX_CAMERA_IMAGE
+  NVCV_ARGB            = 8,    //!< { Red, Green, Blue, Alpha }
+  NVCV_ABGR            = 9,    //!< { Red, Green, Blue, Alpha }
+  NVCV_YUV420          = 10,   //!< Luminance and subsampled Chrominance { Y, Cb, Cr }
+  NVCV_YUV422          = 11,   //!< Luminance and subsampled Chrominance { Y, Cb, Cr }
+#endif // !RTX_CAMERA_IMAGE
+  NVCV_YUV444          = 12,   //!< Luminance and full bandwidth Chrominance { Y, Cb, Cr }
+} NvCVImage_PixelFormat;
+
+
+//! The data type used to represent each component of an image.
+typedef enum NvCVImage_ComponentType {
+  NVCV_TYPE_UNKNOWN  = 0,      //!< Unknown type of component.
+  NVCV_U8            = 1,      //!< Unsigned 8-bit integer.
+  NVCV_U16           = 2,      //!< Unsigned 16-bit integer.
+  NVCV_S16           = 3,      //!< Signed 16-bit integer.
+  NVCV_F16           = 4,      //!< 16-bit floating-point.
+  NVCV_U32           = 5,      //!< Unsigned 32-bit integer.
+  NVCV_S32           = 6,      //!< Signed 32-bit integer.
+  NVCV_F32           = 7,      //!< 32-bit floating-point (float).
+  NVCV_U64           = 8,      //!< Unsigned 64-bit integer.
+  NVCV_S64           = 9,      //!< Signed 64-bit integer.
+  NVCV_F64           = 10,     //!< 64-bit floating-point (double).
+} NvCVImage_ComponentType;
+
+
+//! Value for the planar field or layout argument. Two values are currently accommodated for RGB:
+//! Interleaved or chunky storage locates all components of a pixel adjacent in memory,
+//! e.g. RGBRGBRGB... (denoted [RGB]).
+//! Planar storage locates the same component of all pixels adjacent in memory,
+//! e.g. RRRRR...GGGGG...BBBBB... (denoted [R][G][B])
+//! YUV has many more variants.
+//! 4:2:2 can be chunky, planar or semi-planar, with different orderings.
+//! 4:2:0 can be planar or semi-planar, with different orderings.
+//! Aliases are provided for FOURCCs defined at fourcc.org.
+//! Note: the LSB can be used to distinguish between chunky and planar formats.
+#define NVCV_INTERLEAVED   0   //!< All components of pixel(x,y) are adjacent (same as chunky) (default for non-YUV).
+#define NVCV_CHUNKY        0   //!< All components of pixel(x,y) are adjacent (same as interleaved).
+#define NVCV_PLANAR        1   //!< The same component of all pixels are adjacent.
+#define NVCV_UYVY          2   //!< [UYVY]    Chunky 4:2:2 (default for 4:2:2)
+#define NVCV_VYUY          4   //!< [VYUY]    Chunky 4:2:2
+#define NVCV_YUYV          6   //!< [YUYV]    Chunky 4:2:2
+#define NVCV_YVYU          8   //!< [YVYU]    Chunky 4:2:2
+#define NVCV_CYUV         10   //!< [YUV]     Chunky 4:4:4
+#define NVCV_CYVU         12   //!< [YVU]     Chunky 4:4:4
+#define NVCV_YUV           3   //!< [Y][U][V] Planar 4:2:2 or 4:2:0 or 4:4:4
+#define NVCV_YVU           5   //!< [Y][V][U] Planar 4:2:2 or 4:2:0 or 4:4:4
+#define NVCV_YCUV          7   //!< [Y][UV]   Semi-planar 4:2:2 or 4:2:0 (default for 4:2:0)
+#define NVCV_YCVU          9   //!< [Y][VU]   Semi-planar 4:2:2 or 4:2:0
+
+//! The following are FOURCC aliases for specific layouts. Note that it is still required to specify the format as well
+//! as the layout, e.g. NVCV_YUV420 and NVCV_NV12, even though the NV12 layout is only associated with YUV420 sampling.
+#define NVCV_I420  NVCV_YUV    //!< [Y][U][V] Planar 4:2:0
+#define NVCV_IYUV  NVCV_YUV    //!< [Y][U][V] Planar 4:2:0
+#define NVCV_YV12  NVCV_YVU    //!< [Y][V][U] Planar 4:2:0
+#define NVCV_NV12  NVCV_YCUV   //!< [Y][UV]   Semi-planar 4:2:0 (default for 4:2:0)
+#define NVCV_NV21  NVCV_YCVU   //!< [Y][VU]   Semi-planar 4:2:0
+#define NVCV_YUY2  NVCV_YUYV   //!< [YUYV]    Chunky 4:2:2
+#define NVCV_I444  NVCV_YUV    //!< [Y][U][V] Planar 4:4:4
+#define NVCV_YM24  NVCV_YUV    //!< [Y][U][V] Planar 4:4:4
+#define NVCV_YM42  NVCV_YVU    //!< [Y][V][U] Planar 4:4:4
+#define NVCV_NV24  NVCV_YCUV   //!< [Y][UV]   Semi-planar 4:4:4
+#define NVCV_NV42  NVCV_YCVU   //!< [Y][VU]   Semi-planar 4:4:4
+
+//! The following are ORed together for the colorspace field for YUV.
+//! NVCV_601 and NVCV_709 describe the color axes of YUV.
+//! NVCV_VIDEO_RANGE and NVCV_VIDEO_RANGE describe the range, [16, 235] or [0, 255], respectively.
+//! NVCV_CHROMA_COSITED and NVCV_CHROMA_INTSTITIAL describe the location of the chroma samples.
+#define NVCV_601               0x00   //!< The Rec.601  YUV colorspace, typically used for SD.
+#define NVCV_709               0x01   //!< The Rec.709  YUV colorspace, typically used for HD.
+#define NVCV_2020              0x02   //!< The Rec.2020 YUV colorspace.
+#define NVCV_VIDEO_RANGE       0x00   //!< The video range is [16, 235].
+#define NVCV_FULL_RANGE        0x04   //!< The video range is [ 0, 255].
+#define NVCV_CHROMA_COSITED    0x00   //!< The chroma is sampled at the same location as the luma samples horizontally.
+#define NVCV_CHROMA_INTSTITIAL 0x08   //!< The chroma is sampled between luma samples horizontally.
+#define NVCV_CHROMA_TOPLEFT    0x10   //!< The chroma is sampled at the same location as the luma samples horizontally and vertically.
+#define NVCV_CHROMA_MPEG2      NVCV_CHROMA_COSITED        //!< As is most video.
+#define NVCV_CHROMA_MPEG1      NVCV_CHROMA_INTSTITIAL
+#define NVCV_CHROMA_JPEG       NVCV_CHROMA_INTSTITIAL
+#define NVCV_CHROMA_H261       NVCV_CHROMA_INTSTITIAL
+#define NVCV_CHROMA_INTERSTITIAL  NVCV_CHROMA_INTSTITIAL  //!< Correct spelling
+
+//! This is the value for the gpuMem field or the memSpace argument.
+#define NVCV_CPU          0   //!< The buffer is stored in CPU memory.
+#define NVCV_GPU          1   //!< The buffer is stored in CUDA memory.
+#define NVCV_CUDA         1   //!< The buffer is stored in CUDA memory.
+#define NVCV_CPU_PINNED   2   //!< The buffer is stored in pinned CPU memory.
+#define NVCV_CUDA_ARRAY   3   //!< A CUDA array is used for storage.
+
+//! Image descriptor.
+typedef struct
+#ifdef _MSC_VER
+__declspec(dllexport)
+#endif // _MSC_VER
+NvCVImage {
+  unsigned int              width;                  //!< The number of pixels horizontally in the image.
+  unsigned int              height;                 //!< The number of pixels  vertically  in the image.
+  signed int                pitch;                  //!< The byte stride between pixels vertically.
+  NvCVImage_PixelFormat     pixelFormat;            //!< The format of the pixels in the image.
+  NvCVImage_ComponentType   componentType;          //!< The data type used to represent each component of the image.
+  unsigned char             pixelBytes;             //!< The number of bytes in a chunky pixel.
+  unsigned char             componentBytes;         //!< The number of bytes in each pixel component.
+  unsigned char             numComponents;          //!< The number of components in each pixel.
+  unsigned char             planar;                 //!< NVCV_CHUNKY, NVCV_PLANAR, NVCV_UYVY, ....
+  unsigned char             gpuMem;                 //!< NVCV_CPU, NVCV_CPU_PINNED, NVCV_CUDA, NVCV_GPU
+  unsigned char             colorspace;             //!< An OR of colorspace, range and chroma phase.
+  unsigned char             reserved[2];            //!< For structure padding and future expansion. Set to 0.
+  void                      *pixels;                //!< Pointer to pixel(0,0) in the image.
+  void                      *deletePtr;             //!< Buffer memory to be deleted (can be NULL).
+  void                      (*deleteProc)(void *p); //!< Delete procedure to call rather than free().
+  unsigned long long        bufferBytes;            //!< The maximum amount of memory available through pixels.
+
+
+#ifdef __cplusplus
+
+  //! Default constructor: fill with 0.
+  inline NvCVImage();
+
+  //! Allocation constructor.
+  //! \param[in]  width     the number of pixels horizontally.
+  //! \param[in]  height    the number of pixels vertically.
+  //! \param[in]  format    the format of the pixels.
+  //! \param[in]  type      the type of each pixel component.
+  //! \param[in]  layout    One of { NVCV_CHUNKY, NVCV_PLANAR } or one of the YUV layouts.
+  //! \param[in]  memSpace  One of { NVCV_CPU, NVCV_CPU_PINNED, NVCV_GPU, NVCV_CUDA }
+  //! \param[in]  alignment row byte alignment. Choose 0 or a power of 2.
+  //!                       1: yields no gap whatsoever between scanlines;
+  //!                       0: default alignment: 4 on CPU, and cudaMallocPitch's choice on GPU.
+  //!                       Other common values are 16 or 32 for cache line size.
+  inline NvCVImage(unsigned width, unsigned height, NvCVImage_PixelFormat format, NvCVImage_ComponentType type,
+          unsigned layout = NVCV_CHUNKY, unsigned memSpace = NVCV_CPU, unsigned alignment = 0);
+
+  //! Subimage constructor.
+  //! \param[in]  fullImg   the full image, from which this subImage view is to be created.
+  //! \param[in]  x         the left edge of the subImage, in reference to the full image.
+  //! \param[in]  y         the top edge  of the subImage, in reference to the full image.
+  //! \param[in]  width     the width  of the subImage, in pixels.
+  //! \param[in]  height    the height of the subImage, in pixels.
+  //! \bug        This does not work for planar or semi-planar formats, neither RGB nor YUV.
+  //! \note       This does work for all chunky formats, including UYVY, VYUY, YUYV, YVYU.
+  inline NvCVImage(NvCVImage *fullImg, int x, int y, unsigned width, unsigned height);
+
+  //! Destructor
+  inline ~NvCVImage();
+
+  //! Copy a rectangular subimage. This works for CPU->CPU, CPU->GPU, GPU->GPU, and GPU->CPU.
+  //! \param[in]  src     The source image from which to copy.
+  //! \param[in]  srcX    The left coordinate of the src rectangle.
+  //! \param[in]  srcY    The top  coordinate of the src rectangle.
+  //! \param[in]  dstX    The left coordinate of the dst rectangle.
+  //! \param[in]  dstY    The top  coordinate of the dst rectangle.
+  //! \param[in]  width   The width  of the rectangle to be copied, in pixels.
+  //! \param[in]  height  The height of the rectangle to be copied, in pixels.
+  //! \note   NvCVImage_Transfer() can handle more cases.
+  //! \return NVCV_SUCCESS         if successful
+  //! \return NVCV_ERR_MISMATCH    if the formats are different
+  //! \return NVCV_ERR_CUDA        if a CUDA error occurred
+  //! \return NVCV_ERR_PIXELFORMAT if the pixel format is not yet accommodated.
+  inline NvCV_Status copyFrom(const NvCVImage *src, int srcX, int srcY, int dstX, int dstY, unsigned width, unsigned height);
+
+  //! Copy from one image to another. This works for CPU->CPU, CPU->GPU, GPU->GPU, and GPU->CPU.
+  //! \param[in]  src     The source image from which to copy.
+  //! \note   NvCVImage_Transfer() can handle more cases.
+  //! \return NVCV_SUCCESS         if successful
+  //! \return NVCV_ERR_MISMATCH    if the formats are different
+  //! \return NVCV_ERR_CUDA        if a CUDA error occurred
+  //! \return NVCV_ERR_PIXELFORMAT if the pixel format is not yet accommodated.
+  inline NvCV_Status copyFrom(const NvCVImage *src);
+
+#endif // ___cplusplus
+} NvCVImage;
+
+
+//! Integer rectangle.
+typedef struct NvCVRect2i   {
+  int x;      //!< The left edge of the rectangle.
+  int y;      //!< The top  edge of the rectangle.
+  int width;  //!< The width  of the rectangle.
+  int height; //!< The height of the rectangle.
+} NvCVRect2i;
+
+
+//! Integer point.
+typedef struct NvCVPoint2i {
+  int x;  //!< The horizontal coordinate.
+  int y;  //!< The vertical coordinate
+} NvCVPoint2i;
+
+
+//! Initialize an image. The C++ constructors can initialize this appropriately.
+//! This is called by the C++ constructor, but C code should call this explicitly.
+//! \param[in,out]  im        the image to initialize.
+//! \param[in]      width     the desired width  of the image, in pixels.
+//! \param[in]      height    the desired height of the image, in pixels.
+//! \param[in]      pitch     the byte stride between pixels vertically.
+//! \param[in]      pixels    a pointer to the pixel buffer.
+//! \param[in]      format    the format of the pixels.
+//! \param[in]      type      the type of the components of the pixels.
+//! \param[in]      layout    One of { NVCV_CHUNKY, NVCV_PLANAR } or one of the YUV layouts.
+//! \param[in]      memSpace  Location of the buffer: one of { NVCV_CPU, NVCV_CPU_PINNED, NVCV_GPU, NVCV_CUDA }
+//! \return NVCV_SUCCESS         if successful
+//! \return NVCV_ERR_PIXELFORMAT if the pixel format is not yet accommodated.
+NvCV_Status NvCV_API NvCVImage_Init(NvCVImage *im, unsigned width, unsigned height, int pitch, void *pixels,
+  NvCVImage_PixelFormat format, NvCVImage_ComponentType type, unsigned layout, unsigned memSpace);
+
+
+//! Initialize a view into a subset of an existing image.
+//! No memory is allocated -- the fullImg buffer is used.
+//! \param[in]  subImg  the sub-image view into the existing full image.
+//! \param[in]  fullImg the existing full image.
+//! \param[in]  x       the left edge of the sub-image, as coordinate of the full image.
+//! \param[in]  y       the top  edge of the sub-image, as coordinate of the full image.
+//! \param[in]  width   the desired width  of the subImage, in pixels.
+//! \param[in]  height  the desired height of the subImage, in pixels.
+//! \bug        This does not work in general for planar or semi-planar formats, neither RGB nor YUV.
+//!             However, it does work for all formats with the full image, to make a shallow copy, e.g.
+//!             NvCVImage_InitView(&subImg, &fullImg, 0, 0, fullImage.width, fullImage.height).
+//!             Cropping a planar or semi-planar image can be accomplished with NvCVImage_TransferRect().
+//! \note       This does work for all chunky formats, including UYVY, VYUY, YUYV, YVYU.
+//! \sa         { NvCVImage_TransferRect }
+void NvCV_API NvCVImage_InitView(NvCVImage *subImg, NvCVImage *fullImg, int x, int y, unsigned width, unsigned height);
+
+
+//! Allocate memory for, and initialize an image. This assumes that the image data structure has nothing meaningful in it.
+//! This is called by the C++ constructor, but C code can call this to allocate an image.
+//! \param[in,out]  im        the image to initialize.
+//! \param[in]      width     the desired width  of the image, in pixels.
+//! \param[in]      height    the desired height of the image, in pixels.
+//! \param[in]      format    the format of the pixels.
+//! \param[in]      type      the type of the components of the pixels.
+//! \param[in]      layout    One of { NVCV_CHUNKY, NVCV_PLANAR } or one of the YUV layouts.
+//! \param[in]      memSpace  Location of the buffer: one of { NVCV_CPU, NVCV_CPU_PINNED, NVCV_GPU, NVCV_CUDA }
+//! \param[in]      alignment row byte alignment. Choose 0 or a power of 2.
+//!                           1: yields no gap whatsoever between scanlines;
+//!                           0: default alignment: 4 on CPU, and cudaMallocPitch's choice on GPU.
+//!                           Other common values are 16 or 32 for cache line size.
+//! \return NVCV_SUCCESS         if the operation was successful.
+//! \return NVCV_ERR_PIXELFORMAT if the pixel format is not accommodated.
+//! \return NVCV_ERR_MEMORY      if there is not enough memory to allocate the buffer.
+NvCV_Status NvCV_API NvCVImage_Alloc(NvCVImage *im, unsigned width, unsigned height, NvCVImage_PixelFormat format,
+  NvCVImage_ComponentType type, unsigned layout, unsigned memSpace, unsigned alignment);
+
+
+//! Reallocate memory for, and initialize an image. This assumes that the image is valid.
+//! It will check bufferBytes to see if enough memory is already available, and will reshape rather than realloc if true.
+//! Otherwise, it will free the previous buffer and reallocate a new one.
+//! \param[in,out]  im        the image to initialize.
+//! \param[in]      width     the desired width  of the image, in pixels.
+//! \param[in]      height    the desired height of the image, in pixels.
+//! \param[in]      format    the format of the pixels.
+//! \param[in]      type      the type of the components of the pixels.
+//! \param[in]      layout    One of { NVCV_CHUNKY, NVCV_PLANAR } or one of the YUV layouts.
+//! \param[in]      memSpace  Location of the buffer: one of { NVCV_CPU, NVCV_CPU_PINNED, NVCV_GPU, NVCV_CUDA }
+//! \param[in]      alignment row byte alignment. Choose 0 or a power of 2.
+//!                           1: yields no gap whatsoever between scanlines;
+//!                           0: default alignment: 4 on CPU, and cudaMallocPitch's choice on GPU.
+//!                           Other common values are 16 or 32 for cache line size.
+//! \return NVCV_SUCCESS         if the operation was successful.
+//! \return NVCV_ERR_PIXELFORMAT if the pixel format is not accommodated.
+//! \return NVCV_ERR_MEMORY      if there is not enough memory to allocate the buffer.
+NvCV_Status NvCV_API NvCVImage_Realloc(NvCVImage *im, unsigned width, unsigned height, NvCVImage_PixelFormat format,
+  NvCVImage_ComponentType type, unsigned layout, unsigned memSpace, unsigned alignment);
+
+
+//! Deallocate the image buffer from the image. The image is not deallocated.
+//! param[in,out] im  the image whose buffer is to be deallocated.
+void NvCV_API NvCVImage_Dealloc(NvCVImage *im);
+
+
+//! Allocate a new image, with storage (C-style constructor).
+//! \param[in]      width     the desired width  of the image, in pixels.
+//! \param[in]      height    the desired height of the image, in pixels.
+//! \param[in]      format    the format of the pixels.
+//! \param[in]      type      the type of the components of the pixels.
+//! \param[in]      layout    One of { NVCV_CHUNKY, NVCV_PLANAR } or one of the YUV layouts.
+//! \param[in]      memSpace  Location of the buffer: one of { NVCV_CPU, NVCV_CPU_PINNED, NVCV_GPU, NVCV_CUDA }
+//! \param[in]      alignment row byte alignment. Choose 0 or a power of 2.
+//!                           1: yields no gap whatsoever between scanlines;
+//!                           0: default alignment: 4 on CPU, and cudaMallocPitch's choice on GPU.
+//!                           Other common values are 16 or 32 for cache line size.
+//! \param[out]         *out will be a pointer to the new image if successful; otherwise NULL.
+//! \return NVCV_SUCCESS         if the operation was successful.
+//! \return NVCV_ERR_PIXELFORMAT if the pixel format is not accommodated.
+//! \return NVCV_ERR_MEMORY      if there is not enough memory to allocate the buffer.
+NvCV_Status NvCV_API NvCVImage_Create(unsigned width, unsigned height, NvCVImage_PixelFormat format,
+  NvCVImage_ComponentType type, unsigned layout, unsigned memSpace, unsigned alignment, NvCVImage **out);
+
+
+//! Deallocate the image allocated with NvCVImage_Create() (C-style destructor).
+void NvCV_API NvCVImage_Destroy(NvCVImage *im);
+
+
+//! Get offsets for the components of a pixel format.
+//! These are not byte offsets, but component offsets.
+//! \param[in]  format  the pixel format to be interrogated.
+//! \param[out] rOff    a place to store the offset for the red       channel (can be NULL).
+//! \param[out] gOff    a place to store the offset for the green     channel (can be NULL).
+//! \param[out] bOff    a place to store the offset for the blue      channel (can be NULL).
+//! \param[out] aOff    a place to store the offset for the alpha     channel (can be NULL).
+//! \param[out] yOff    a place to store the offset for the luminance channel (can be NULL).
+void NvCV_API NvCVImage_ComponentOffsets(NvCVImage_PixelFormat format, int *rOff, int *gOff, int *bOff, int *aOff, int *yOff);
+
+
+//! Transfer one image to another, with a limited set of conversions.
+//!
+//! If any of the images resides on the GPU, it may run asynchronously,
+//! so cudaStreamSynchronize() should be called if it is necessary to run synchronously.
+//! The following table indicates (with X) the currently-implemented conversions:
+//!    +-------------------+-------------+-------------+-------------+-------------+
+//!    |                   |  u8 --> u8  |  u8 --> f32 | f32 --> u8  | f32 --> f32 |
+//!    +-------------------+-------------+-------------+-------------+-------------+
+//!    | Y      --> Y      |      X      |             |      X      |      X      |
+//!    | Y      --> A      |      X      |             |      X      |      X      |
+//!    | Y      --> RGB    |      X      |      X      |      X      |      X      |
+//!    | Y      --> RGBA   |      X      |      X      |      X      |      X      |
+//!    | A      --> Y      |      X      |             |      X      |      X      |
+//!    | A      --> A      |      X      |             |      X      |      X      |
+//!    | A      --> RGB    |      X      |      X      |      X      |      X      |
+//!    | A      --> RGBA   |      X      |             |             |             |
+//!    | RGB    --> Y      |      X      |      X      |             |             |
+//!    | RGB    --> A      |      X      |      X      |             |             |
+//!    | RGB    --> RGB    |      X      |      X      |      X      |      X      |
+//!    | RGB    --> RGBA   |      X      |      X      |      X      |      X      |
+//!    | RGBA   --> Y      |      X      |      X      |             |             |
+//!    | RGBA   --> A      |             |      X      |             |             |
+//!    | RGBA   --> RGB    |      X      |      X      |      X      |      X      |
+//!    | RGBA   --> RGBA   |      X      |      X      |      X      |      X      |
+//!    | RGB    --> YUV420 |      X      |             |      X      |             |
+//!    | RGBA   --> YUV420 |      X      |             |      X      |             |
+//!    | RGB    --> YUV422 |      X      |             |      X      |             |
+//!    | RGBA   --> YUV422 |      X      |             |      X      |             |
+//!    | RGB    --> YUV444 |      X      |             |      X      |             |
+//!    | RGBA   --> YUV444 |      X      |             |      X      |             |
+//!    | YUV420 --> RGB    |      X      |      X      |             |             |
+//!    | YUV420 --> RGBA   |      X      |      X      |             |             |
+//!    | YUV422 --> RGB    |      X      |      X      |             |             |
+//!    | YUV422 --> RGBA   |      X      |      X      |             |             |
+//!    | YUV444 --> RGB    |      X      |      X      |             |             |
+//!    | YUV444 --> RGBA   |      X      |      X      |             |             |
+//!    +-------------------+-------------+-------------+-------------+-------------+
+//! where
+//! * Either source or destination can be CHUNKY or PLANAR.
+//! * Either source or destination can reside on the CPU or the GPU.
+//! * The RGB components are in any order (i.e. RGB or BGR; RGBA or BGRA).
+//! * For RGBA (or BGRA) destinations, most implementations do not change the alpha channel, so it is recommended to
+//!   set it at initialization time with [cuda]memset(im.pixels, -1, im.pitch * im.height) or
+//!   [cuda]memset(im.pixels, -1, im.pitch * im.height * im.numComponents) for chunky and planar images respectively.
+//! * YUV requires that the colorspace field be set manually prior to Transfer, e.g. typical for layout=NVCV_NV12:
+//!   image.colorspace = NVCV_709 | NVCV_VIDEO_RANGE | NVCV_CHROMA_INTSTITIAL;
+//! * There are also RGBf16-->RGBf32 and RGBf32-->RGBf16 transfers.
+//! * Additionally, when the src and dst formats are the same, all formats are accommodated on CPU and GPU,
+//!   and this can be used as a replacement for cudaMemcpy2DAsync() (which it utilizes). This is also true for YUV,
+//!   whose src and dst must share the same format, layout and colorspace.
+//!
+//! When there is some kind of conversion AND the src and dst reside on different processors (CPU, GPU),
+//! it is necessary to have a temporary GPU buffer, which is reshaped as needed to match the characteristics
+//! of the CPU image. The same temporary image can be used in subsequent calls to NvCVImage_Transfer(),
+//! regardless of the shape, format or component type, as it will grow as needed to accommodate
+//! the largest memory requirement. The recommended usage for most cases is to supply an empty image
+//! as the temporary; if it is not needed, no buffer is allocated. NULL can be supplied as the tmp
+//! image, in which case an ephemeral buffer is allocated if needed, with resultant
+//! performance degradation for image sequences.
+//!
+//! \param[in]      src     the source image.
+//! \param[out]     dst     the destination image.
+//! \param[in]      scale   a scale factor that can be applied when one (but not both) of the images
+//!                         is based on floating-point components; this parameter is ignored when all image components
+//!                         are represented with integer data types, or all image components are represented with
+//!                         floating-point data types.
+//! \param[in]      stream  the stream on which to perform the copy. This is ignored if both images reside on the CPU.
+//! \param[in,out]  tmp     a temporary buffer that is sometimes needed when transferring images
+//!                         between the CPU and GPU in either direction (can be empty or NULL).
+//!                         It has the same characteristics as the CPU image, but it resides on the GPU.
+//! \return         NVCV_SUCCESS           if successful,
+//!                 NVCV_ERR_PIXELFORMAT   if one of the pixel formats is not accommodated.
+//!                 NVCV_ERR_CUDA          if a CUDA error has occurred.
+//!                 NVCV_ERR_GENERAL       if an otherwise unspecified error has occurred.
+NvCV_Status NvCV_API NvCVImage_Transfer(
+             const NvCVImage *src, NvCVImage *dst, float scale, struct CUstream_st *stream, NvCVImage *tmp);
+
+
+//! Transfer a rectangular portion of an image.
+//! See NvCVImage_Transfer() for the pixel format combinations that are implemented.
+//! \param[in]  src     the source image.
+//! \param[in]  srcRect the subRect of the src to be transferred (NULL implies the whole image).
+//! \param[out] dst     the destination image.
+//! \param[in]  dstPt   location to which the srcRect is to be copied (NULL implies (0,0)).
+//! \param[in]  scale   scale factor applied to the magnitude during transfer, typically 1, 255 or 1/255.
+//! \param[in]  stream  the CUDA stream.
+//! \param[in]  tmp     a staging image.
+//! \return     NVCV_SUCCESS  if the operation was completed successfully.
+//! \note       The actual transfer region may be smaller, because the rects are clipped against the images.
+NvCV_Status NvCV_API NvCVImage_TransferRect(
+  const NvCVImage *src, const NvCVRect2i *srcRect, NvCVImage *dst, const NvCVPoint2i *dstPt,
+  float scale, struct CUstream_st *stream, NvCVImage *tmp);
+
+
+//! Transfer from a YUV image.
+//! YUVu8 --> RGBu8 and YUVu8 --> RGBf32 are currently available.
+//! \param[in]  y             pointer to pixel(0,0) of the luminance channel.
+//! \param[in]  yPixBytes     the byte stride between y pixels horizontally.
+//! \param[in]  yPitch        the byte stride between y pixels vertically.
+//! \param[in]  u             pointer to pixel(0,0) of the u (Cb) chrominance channel.
+//! \param[in]  v             pointer to pixel(0,0) of the v (Cr) chrominance channel.
+//! \param[in]  uvPixBytes    the byte stride between u or v pixels horizontally.
+//! \param[in]  uvPitch       the byte stride between u or v pixels vertically.
+//! \param[in]  yuvColorSpace the yuv colorspace, specifying range, chromaticities, and chrominance phase.
+//! \param[in]  yuvMemSpace   the memory space where the pixel buffers reside.
+//! \param[out] dst           the destination image.
+//! \param[in]  dstRect       the destination rectangle (NULL implies the whole image).
+//! \param[in]  scale         scale factor applied to the magnitude during transfer, typically 1, 255 or 1/255.
+//! \param[in]  stream        the CUDA stream.
+//! \param[in]  tmp           a staging image.
+//! \return     NVCV_SUCCESS  if the operation was completed successfully.
+//! \note       The actual transfer region may be smaller, because the rects are clipped against the images.
+NvCV_Status NvCV_API NvCVImage_TransferFromYUV(
+  const void *y,                int yPixBytes,  int yPitch,
+  const void *u, const void *v, int uvPixBytes, int uvPitch,
+  NvCVImage_PixelFormat yuvFormat, NvCVImage_ComponentType yuvType,
+  unsigned yuvColorSpace, unsigned yuvMemSpace,
+  NvCVImage *dst, const NvCVRect2i *dstRect, float scale, struct CUstream_st *stream, NvCVImage *tmp);
+
+
+//! Transfer to a YUV image.
+//! RGBu8 --> YUVu8 and RGBf32 --> YUVu8 are currently available.
+//! \param[in]  src           the source image.
+//! \param[in]  srcRect       the destination rectangle (NULL implies the whole image).
+//! \param[out] y             pointer to pixel(0,0) of the luminance channel.
+//! \param[in]  yPixBytes     the byte stride between y pixels horizontally.
+//! \param[in]  yPitch        the byte stride between y pixels vertically.
+//! \param[out] u             pointer to pixel(0,0) of the u (Cb) chrominance channel.
+//! \param[out] v             pointer to pixel(0,0) of the v (Cr) chrominance channel.
+//! \param[in]  uvPixBytes    the byte stride between u or v pixels horizontally.
+//! \param[in]  uvPitch       the byte stride between u or v pixels vertically.
+//! \param[in]  yuvColorSpace the yuv colorspace, specifying range, chromaticities, and chrominance phase.
+//! \param[in]  yuvMemSpace   the memory space where the pixel buffers reside.
+//! \param[in]  scale         scale factor applied to the magnitude during transfer, typically 1, 255 or 1/255.
+//! \param[in]  stream        the CUDA stream.
+//! \param[in]  tmp           a staging image.
+//! \return     NVCV_SUCCESS  if the operation was completed successfully.
+//! \note       The actual transfer region may be smaller, because the rects are clipped against the images.
+NvCV_Status NvCV_API NvCVImage_TransferToYUV(
+  const NvCVImage *src, const NvCVRect2i *srcRect, 
+  const void *y,                int yPixBytes,  int yPitch,
+  const void *u, const void *v, int uvPixBytes, int uvPitch,
+  NvCVImage_PixelFormat yuvFormat, NvCVImage_ComponentType yuvType,
+  unsigned yuvColorSpace, unsigned yuvMemSpace,
+  float scale, struct CUstream_st *stream, NvCVImage *tmp);
+
+
+//! Between rendering by a graphics system and Transfer by CUDA, it is necessary to map the texture resource.
+//! There is a fair amount of overhead, so its use should be minimized.
+//! Every call to NvCVImage_MapResource() should be matched by a subsequent call to NvCVImage_UnmapResource().
+//! \param[in,out]  im      the image to be mapped.
+//! \param[in]      stream  the stream on which the mapping is to be performed.
+//! \return         NVCV_SUCCESS is the operation was completed successfully.
+NvCV_Status NvCV_API NvCVImage_MapResource(NvCVImage *im, struct CUstream_st *stream);
+
+
+//! After transfer by CUDA, the texture resource must be unmapped in order to be used by the graphics system again.
+//! There is a fair amount of overhead, so its use should be minimized.
+//! Every call to NvCVImage_UnmapResource() should correspond to a preceding call to NvCVImage_MapResource().
+//! \param[in,out]  im      the image to be mapped.
+//! \param[in]      stream  the CUDA stream on which the mapping is to be performed.
+//! \return         NVCV_SUCCESS is the operation was completed successfully.
+NvCV_Status NvCV_API NvCVImage_UnmapResource(NvCVImage *im, struct CUstream_st *stream);
+
+
+//! Composite one source image over another using the given matte.
+//! This accommodates all RGB and RGBA formats, with u8 and f32 components.
+//! \param[in]  fg      the foreground source image.
+//! \param[in]  bg      the background source image.
+//! \param[in]  mat     the matte  Yu8   (or Au8)   image, indicating where the src should come through.
+//! \param[out] dst     the destination image. This can be the same as fg or bg.
+//! \param[in]  stream  the CUDA stream on which the composition is to be performed.
+//! \return NVCV_SUCCESS         if the operation was successful.
+//! \return NVCV_ERR_PIXELFORMAT if the pixel format is not accommodated.
+//! \return NVCV_ERR_MISMATCH    if either the fg & bg & dst formats do not match, or if fg & bg & dst & mat are not
+//!                              in the same address space (CPU or GPU).
+#if RTX_CAMERA_IMAGE == 0
+NvCV_Status NvCV_API NvCVImage_Composite(const NvCVImage *fg, const NvCVImage *bg, const NvCVImage *mat, NvCVImage *dst,
+            struct CUstream_st *stream);
+#else // RTX_CAMERA_IMAGE == 1  // No GPU acceleration
+NvCV_Status NvCV_API NvCVImage_Composite(const NvCVImage *fg, const NvCVImage *bg, const NvCVImage *mat, NvCVImage *dst);
+#endif // RTX_CAMERA_IMAGE == 1
+
+//! Composite one source image over another using the given matte.
+//! Not all pixel format combinations are accommodated.
+//! \param[in]      fg      the foreground source image.
+//! \param[in]      fgOrg   the upper-left corner of the fg image to be composited (NULL implies (0,0)).
+//! \param[in]      bg      the background source image.
+//! \param[in]      bgOrg   the upper-left corner of the bg image to be composited (NULL implies (0,0)).
+//! \param[in]      mat     the matte image, indicating where the src should come through.
+//!                         This determines the size of the rectangle to be composited.
+//!                         If this is multi-channel, the alpha channel is used as the matte.
+//! \param[in]      mode    the composition mode. Only 0 (straight alpha over) is implemented at this time.
+//! \param[out]     dst     the destination image. This can be the same as fg or bg.
+//! \param[in]      dstOrg  the upper-left corner of the dst image to be updated (NULL implies (0,0)).
+//! \param[in]      stream  the CUDA stream on which the composition is to be performed.
+//! \note   If a smaller region of a matte is desired, a window can be created using
+//!         NvCVImage_InitView() for chunky or NvCVImage_Init() for planar pixels.
+//! \return NVCV_SUCCESS         if the operation was successful.
+//! \return NVCV_ERR_PIXELFORMAT if the pixel format is not accommodated.
+//! \return NVCV_ERR_MISMATCH    if either the fg & bg & dst formats do not match, or if fg & bg & dst & mat are not
+//!                              in the same address space (CPU or GPU).
+//! \bug  Though RGBA destinations are accommodated, the A channel is not updated at all.
+//! \todo Accommodate premultiplied alpha, either as a flag in NvCVImage or as a different mode.
+//! \todo If the destination has an A channel, update it as per Adobe and Pixar.
+NvCV_Status NvCV_API NvCVImage_CompositeRect(
+    const NvCVImage *fg,  const NvCVPoint2i *fgOrg,
+    const NvCVImage *bg,  const NvCVPoint2i *bgOrg,
+    const NvCVImage *mat, unsigned mode,
+    NvCVImage       *dst, const NvCVPoint2i *dstOrg,
+    struct CUstream_st *stream);
+
+//! Composite a BGRu8 source image over a constant color field using the given matte.
+//! \param[in]      src     the source BGRu8 (or RGBu8) image.
+//! \param[in]      mat     the matte  Yu8   (or Au8)   image, indicating where the src should come through.
+//! \param[in]      bgColor the desired flat background color, with the same component ordering as the src and dst.
+//! \param[in,out]  dst     the destination BGRu8 (or RGBu8) image. May be the same as src.
+//! \return NVCV_SUCCESS         if the operation was successful.
+//! \return NVCV_ERR_PIXELFORMAT if the pixel format is not accommodated.
+//! \bug    This is only implemented for 3-component u8 src and dst, and 1-component mat,
+//!         where all images are resident on the CPU.
+NvCV_Status NvCV_API NvCVImage_CompositeOverConstant(
+              const NvCVImage *src, const NvCVImage *mat, const unsigned char bgColor[3], NvCVImage *dst);
+
+
+//! Flip the image vertically.
+//! No actual pixels are moved: it is just an accounting procedure.
+//! This is extremely low overhead, but requires appropriate interpretation of the pitch.
+//! Flipping twice yields the original orientation.
+//! \param[in]  src  the source image (NULL implies src == dst).
+//! \param[out] dst  the flipped image (can be the same as the src).
+//! \return     NVCV_SUCCESS         if successful.
+//! \return     NVCV_ERR_PIXELFORMAT for most planar formats.
+//! \bug        This does not work for planar or semi-planar formats, neither RGB nor YUV.
+//! \note       This does work for all chunky formats, including UYVY, VYUY, YUYV, YVYU.
+NvCV_Status NvCV_API NvCVImage_FlipY(const NvCVImage *src, NvCVImage *dst);
+
+
+//! Get the pointers for YUV, based on the format.
+//! \param[in]  im          The image to be deconstructed.
+//! \param[out] y           A place to store the pointer to y(0,0).
+//! \param[out] u           A place to store the pointer to u(0,0).
+//! \param[out] v           A place to store the pointer to v(0,0).
+//! \param[out] yPixBytes   A place to store the byte stride between  luma  samples horizontally.
+//! \param[out] cPixBytes   A place to store the byte stride between chroma samples horizontally.
+//! \param[out] yRowBytes   A place to store the byte stride between  luma  samples vertically.
+//! \param[out] cRowBytes   A place to store the byte stride between chroma samples vertically.
+//! \return     NVCV_SUCCESS           If the information was gathered successfully.
+//!             NVCV_ERR_PIXELFORMAT   Otherwise.
+NvCV_Status NvCV_API NvCVImage_GetYUVPointers(NvCVImage *im,
+  unsigned char **y, unsigned char **u, unsigned char **v,
+  int *yPixBytes, int *cPixBytes, int *yRowBytes, int *cRowBytes);
+
+
+#ifdef __cplusplus
+} // extern "C"
+
+/********************************************************************************
+ * NvCVImage default constructor
+ ********************************************************************************/
+
+NvCVImage::NvCVImage() {
+  pixels = nullptr;
+  (void)NvCVImage_Alloc(this, 0, 0, NVCV_FORMAT_UNKNOWN, NVCV_TYPE_UNKNOWN, 0, 0, 0);
+}
+
+/********************************************************************************
+ * NvCVImage allocation constructor
+ ********************************************************************************/
+
+NvCVImage::NvCVImage(unsigned width, unsigned height, NvCVImage_PixelFormat format, NvCVImage_ComponentType type,
+                       unsigned layout, unsigned memSpace, unsigned alignment) {
+  pixels = nullptr;
+  (void)NvCVImage_Alloc(this, width, height, format, type, layout, memSpace, alignment);
+}
+
+/********************************************************************************
+ * Subimage constructor
+ ********************************************************************************/
+
+NvCVImage::NvCVImage(NvCVImage *fullImg, int x, int y, unsigned width, unsigned height) {
+  NvCVImage_InitView(this, fullImg, x, y, width, height);
+}
+
+/********************************************************************************
+ * NvCVImage destructor
+ ********************************************************************************/
+
+NvCVImage::~NvCVImage() { NvCVImage_Dealloc(this); }
+
+/********************************************************************************
+ * copy subimage
+ ********************************************************************************/
+
+NvCV_Status NvCVImage::copyFrom(const NvCVImage *src, int srcX, int srcY, int dstX, int dstY, unsigned wd,
+                                  unsigned ht) {
+#if RTX_CAMERA_IMAGE // This only works for chunky images
+  NvCVImage srcView, dstView;
+  NvCVImage_InitView(&srcView, const_cast<NvCVImage *>(src), srcX, srcY, wd, ht);
+  NvCVImage_InitView(&dstView, this, dstX, dstY, wd, ht);
+  return NvCVImage_Transfer(&srcView, &dstView, 1.f, 0, nullptr);
+#else // !RTX_CAMERA_IMAGE bug fix for non-chunky images
+  NvCVRect2i  srcRect = { (int)srcX, (int)srcY, (int)wd, (int)ht };
+  NvCVPoint2i dstPt   = { (int)dstX, (int)dstY };
+  return NvCVImage_TransferRect(src, &srcRect, this, &dstPt, 1.f, 0, nullptr);
+#endif // RTX_CAMERA_IMAGE
+}
+
+/********************************************************************************
+ * copy image
+ ********************************************************************************/
+
+NvCV_Status NvCVImage::copyFrom(const NvCVImage *src) { return NvCVImage_Transfer(src, this, 1.f, 0, nullptr); }
+
+
+#endif // ___cplusplus
+
+#endif // __NVCVIMAGE_H__
diff --git a/vfx/nvvfx/include/nvCVStatus.h b/vfx/nvvfx/include/nvCVStatus.h
new file mode 100644
index 0000000..dd47ba3
--- /dev/null
+++ b/vfx/nvvfx/include/nvCVStatus.h
@@ -0,0 +1,113 @@
+/*###############################################################################
+#
+# Copyright 2020 NVIDIA Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+###############################################################################*/
+
+#ifndef __NVCVSTATUS_H__
+#define __NVCVSTATUS_H__
+
+#ifndef NvCV_API
+  #ifdef _WIN32
+    #ifdef NVCV_API_EXPORT
+      #define NvCV_API __declspec(dllexport) __cdecl
+    #else
+      #define NvCV_API
+    #endif
+  #else //if linux
+    #define NvCV_API   // TODO: Linux code goes here
+  #endif // _WIN32 or linux
+#endif //NvCV_API
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif // ___cplusplus
+
+
+//! Status codes returned from APIs.
+typedef enum NvCV_Status {
+  NVCV_SUCCESS                   = 0,    //!< The procedure returned successfully.
+  NVCV_ERR_GENERAL               = -1,   //!< An otherwise unspecified error has occurred.
+  NVCV_ERR_UNIMPLEMENTED         = -2,   //!< The requested feature is not yet implemented.
+  NVCV_ERR_MEMORY                = -3,   //!< There is not enough memory for the requested operation.
+  NVCV_ERR_EFFECT                = -4,   //!< An invalid effect handle has been supplied.
+  NVCV_ERR_SELECTOR              = -5,   //!< The given parameter selector is not valid in this effect filter.
+  NVCV_ERR_BUFFER                = -6,   //!< An image buffer has not been specified.
+  NVCV_ERR_PARAMETER             = -7,   //!< An invalid parameter value has been supplied for this effect+selector.
+  NVCV_ERR_MISMATCH              = -8,   //!< Some parameters are not appropriately matched.
+  NVCV_ERR_PIXELFORMAT           = -9,   //!< The specified pixel format is not accommodated.
+  NVCV_ERR_MODEL                 = -10,  //!< Error while loading the TRT model.
+  NVCV_ERR_LIBRARY               = -11,  //!< Error loading the dynamic library.
+  NVCV_ERR_INITIALIZATION        = -12,  //!< The effect has not been properly initialized.
+  NVCV_ERR_FILE                  = -13,  //!< The file could not be found.
+  NVCV_ERR_FEATURENOTFOUND       = -14,  //!< The requested feature was not found
+  NVCV_ERR_MISSINGINPUT          = -15,  //!< A required parameter was not set
+  NVCV_ERR_RESOLUTION            = -16,  //!< The specified image resolution is not supported.
+  NVCV_ERR_UNSUPPORTEDGPU        = -17,  //!< The GPU is not supported
+  NVCV_ERR_WRONGGPU              = -18,  //!< The current GPU is not the one selected.
+  NVCV_ERR_UNSUPPORTEDDRIVER     = -19,  //!< The currently installed graphics driver is not supported
+  NVCV_ERR_MODELDEPENDENCIES     = -20,  //!< There is no model with dependencies that match this system
+  NVCV_ERR_PARSE                 = -21,  //!< There has been a parsing or syntax error while reading a file
+  NVCV_ERR_MODELSUBSTITUTION     = -22,  //!< The specified model does not exist and has been substituted.
+  NVCV_ERR_READ                  = -23,  //!< An error occurred while reading a file.
+  NVCV_ERR_WRITE                 = -24,  //!< An error occurred while writing a file.
+  NVCV_ERR_PARAMREADONLY         = -25,  //!< The selected parameter is read-only.
+  NVCV_ERR_TRT_ENQUEUE           = -26,  //!< TensorRT enqueue failed.
+  NVCV_ERR_TRT_BINDINGS          = -27,  //!< Unexpected TensorRT bindings.
+  NVCV_ERR_TRT_CONTEXT           = -28,  //!< An error occurred while creating a TensorRT context.
+  NVCV_ERR_TRT_INFER             = -29,  ///< The was a problem creating the inference engine.
+  NVCV_ERR_TRT_ENGINE            = -30,  ///< There was a problem deserializing the inference runtime engine.
+  NVCV_ERR_NPP                   = -31,  //!< An error has occurred in the NPP library.
+  NVCV_ERR_CONFIG                = -32,  //!< No suitable model exists for the specified parameter configuration.
+
+  NVCV_ERR_DIRECT3D              = -99,  //!< A Direct3D error has occurred.
+
+  NVCV_ERR_CUDA_BASE             = -100,  //!< CUDA errors are offset from this value.
+  NVCV_ERR_CUDA_VALUE            = -101,  //!< A CUDA parameter is not within the acceptable range.
+  NVCV_ERR_CUDA_MEMORY           = -102,  //!< There is not enough CUDA memory for the requested operation.
+  NVCV_ERR_CUDA_PITCH            = -112,  //!< A CUDA pitch is not within the acceptable range.
+  NVCV_ERR_CUDA_INIT             = -127,  //!< The CUDA driver and runtime could not be initialized.
+  NVCV_ERR_CUDA_LAUNCH           = -819,  //!< The CUDA kernel launch has failed.
+  NVCV_ERR_CUDA_KERNEL           = -309,  //!< No suitable kernel image is available for the device.
+  NVCV_ERR_CUDA_DRIVER           = -135,  //!< The installed NVIDIA CUDA driver is older than the CUDA runtime library.
+  NVCV_ERR_CUDA_UNSUPPORTED      = -901,  //!< The CUDA operation is not supported on the current system or device.
+  NVCV_ERR_CUDA_ILLEGAL_ADDRESS  = -800,  //!< CUDA tried to load or store on an invalid memory address.
+  NVCV_ERR_CUDA                  = -1099, //!< An otherwise unspecified CUDA error has been reported.
+} NvCV_Status;
+
+
+//! Get an error string corresponding to the given status code.
+//! \param[in]  code  the NvCV status code.
+//! \return     the corresponding string.
+//! \todo Find a cleaner way to do this, because NvCV_API doesn't work.
+#ifdef _WIN32
+  __declspec(dllexport) const char* __cdecl
+#else
+  const char* 
+#endif // _WIN32 or linux
+NvCV_GetErrorStringFromCode(NvCV_Status code);
+
+
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
+#endif // __NVCVSTATUS_H__
diff --git a/vfx/nvvfx/include/nvTransferD3D.h b/vfx/nvvfx/include/nvTransferD3D.h
new file mode 100644
index 0000000..e914eb5
--- /dev/null
+++ b/vfx/nvvfx/include/nvTransferD3D.h
@@ -0,0 +1,72 @@
+/*###############################################################################
+#
+# Copyright(c) 2021 NVIDIA CORPORATION.All Rights Reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+#
+###############################################################################*/
+
+#ifndef __NVTRANSFER_D3D_H__
+#define __NVTRANSFER_D3D_H__
+
+#ifndef _WINDOWS_
+  #define WIN32_LEAN_AND_MEAN
+  #include <Windows.h>
+#endif // _WINDOWS_
+#include <dxgitype.h>
+#include "nvCVImage.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // ___cplusplus
+
+
+
+//! Utility to determine the D3D format from the NvCVImage format, type and layout.
+//! \param[in]  format    the pixel format.
+//! \param[in]  type      the component type.
+//! \param[in]  layout    the layout.
+//! \param[out] d3dFormat a place to store the corresponding D3D format.
+//! \return     NVCV_SUCCESS if successful.
+NvCV_Status NvCV_API NvCVImage_ToD3DFormat(NvCVImage_PixelFormat format, NvCVImage_ComponentType type, unsigned layout, DXGI_FORMAT *d3dFormat);
+
+
+//! Utility to determine the NvCVImage format, component type and layout from a D3D format.
+//! \param[in]  d3dFormat the D3D format to translate.
+//! \param[out] format    a place to store the NvCVImage pixel format.
+//! \param[out] type      a place to store the NvCVImage component type.
+//! \param[out] layout    a place to store the NvCVImage layout.
+//! \return     NVCV_SUCCESS if successful.
+NvCV_Status NvCV_API NvCVImage_FromD3DFormat(DXGI_FORMAT d3dFormat, NvCVImage_PixelFormat *format, NvCVImage_ComponentType *type, unsigned char *layout);
+
+
+#ifdef __dxgicommon_h__
+
+//! Utility to determine the D3D color space from the NvCVImage color space.
+//! \param[in]  nvcvColorSpace  the NvCVImage colro space.
+//! \param[out] pD3dColorSpace  a place to store the resultant D3D color space.
+//! \return     NVCV_SUCCESS          if successful.
+//! \return     NVCV_ERR_PIXELFORMAT  if there is no equivalent color space.
+NvCV_Status NvCV_API NvCVImage_ToD3DColorSpace(unsigned char nvcvColorSpace, DXGI_COLOR_SPACE_TYPE *pD3dColorSpace);
+
+
+//! Utility to determine the NvCVImage color space from the D3D color space.
+//! \param[in]  d3dColorSpace   the D3D color space.
+//! \param[out] pNvcvColorSpace a place to store the resultant NvCVImage color space.
+//! \return     NVCV_SUCCESS          if successful.
+//! \return     NVCV_ERR_PIXELFORMAT  if there is no equivalent color space.
+NvCV_Status NvCV_API NvCVImage_FromD3DColorSpace(DXGI_COLOR_SPACE_TYPE d3dColorSpace, unsigned char *pNvcvColorSpace);
+
+#endif // __dxgicommon_h__
+
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // __NVTRANSFER_D3D_H__
+
diff --git a/vfx/nvvfx/include/nvTransferD3D11.h b/vfx/nvvfx/include/nvTransferD3D11.h
new file mode 100644
index 0000000..fabf067
--- /dev/null
+++ b/vfx/nvvfx/include/nvTransferD3D11.h
@@ -0,0 +1,44 @@
+/*###############################################################################
+#
+# Copyright(c) 2021 NVIDIA CORPORATION.All Rights Reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+#
+###############################################################################*/
+
+#ifndef __NVTRANSFER_D3D11_H__
+#define __NVTRANSFER_D3D11_H__
+
+#include <d3d11.h>
+#include "nvCVImage.h"
+#include "nvTransferD3D.h"  // for NvCVImage_ToD3DFormat() and NvCVImage_FromD3DFormat()
+
+#ifdef __cplusplus
+extern "C" {
+#endif // ___cplusplus
+
+
+
+//! Initialize an NvCVImage from a D3D11 texture.
+//! The pixelFormat and component types with be transferred over, and a cudaGraphicsResource will be registered;
+//! the NvCVImage destructor will unregister the resource.
+//! This is designed to work with NvCVImage_TransferFromArray() (and eventually NvCVImage_Transfer());
+//! however it is necessary to call NvCVImage_MapResource beforehand, and NvCVImage_UnmapResource
+//! before allowing D3D to render into it.
+//! \param[in,out]  im  the image to be initialized.
+//! \param[in]      tx  the texture to be used for initialization.
+//! \return         NVCV_SUCCESS if successful.
+NvCV_Status NvCV_API NvCVImage_InitFromD3D11Texture(NvCVImage *im, struct ID3D11Texture2D *tx);
+
+
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // __NVTRANSFER_D3D11_H__
+
diff --git a/vfx/nvvfx/include/nvVideoEffects.h b/vfx/nvvfx/include/nvVideoEffects.h
new file mode 100644
index 0000000..9d461bc
--- /dev/null
+++ b/vfx/nvvfx/include/nvVideoEffects.h
@@ -0,0 +1,228 @@
+/*###############################################################################
+#
+# Copyright (c) 2020 NVIDIA Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+###############################################################################*/
+
+#ifndef __NVVIDEO_EFFECTS_H__
+#define __NVVIDEO_EFFECTS_H__
+
+#include "nvCVImage.h"
+
+#ifndef NvVFX_API
+  #ifdef _WIN32
+    #ifdef NVVFX_API_EXPORT
+      #define NvVFX_API __declspec(dllexport) __cdecl
+    #else
+      #define NvVFX_API
+    #endif
+  #else //if linux
+    #define NvVFX_API   // TODO: Linux code goes here
+  #endif // _WIN32 or linux
+#endif //NvVFX_API
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Forward declaration for CUDA API
+typedef struct CUstream_st* CUstream;
+
+//! We use strings as effect selectors.
+typedef const char* NvVFX_EffectSelector;
+
+//! We use strings as parameter selectors.
+typedef const char* NvVFX_ParameterSelector;
+
+//! Each effect instantiation is associated with an opaque handle.
+struct NvVFX_Object;
+typedef struct NvVFX_Object NvVFX_Object, *NvVFX_Handle;
+
+//! Get the SDK version
+//! \param[in,out]  version    Pointer to an unsigned int set to 
+//!                            (major << 24) | (minor << 16) | (build << 8) | 0
+//! \return         NVCV_SUCCESS  if the version was set
+//! \return         NVCV_ERR_PARAMETER  if version was NULL
+NvCV_Status NvVFX_API NvVFX_GetVersion(unsigned int *version);
+
+//! Create an new instantiation of a video effect.
+//! \param[in]  code    the selector code for the desired video effect.
+//! \param[out] effect  a handle to the Video Effect instantiation.
+//! \return     NVCV_SUCCESS   if the operation was successful.
+NvCV_Status NvVFX_API NvVFX_CreateEffect(NvVFX_EffectSelector code, NvVFX_Handle *effect);
+
+
+//! Delete a previously allocated video effect.
+//! \param[in]  effect a handle to the video effect to be deleted.
+void NvVFX_API NvVFX_DestroyEffect(NvVFX_Handle effect);
+
+
+//! Set the value of the selected parameter (unsigned int, int, float double, unsigned long long, void*, CUstream).
+//! \param[in,out]  effect      The effect to configure.
+//! \param[in]      paramName   The selector of the effect parameter to configure.
+//! \param[in]      val         The value to be assigned to the selected effect parameter.
+//! \return NVCV_SUCCESS       if the operation was successful.
+//! \return NVCV_ERR_EFFECT    if an invalid effect handle was supplied.
+//! \return NVCV_ERR_SELECTOR  if the chosen effect does not understand the specified selector and data type.
+//! \return NVCV_ERR_PARAMETER if the value was out of range.
+NvCV_Status NvVFX_API NvVFX_SetU32(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, unsigned int val);
+NvCV_Status NvVFX_API NvVFX_SetS32(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, int val);
+NvCV_Status NvVFX_API NvVFX_SetF32(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, float val);
+NvCV_Status NvVFX_API NvVFX_SetF64(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, double val);
+NvCV_Status NvVFX_API NvVFX_SetU64(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, unsigned long long val);
+NvCV_Status NvVFX_API NvVFX_SetObject(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, void *ptr);
+NvCV_Status NvVFX_API NvVFX_SetCudaStream(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, CUstream stream);
+
+//! Set the selected image descriptor.
+//! A shallow copy of the descriptor is made (preserving the pixel pointer), so that an ephemeral NvVFXImage_Init()
+//! wrapper may be used in the call to NvVFX_SetImage() if desired, without having to preserve it for the lifetime
+//! of the effect. The effect does not take ownership of the pixel buffer.
+//! \param[in,out]  effect      The effect to configure.
+//! \param[in]      paramName   The selector of the effect image to configure.
+//! \param[in]      im          Pointer to the image descriptor to be used for the selected effect image.
+//!                             NULL clears the selected internal image descriptor.
+//! \return NVCV_SUCCESS       if the operation was successful.
+//! \return NVCV_ERR_EFFECT    if an invalid effect handle was supplied.
+//! \return NVCV_ERR_SELECTOR  if the chosen effect does not understand the specified image selector.
+//! \return NVCV_ERR_PARAMETER if an unexpected NULL pointer was supplied.
+NvCV_Status NvVFX_API NvVFX_SetImage(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, NvCVImage *im);
+
+//! Set the value of the selected string, by making a copy in the effect handle.
+//! \param[in,out]  effect      The effect to configure.
+//! \param[in]      paramName   The selector of the effect string to configure.
+//! \param[in]      str         The value to be assigned to the selected effect string. NULL clears the selected string.
+//! \return NVCV_SUCCESS       if the operation was successful.
+//! \return NVCV_ERR_EFFECT    if an invalid effect handle was supplied.
+//! \return NVCV_ERR_SELECTOR  if the chosen effect does not understand the specified string selector.
+//! \return NVCV_ERR_PARAMETER if an unexpected NULL pointer was supplied.
+NvCV_Status NvVFX_API NvVFX_SetString(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, const char *str);
+
+
+//! Get the value of the selected parameter (unsigned int, int, float double, unsigned long long, void*, CUstream).
+//! These are not typically used except for testing.
+//! \param[in]  effect    the effect to be queried.
+//! \param[in]  paramName the selector of the effect parameter to retrieve.
+//! \param[out] val       a place to store the retrieved parameter.
+//! \return NVCV_SUCCESS       if the operation was successful.
+//! \return NVCV_ERR_EFFECT    if an invalid effect handle was supplied.
+//! \return NVCV_ERR_SELECTOR  if the chosen effect does not understand the specified selector and data type.
+//! \return NVCV_ERR_PARAMETER if an unexpected NULL pointer was supplied.
+//! \note Typically, these are not used outside of testing.
+NvCV_Status NvVFX_API NvVFX_GetU32(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, unsigned int *val);
+NvCV_Status NvVFX_API NvVFX_GetS32(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, int *val);
+NvCV_Status NvVFX_API NvVFX_GetF32(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, float *val);
+NvCV_Status NvVFX_API NvVFX_GetF64(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, double *val);
+NvCV_Status NvVFX_API NvVFX_GetU64(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, unsigned long long *val);
+NvCV_Status NvVFX_API NvVFX_GetObject(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, void **ptr);
+NvCV_Status NvVFX_API NvVFX_GetCudaStream(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, CUstream *stream);
+
+//! Get a copy of the selected image descriptor.
+//! If GetImage() is called before SetImage(), the returned descriptor will be filled with zeros.
+//! Otherwise, the values will be identical to that in the previous SetImage() call,
+//! with the exception of deletePtr, deleteProc and bufferBytes, which will be 0.
+//! \param[in]  effect    the effect to be queried.
+//! \param[in]  paramName the selector of the effect image to retrieve.
+//! \param[out] val       a place to store the selected image descriptor.
+//!                       A pointer to an empty NvCVImage (deletePtr==NULL) should be supplied to avoid memory leaks.
+//! \return NVCV_SUCCESS       if the operation was successful.
+//! \return NVCV_ERR_EFFECT    if an invalid effect handle was supplied.
+//! \return NVCV_ERR_SELECTOR  if the chosen effect does not understand the specified image selector.
+//! \return NVCV_ERR_PARAMETER if an unexpected NULL pointer was supplied.
+//! \note Typically, this is not used outside of testing.
+NvCV_Status NvVFX_API NvVFX_GetImage(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, NvCVImage *im);
+
+//! Get the specified string.
+//! If GetString() is called before SetString(), the returned string will be empty.
+//! Otherwise, the string will be identical to that in the previous SetString() call,
+//! though it will be stored in a different location.
+//! \param[in]  effect    the effect to be queried.
+//! \param[in]  paramName the selector of the effect string to retrieve.
+//! \param[out] val       a place to store a pointer to the selected string.
+//! \return NVCV_SUCCESS       if the operation was successful.
+//! \return NVCV_ERR_EFFECT    if an invalid effect handle was supplied.
+//! \return NVCV_ERR_SELECTOR  if the chosen effect does not understand the specified string selector.
+//! \return NVCV_ERR_PARAMETER if an unexpected NULL pointer was supplied.
+//! \note Typically, this is not used outside of testing.
+NvCV_Status NvVFX_API NvVFX_GetString(NvVFX_Handle effect, NvVFX_ParameterSelector paramName, const char **str);
+
+//! Run the selected effect.
+//! \param[in]  effect     the effect object handle.
+//! \param[in]  async   run the effect asynchronously if nonzero; otherwise run synchronously.
+//! \todo       Should async instead be a pointer to a place to store a token that can be useful
+//!             for synchronizing two streams alter?
+//! \return     NVFVX_SUCCESS     if the operation was successful.
+//! \return     NVCV_ERR_EFFECT  if an invalid effect handle was supplied.
+NvCV_Status NvVFX_API NvVFX_Run(NvVFX_Handle effect, int async);
+
+//! Load the model based on the set params.
+//! \param[in]  effect     the effect object handle.
+//! \return     NVFVX_SUCCESS     if the operation was successful.
+//! \return     NVCV_ERR_EFFECT  if an invalid effect handle was supplied.
+NvCV_Status NvVFX_API NvVFX_Load(NvVFX_Handle effect);
+
+//! Wrapper for cudaStreamCreate(), if it is desired to avoid linking with the cuda lib.
+//! \param[out] stream  A place to store the newly allocated stream.
+//! \return     NVFVX_SUCCESS         if the operation was successful,
+//!             NVCV_ERR_CUDA_VALUE  if not.
+NvCV_Status NvVFX_API NvVFX_CudaStreamCreate(CUstream *stream);
+
+//! Wrapper for cudaStreamDestroy(), if it is desired to avoid linking with the cuda lib.
+//! \param[in]  stream  The stream to destroy.
+//! \return     NVFVX_SUCCESS         if the operation was successful,
+//!             NVCV_ERR_CUDA_VALUE  if not.
+NvCV_Status NvVFX_API NvVFX_CudaStreamDestroy(CUstream stream);
+
+
+// Filter selectors
+#define NVVFX_FX_TRANSFER               "Transfer"
+#define NVVFX_FX_GREEN_SCREEN           "GreenScreen"         // Green Screen 
+#define NVVFX_FX_BGBLUR                 "BackgroundBlur"     // Background blur
+#define NVVFX_FX_ARTIFACT_REDUCTION     "ArtifactReduction"   // Artifact Reduction  
+#define NVVFX_FX_SUPER_RES              "SuperRes"            // Super Res 
+#define NVVFX_FX_SR_UPSCALE             "Upscale"             // Super Res Upscale 
+#define NVVFX_FX_DENOISING              "Denoising"           // Denoising 
+
+// Parameter selectors
+#define NVVFX_INPUT_IMAGE_0             "SrcImage0"           //!< There may be multiple input images
+#define NVVFX_INPUT_IMAGE               NVVFX_INPUT_IMAGE_0   //!< but there is usually only one input image
+#define NVVFX_INPUT_IMAGE_1             "SrcImage1"           //!< Source Image 1
+#define NVVFX_OUTPUT_IMAGE_0            "DstImage0"           //!< There may be multiple output images
+#define NVVFX_OUTPUT_IMAGE              NVVFX_OUTPUT_IMAGE_0  //!< but there is usually only one output image
+#define NVVFX_MODEL_DIRECTORY           "ModelDir"            //!< The directory where the model may be found
+#define NVVFX_CUDA_STREAM               "CudaStream"          //!< The CUDA stream to use
+#define NVVFX_INFO                      "Info"                //!< Get info about the effects
+#define NVVFX_SCALE                     "Scale"               //!< Scale factor
+#define NVVFX_STRENGTH                  "Strength"            //!< Strength for different filters
+#define NVVFX_STRENGTH_LEVELS           "StrengthLevels"      //!< Number of strength levels
+#define NVVFX_MODE                      "Mode"                //!< Mode for different filters
+#define NVVFX_TEMPORAL                  "Temporal"            //!< Temporal mode: 0=image, 1=video
+#define NVVFX_GPU                       "GPU"                 //!< Preferred GPU (optional)
+#define NVVFX_BATCH_SIZE                "BatchSize"           //!< Batch Size (default 1)
+#define NVVFX_MODEL_BATCH               "ModelBatch"          //!< The preferred batching model to use (default 1)
+#define NVVFX_STATE                     "State"               //!< State variable  
+#define NVVFX_STATE_SIZE                "StateSize"           //!< Number of bytes needed to store state  
+
+
+
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
+#endif // __NVVIDEO_EFFECTS_H__
diff --git a/vfx/nvvfx/src/NVVideoEffectsProxy.cpp b/vfx/nvvfx/src/NVVideoEffectsProxy.cpp
new file mode 100644
index 0000000..c4770b9
--- /dev/null
+++ b/vfx/nvvfx/src/NVVideoEffectsProxy.cpp
@@ -0,0 +1,258 @@
+/*###############################################################################
+#
+# Copyright (c) 2020 NVIDIA Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+###############################################################################*/
+#include <string>
+
+#include "nvVideoEffects.h"
+
+#ifdef _WIN32
+  #define _WINSOCKAPI_
+  #include <windows.h>
+  #include <tchar.h>
+#else // !_WIN32
+  #include <dlfcn.h>
+  typedef void* HMODULE;
+  typedef void* HANDLE;
+  typedef void* HINSTANCE;
+#endif // _WIN32
+
+// Parameter string does not include the file extension
+#ifdef _WIN32
+#define nvLoadLibrary(library) LoadLibrary(TEXT(library ".dll"))
+#else // !_WIN32
+#define nvLoadLibrary(library) dlopen("lib" library ".so", RTLD_LAZY)
+#endif // _WIN32
+
+
+inline void* nvGetProcAddress(HINSTANCE handle, const char* proc) {
+  if (nullptr == handle) return nullptr;
+#ifdef _WIN32
+  return (void *)GetProcAddress(handle, proc);
+#else // !_WIN32
+  return dlsym(handle, proc);
+#endif // _WIN32
+}
+
+inline int nvFreeLibrary(HINSTANCE handle) {
+#ifdef _WIN32
+  return FreeLibrary(handle);
+#else
+  return dlclose(handle);
+#endif
+}
+
+HINSTANCE getNvVfxLib() {
+
+  TCHAR path[MAX_PATH], fullPath[MAX_PATH];
+
+  // There can be multiple apps on the system,
+  // some might include the SDK in the app package and
+  // others might expect the SDK to be installed in Program Files
+  GetEnvironmentVariable(TEXT("NV_VIDEO_EFFECTS_PATH"), path, MAX_PATH);
+  if (_tcscmp(path, TEXT("USE_APP_PATH"))) {
+    // App has not set environment variable to "USE_APP_PATH"
+    // So pick up the SDK dll and dependencies from Program Files
+    GetEnvironmentVariable(TEXT("ProgramFiles"), path, MAX_PATH);
+    size_t max_len = sizeof(fullPath)/sizeof(TCHAR);
+    _stprintf_s(fullPath, max_len, TEXT("%s\\NVIDIA Corporation\\NVIDIA Video Effects\\"), path);
+    SetDllDirectory(fullPath);
+  }
+  static const HINSTANCE NvVfxLib = nvLoadLibrary("NVVideoEffects");
+  return NvVfxLib;
+}
+
+NvCV_Status NvVFX_API NvVFX_GetVersion(unsigned int* version) {
+  static const auto funcPtr = (decltype(NvVFX_GetVersion)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetVersion");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(version);
+}
+
+NvCV_Status NvVFX_API NvVFX_CreateEffect(NvVFX_EffectSelector code, NvVFX_Handle* obj) {
+  static const auto funcPtr = (decltype(NvVFX_CreateEffect)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_CreateEffect");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(code, obj);
+}
+
+void NvVFX_API NvVFX_DestroyEffect(NvVFX_Handle obj) {
+  static const auto funcPtr = (decltype(NvVFX_DestroyEffect)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_DestroyEffect");
+
+  if (nullptr != funcPtr) funcPtr(obj);
+}
+
+NvCV_Status NvVFX_API NvVFX_SetU32(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, unsigned int val) {
+  static const auto funcPtr = (decltype(NvVFX_SetU32)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_SetU32");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, paramName, val);
+}
+
+NvCV_Status NvVFX_API NvVFX_SetS32(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, int val) {
+  static const auto funcPtr = (decltype(NvVFX_SetS32)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_SetS32");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, paramName, val);
+}
+
+NvCV_Status NvVFX_API NvVFX_SetF32(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, float val) {
+  static const auto funcPtr = (decltype(NvVFX_SetF32)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_SetF32");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, paramName, val);
+}
+
+NvCV_Status NvVFX_API NvVFX_SetF64(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, double val) {
+  static const auto funcPtr = (decltype(NvVFX_SetF64)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_SetF64");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, paramName, val);
+}
+
+NvCV_Status NvVFX_API NvVFX_SetU64(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, unsigned long long val) {
+  static const auto funcPtr = (decltype(NvVFX_SetU64)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_SetU64");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, paramName, val);
+}
+
+NvCV_Status NvVFX_API NvVFX_SetImage(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, NvCVImage* im) {
+  static const auto funcPtr = (decltype(NvVFX_SetImage)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_SetImage");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, paramName, im);
+}
+
+NvCV_Status NvVFX_API NvVFX_SetObject(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, void* ptr) {
+  static const auto funcPtr = (decltype(NvVFX_SetObject)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_SetObject");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, paramName, ptr);
+}
+
+NvCV_Status NvVFX_API NvVFX_SetString(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, const char* str) {
+  static const auto funcPtr = (decltype(NvVFX_SetString)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_SetString");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, paramName, str);
+}
+
+NvCV_Status NvVFX_API NvVFX_SetCudaStream(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, CUstream stream) {
+  static const auto funcPtr = (decltype(NvVFX_SetCudaStream)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_SetCudaStream");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, paramName, stream);
+}
+
+NvCV_Status NvVFX_API NvVFX_GetU32(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, unsigned int* val) {
+  static const auto funcPtr = (decltype(NvVFX_GetU32)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetU32");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, paramName, val);
+}
+
+NvCV_Status NvVFX_API NvVFX_GetS32(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, int* val) {
+  static const auto funcPtr = (decltype(NvVFX_GetS32)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetS32");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, paramName, val);
+}
+
+NvCV_Status NvVFX_API NvVFX_GetF32(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, float* val) {
+  static const auto funcPtr = (decltype(NvVFX_GetF32)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetF32");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, paramName, val);
+}
+
+NvCV_Status NvVFX_API NvVFX_GetF64(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, double* val) {
+  static const auto funcPtr = (decltype(NvVFX_GetF64)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetF64");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, paramName, val);
+}
+
+NvCV_Status NvVFX_API NvVFX_GetU64(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, unsigned long long* val) {
+  static const auto funcPtr = (decltype(NvVFX_GetU64)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetU64");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, paramName, val);
+}
+
+NvCV_Status NvVFX_API NvVFX_GetImage(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, NvCVImage* im) {
+  static const auto funcPtr = (decltype(NvVFX_GetImage)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetImage");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, paramName, im);
+}
+
+NvCV_Status NvVFX_API NvVFX_GetObject(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, void** ptr) {
+  static const auto funcPtr = (decltype(NvVFX_GetObject)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetObject");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, paramName, ptr);
+}
+
+NvCV_Status NvVFX_API NvVFX_GetString(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, const char** str) {
+  static const auto funcPtr = (decltype(NvVFX_GetString)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetString");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, paramName, str);
+}
+
+NvCV_Status NvVFX_API NvVFX_GetCudaStream(NvVFX_Handle obj, NvVFX_ParameterSelector paramName, CUstream* stream) {
+  static const auto funcPtr = (decltype(NvVFX_GetCudaStream)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_GetCudaStream");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, paramName, stream);
+}
+
+NvCV_Status NvVFX_API NvVFX_Run(NvVFX_Handle obj, int async) {
+  static const auto funcPtr = (decltype(NvVFX_Run)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_Run");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj, async);
+}
+
+NvCV_Status NvVFX_API NvVFX_Load(NvVFX_Handle obj) {
+  static const auto funcPtr = (decltype(NvVFX_Load)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_Load");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(obj);
+}
+
+NvCV_Status NvVFX_API NvVFX_CudaStreamCreate(CUstream* stream) {
+  static const auto funcPtr =
+      (decltype(NvVFX_CudaStreamCreate)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_CudaStreamCreate");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(stream);
+}
+
+NvCV_Status NvVFX_API NvVFX_CudaStreamDestroy(CUstream stream) {
+  static const auto funcPtr =
+      (decltype(NvVFX_CudaStreamDestroy)*)nvGetProcAddress(getNvVfxLib(), "NvVFX_CudaStreamDestroy");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(stream);
+}
+
diff --git a/vfx/nvvfx/src/nvCVImageProxy.cpp b/vfx/nvvfx/src/nvCVImageProxy.cpp
new file mode 100644
index 0000000..a6694da
--- /dev/null
+++ b/vfx/nvvfx/src/nvCVImageProxy.cpp
@@ -0,0 +1,311 @@
+#if defined(linux) || defined(unix) || defined(__linux)
+#warning nvCVImageProxy.cpp not ported
+#else
+/*###############################################################################
+#
+# Copyright 2020 NVIDIA Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+###############################################################################*/
+#include <string>
+#include "nvCVImage.h"
+
+#ifdef _WIN32
+  #define _WINSOCKAPI_
+  #include <windows.h>
+  #include <tchar.h>
+  #include "nvTransferD3D.h"
+  #include "nvTransferD3D11.h"
+#else // !_WIN32
+  #include <dlfcn.h>
+  typedef void* HMODULE;
+  typedef void* HANDLE;
+  typedef void* HINSTANCE;
+#endif // _WIN32
+
+// Parameter string does not include the file extension
+#ifdef _WIN32
+#define nvLoadLibrary(library) LoadLibrary(TEXT(library ".dll"))
+#else // !_WIN32
+#define nvLoadLibrary(library) dlopen("lib" library ".so", RTLD_LAZY)
+#endif // _WIN32
+
+
+inline void* nvGetProcAddress(HINSTANCE handle, const char* proc) {
+  if (nullptr == handle) return nullptr;
+#ifdef _WIN32
+  return (void *)GetProcAddress(handle, proc);
+#else // !_WIN32
+  return dlsym(handle, proc);
+#endif // _WIN32
+}
+
+inline int nvFreeLibrary(HINSTANCE handle) {
+#ifdef _WIN32
+  return FreeLibrary(handle);
+#else
+  return dlclose(handle);
+#endif
+}
+
+HINSTANCE getNvCVImageLib() {
+  TCHAR path[MAX_PATH], tmpPath[MAX_PATH], fullPath[MAX_PATH];
+  static HINSTANCE nvCVImageLib = NULL;
+  static bool bSDKPathSet = false;  
+  if (!bSDKPathSet) {
+    // There can be multiple apps on the system,
+    // some might include the SDK in the app package and
+    // others might expect the SDK to be installed in Program Files
+    GetEnvironmentVariable(TEXT("NV_VIDEO_EFFECTS_PATH"), path, MAX_PATH);
+    GetEnvironmentVariable(TEXT("NV_AR_SDK_PATH"), tmpPath, MAX_PATH);
+    if (_tcscmp(path, TEXT("USE_APP_PATH")) && _tcscmp(tmpPath, TEXT("USE_APP_PATH"))) {
+      // App has not set environment variable to "USE_APP_PATH"
+      // So pick up the SDK dll and dependencies from Program Files
+      GetEnvironmentVariable(TEXT("ProgramFiles"), path, MAX_PATH);
+      size_t max_len = sizeof(fullPath) / sizeof(TCHAR);
+      _stprintf_s(fullPath, max_len, TEXT("%s\\NVIDIA Corporation\\NVIDIA Video Effects\\"), path);
+      SetDllDirectory(fullPath);
+      nvCVImageLib = nvLoadLibrary("NVCVImage");
+      if (!nvCVImageLib) {
+        _stprintf_s(fullPath, max_len, TEXT("%s\\NVIDIA Corporation\\NVIDIA AR SDK\\"), path);
+        SetDllDirectory(fullPath);
+        nvCVImageLib = nvLoadLibrary("NVCVImage");
+      }
+    }
+    bSDKPathSet = true;
+  }
+  return nvCVImageLib;
+}
+ 
+NvCV_Status NvCV_API NvCVImage_Init(NvCVImage* im, unsigned width, unsigned height, int pitch, void* pixels,
+                                       NvCVImage_PixelFormat format, NvCVImage_ComponentType type, unsigned isPlanar,
+                                       unsigned onGPU) {
+  static const auto funcPtr = (decltype(NvCVImage_Init)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_Init");
+  
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(im, width, height, pitch, pixels, format, type, isPlanar, onGPU);
+}
+
+void NvCV_API NvCVImage_InitView(NvCVImage* subImg, NvCVImage* fullImg, int x, int y, unsigned width,
+                                   unsigned height) {
+  static const auto funcPtr = (decltype(NvCVImage_InitView)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_InitView");
+ 
+  if (nullptr != funcPtr) funcPtr(subImg, fullImg, x, y, width, height);
+}
+
+NvCV_Status NvCV_API NvCVImage_Alloc(NvCVImage* im, unsigned width, unsigned height, NvCVImage_PixelFormat format,
+                              NvCVImage_ComponentType type, unsigned isPlanar, unsigned onGPU, unsigned alignment) {
+  static const auto funcPtr = (decltype(NvCVImage_Alloc)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_Alloc");
+  
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(im, width, height, format, type, isPlanar, onGPU, alignment);
+}
+
+NvCV_Status NvCV_API NvCVImage_Realloc(NvCVImage* im, unsigned width, unsigned height,
+                                          NvCVImage_PixelFormat format, NvCVImage_ComponentType type,
+                                          unsigned isPlanar, unsigned onGPU, unsigned alignment) {
+  static const auto funcPtr = (decltype(NvCVImage_Realloc)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_Realloc");
+  
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(im, width, height, format, type, isPlanar, onGPU, alignment);
+}
+
+void NvCV_API NvCVImage_Dealloc(NvCVImage* im) {
+  static const auto funcPtr = (decltype(NvCVImage_Dealloc)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_Dealloc");
+  
+  if (nullptr != funcPtr) funcPtr(im);
+}
+
+NvCV_Status NvCV_API NvCVImage_Create(unsigned width, unsigned height, NvCVImage_PixelFormat format,
+                                         NvCVImage_ComponentType type, unsigned isPlanar, unsigned onGPU,
+                                         unsigned alignment, NvCVImage** out) {
+  static const auto funcPtr = (decltype(NvCVImage_Create)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_Create");
+  
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(width, height, format, type, isPlanar, onGPU, alignment, out);
+}
+
+void NvCV_API NvCVImage_Destroy(NvCVImage* im) {
+  static const auto funcPtr = (decltype(NvCVImage_Destroy)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_Destroy");
+  
+  if (nullptr != funcPtr) funcPtr(im);
+}
+
+void NvCV_API NvCVImage_ComponentOffsets(NvCVImage_PixelFormat format, int* rOff, int* gOff, int* bOff, int* aOff,
+                                           int* yOff) {
+  static const auto funcPtr =
+      (decltype(NvCVImage_ComponentOffsets)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_ComponentOffsets");
+  
+  if (nullptr != funcPtr) funcPtr(format, rOff, gOff, bOff, aOff, yOff);
+}
+
+NvCV_Status NvCV_API NvCVImage_Transfer(const NvCVImage* src, NvCVImage* dst, float scale, CUstream_st* stream,
+                                           NvCVImage* tmp) {
+  static const auto funcPtr = (decltype(NvCVImage_Transfer)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_Transfer");
+  
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(src, dst, scale, stream, tmp);
+}
+
+NvCV_Status NvCV_API NvCVImage_TransferRect(const NvCVImage *src, const NvCVRect2i *srcRect, NvCVImage *dst,
+  const NvCVPoint2i *dstPt, float scale, struct CUstream_st *stream, NvCVImage *tmp) {
+  static const auto funcPtr = (decltype(NvCVImage_TransferRect)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_TransferRect");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(src, srcRect, dst, dstPt, scale, stream, tmp);
+}
+
+NvCV_Status NvCV_API NvCVImage_TransferFromYUV(const void *y, int yPixBytes, int yPitch, const void *u, const void *v,
+  int uvPixBytes, int uvPitch, NvCVImage_PixelFormat yuvFormat, NvCVImage_ComponentType yuvType, unsigned yuvColorSpace,
+  unsigned yuvMemSpace, NvCVImage *dst, const NvCVRect2i *dstRect, float scale, struct CUstream_st *stream, NvCVImage *tmp) {
+  static const auto funcPtr = (decltype(NvCVImage_TransferFromYUV)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_TransferFromYUV");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(y, yPixBytes, yPitch, u, v, uvPixBytes, uvPitch, yuvFormat, yuvType, yuvColorSpace, yuvMemSpace, dst,
+    dstRect, scale, stream, tmp);
+}
+
+NvCV_Status NvCV_API NvCVImage_TransferToYUV(const NvCVImage *src, const NvCVRect2i *srcRect, 
+  const void *y, int yPixBytes, int yPitch, const void *u, const void *v, int uvPixBytes, int uvPitch,
+  NvCVImage_PixelFormat yuvFormat, NvCVImage_ComponentType yuvType, unsigned yuvColorSpace, unsigned yuvMemSpace,
+  float scale, struct CUstream_st *stream, NvCVImage *tmp) {
+  static const auto funcPtr = (decltype(NvCVImage_TransferToYUV)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_TransferToYUV");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(src, srcRect, y, yPixBytes, yPitch, u, v, uvPixBytes, uvPitch, yuvFormat, yuvType, yuvColorSpace, yuvMemSpace, scale, stream, tmp);
+}
+
+NvCV_Status NvCV_API NvCVImage_MapResource(NvCVImage *im, struct CUstream_st *stream) {
+  static const auto funcPtr = (decltype(NvCVImage_MapResource)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_MapResource");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(im, stream);
+}
+
+NvCV_Status NvCV_API NvCVImage_UnmapResource(NvCVImage *im, struct CUstream_st *stream) {
+  static const auto funcPtr = (decltype(NvCVImage_UnmapResource)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_UnmapResource");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(im, stream);
+}
+
+#if RTX_CAMERA_IMAGE == 0
+NvCV_Status NvCV_API NvCVImage_Composite(const NvCVImage* fg, const NvCVImage* bg, const NvCVImage* mat, NvCVImage* dst,
+    struct CUstream_st *stream) {
+  static const auto funcPtr = (decltype(NvCVImage_Composite)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_Composite");
+   
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(fg, bg, mat, dst, stream);
+}
+#else //  RTX_CAMERA_IMAGE == 1
+NvCV_Status NvCV_API NvCVImage_Composite(const NvCVImage* fg, const NvCVImage* bg, const NvCVImage* mat, NvCVImage* dst) {
+  static const auto funcPtr = (decltype(NvCVImage_Composite)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_Composite");
+   
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(fg, bg, mat, dst);
+}
+#endif //  RTX_CAMERA_IMAGE
+
+NvCV_Status NvCV_API NvCVImage_CompositeRect(
+      const NvCVImage *fg,  const NvCVPoint2i *fgOrg,
+      const NvCVImage *bg,  const NvCVPoint2i *bgOrg,
+      const NvCVImage *mat, unsigned mode,
+      NvCVImage       *dst, const NvCVPoint2i *dstOrg,
+      struct CUstream_st *stream) {
+  static const auto funcPtr = (decltype(NvCVImage_CompositeRect)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_CompositeRect");
+   
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(fg, fgOrg, bg, bgOrg, mat, mode, dst, dstOrg, stream);
+}
+
+NvCV_Status NvCV_API NvCVImage_CompositeOverConstant(const NvCVImage* src, const NvCVImage* mat,
+                                                        const unsigned char bgColor[3], NvCVImage* dst) {
+  static const auto funcPtr =
+      (decltype(NvCVImage_CompositeOverConstant)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_CompositeOverConstant");
+   
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(src, mat, bgColor, dst);
+}
+
+NvCV_Status NvCV_API NvCVImage_FlipY(const NvCVImage* src, NvCVImage* dst) {
+  static const auto funcPtr = (decltype(NvCVImage_FlipY)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_FlipY");
+   
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(src, dst);
+}
+
+#ifdef _WIN32
+__declspec(dllexport) const char* __cdecl
+#else
+const char*
+#endif  // _WIN32 or linux
+    NvCV_GetErrorStringFromCode(NvCV_Status code) {
+  static const auto funcPtr =
+      (decltype(NvCV_GetErrorStringFromCode)*)nvGetProcAddress(getNvCVImageLib(), "NvCV_GetErrorStringFromCode");
+  
+  if (nullptr == funcPtr) return "Cannot find nvCVImage DLL or its dependencies";
+  return funcPtr(code);
+}
+
+
+
+#ifdef _WIN32 // Direct 3D
+
+NvCV_Status NvCV_API NvCVImage_InitFromD3D11Texture(NvCVImage *im, struct ID3D11Texture2D *tx) {
+  static const auto funcPtr = (decltype(NvCVImage_InitFromD3D11Texture)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_InitFromD3D11Texture");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(im, tx);
+}
+
+NvCV_Status NvCV_API NvCVImage_ToD3DFormat(NvCVImage_PixelFormat format, NvCVImage_ComponentType type, unsigned layout, DXGI_FORMAT *d3dFormat) {
+  static const auto funcPtr = (decltype(NvCVImage_ToD3DFormat)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_ToD3DFormat");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(format, type, layout, d3dFormat);
+}
+
+NvCV_Status NvCV_API NvCVImage_FromD3DFormat(DXGI_FORMAT d3dFormat, NvCVImage_PixelFormat *format, NvCVImage_ComponentType *type, unsigned char *layout) {
+  static const auto funcPtr = (decltype(NvCVImage_FromD3DFormat)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_FromD3DFormat");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(d3dFormat, format, type, layout);
+}
+
+#ifdef __dxgicommon_h__
+
+NvCV_Status NvCV_API NvCVImage_ToD3DColorSpace(unsigned char nvcvColorSpace, DXGI_COLOR_SPACE_TYPE *pD3dColorSpace) {
+  static const auto funcPtr = (decltype(NvCVImage_ToD3DColorSpace)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_ToD3DColorSpace");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(nvcvColorSpace, pD3dColorSpace);
+}
+
+NvCV_Status NvCV_API NvCVImage_FromD3DColorSpace(DXGI_COLOR_SPACE_TYPE d3dColorSpace, unsigned char *pNvcvColorSpace) {
+  static const auto funcPtr = (decltype(NvCVImage_FromD3DColorSpace)*)nvGetProcAddress(getNvCVImageLib(), "NvCVImage_FromD3DColorSpace");
+
+  if (nullptr == funcPtr) return NVCV_ERR_LIBRARY;
+  return funcPtr(d3dColorSpace, pNvcvColorSpace);
+}
+
+#endif // __dxgicommon_h__
+
+#endif // _WIN32 Direct 3D
+
+#endif // enabling for this file
diff --git a/vfx/vfx.cc b/vfx/vfx.cc
new file mode 100644
index 0000000..0dd0ed2
--- /dev/null
+++ b/vfx/vfx.cc
@@ -0,0 +1,265 @@
+#include <memory>
+#include <mutex>
+#include <utility>
+#include <string>
+#include <stdexcept>
+#include <vector>
+#include <stdio.h>
+#include <assert.h>
+
+#include "VapourSynth.h"
+#include "VSHelper.h"
+
+#ifndef _WIN32
+#error "Unsupported platform"
+#else
+static std::vector<std::string> autoDllErrors;
+#define CUDA_DLL L"nvcuda.dll","nvcuda.dll",autoDllErrors
+#endif
+#include "../ngx/cuda.h"
+
+#include "nvvfx/include/nvCVStatus.h"
+#include "nvvfx/include/nvCVImage.h"
+#include "nvvfx/include/nvVideoEffects.h"
+
+#define CK_VFX(x) do { \
+    NvCV_Status r = (x); \
+    if (r != NVCV_SUCCESS) { \
+        fprintf(stderr, "failed VFX call %s: %x (%s)\n", #x, r, NvCV_GetErrorStringFromCode(r)); \
+        abort(); \
+    } \
+} while (0)
+#define CK_CUDA(x) do { \
+    int r = (x); \
+    if (r != CUDA_SUCCESS) { \
+        fprintf(stderr, "failed cuda call %s: %d\n", #x, r); \
+        abort(); \
+    } \
+} while (0)
+
+struct VfxData {
+    std::mutex lock;
+
+    VSNodeRef *node;
+    VSVideoInfo vi;
+    double scale;
+    double strength;
+
+    int in_width, in_height;
+
+    NvVFX_Handle vfx;
+    CUstream stream;
+    CUdeviceptr state;
+
+    NvCVImage srcCpuImg, srcGpuImg;
+    NvCVImage dstCpuImg, dstGpuImg;
+
+    typedef float T;
+    uint64_t in_image_width() const   { return in_width; }
+    uint64_t out_image_width() const  { return vi.width; }
+    uint64_t in_image_height() const  { return in_height; }
+    uint64_t out_image_height() const { return vi.height; }
+
+    VfxData() : node(nullptr), vi(), scale(0), strength(0), vfx(nullptr), stream(nullptr), state(nullptr) {}
+    ~VfxData() {
+        if (vfx) NvVFX_DestroyEffect(vfx);
+        if (stream) NvVFX_CudaStreamDestroy(stream);
+        if (state) cuMemFree_v2(state);
+        NvCVImage_Dealloc(&srcCpuImg);
+        NvCVImage_Dealloc(&srcGpuImg);
+        NvCVImage_Dealloc(&dstCpuImg);
+        NvCVImage_Dealloc(&dstGpuImg);
+    }
+};
+
+static void VS_CC vfxInit(VSMap *in, VSMap *out, void **instanceData, VSNode *node, VSCore *core, const VSAPI *vsapi) {
+    VfxData *d = static_cast<VfxData *>(*instanceData);
+    vsapi->setVideoInfo(&d->vi, 1, node);
+}
+
+static const VSFrameRef *VS_CC vfxGetFrame(int n, int activationReason, void **instanceData, void **frameData, VSFrameContext *frameCtx, VSCore *core, const VSAPI *vsapi) {
+    VfxData *d = static_cast<VfxData *>(*instanceData);
+
+    if (activationReason == arInitial) {
+        vsapi->requestFrameFilter(n, d->node, frameCtx);
+    } else if (activationReason == arAllFramesReady) {
+        const VSFrameRef *src = vsapi->getFrameFilter(n, d->node, frameCtx);
+
+        const VSFormat *fi = d->vi.format;
+        assert(vsapi->getFrameHeight(src, 0) == (int)d->in_image_height());
+        assert(vsapi->getFrameWidth(src, 0) == (int)d->in_image_width());
+        int planes[3] = { 0, 1, 2 };
+        const VSFrameRef *srcf[3] = { nullptr, nullptr, nullptr };
+        VSFrameRef *dst = vsapi->newVideoFrame2(fi, d->out_image_width(), d->out_image_height(), srcf, planes, src, core);
+
+        std::lock_guard<std::mutex> lock(d->lock);
+
+        typedef VfxData::T T;
+        T *host = (T *)d->srcCpuImg.pixels;
+        for (int plane = 0; plane < 3; plane++) {
+            const size_t stride = vsapi->getStride(src, plane);
+            const uint8_t *ptr = (uint8_t*)vsapi->getReadPtr(src, plane);
+            const size_t w = d->in_image_width(), h = d->in_image_height();
+            for (size_t i = 0; i < h; i++)
+                for (size_t j = 0; j < w; j++)
+                    host[plane * h * w + i * w + j] = *(T*)&ptr[i * stride + j * sizeof(T)];
+        }
+
+        CK_VFX(NvCVImage_Transfer(&d->srcCpuImg, &d->srcGpuImg, 1.0f, d->stream, nullptr));
+
+        CK_VFX(NvVFX_Run(d->vfx, 0));
+
+        CK_VFX(NvCVImage_Transfer(&d->dstGpuImg, &d->dstCpuImg, 1.0f, d->stream, nullptr));
+
+        host = (T *)d->dstCpuImg.pixels;
+        for (int plane = 0; plane < 3; plane++) {
+            const size_t stride = vsapi->getStride(dst, plane);
+            uint8_t *ptr = (uint8_t*)vsapi->getWritePtr(dst, plane);
+            const size_t w = d->out_image_width(), h = d->out_image_height();
+            for (size_t i = 0; i < h; i++)
+                for (size_t j = 0; j < d->out_image_width(); j++)
+                    *(T*)&ptr[i * stride + j * sizeof(T)] = host[plane * h * w + i * w + j];
+        }
+
+        vsapi->freeFrame(src);
+        return dst;
+    }
+
+    return nullptr;
+}
+
+static void VS_CC vfxFree(void *instanceData, VSCore *core, const VSAPI *vsapi) {
+    VfxData *d = static_cast<VfxData *>(instanceData);
+    vsapi->freeNode(d->node);
+
+    delete d;
+}
+
+static void VS_CC vfxCreate(const VSMap *in, VSMap *out, void *userData, VSCore *core, const VSAPI *vsapi) {
+    std::unique_ptr<VfxData> d(new VfxData);
+    int err;
+
+    try {
+        if (autoDllErrors.size() > 0) {
+            std::string error, last;
+            for (const auto &s: autoDllErrors) {
+                if (error.size()) {
+                    if (last != s)
+                        error += "; " + s;
+                } else
+                    error = s;
+                last = s;
+            }
+            throw std::runtime_error(error);
+        }
+
+        d->node = vsapi->propGetNode(in, "clip", 0, &err);
+        d->vi = *vsapi->getVideoInfo(d->node);
+
+        if (!isConstantFormat(&d->vi)) {
+            throw std::runtime_error("Only clips with constant format and dimensions allowed");
+        }
+        if (d->vi.format->numPlanes != 3 || d->vi.format->colorFamily != cmRGB)
+            throw std::runtime_error("input clip must be RGB format");
+        if (d->vi.format->sampleType != stFloat || d->vi.format->bitsPerSample != 32)
+            throw std::runtime_error("input clip must be 32-bit float format");
+
+        enum { OP_AR, OP_SUPERRES, OP_DENOISE };
+        const NvVFX_EffectSelector selectors[] = { NVVFX_FX_ARTIFACT_REDUCTION, NVVFX_FX_SUPER_RES, NVVFX_FX_DENOISING };
+        size_t op = int64ToIntS(vsapi->propGetInt(in, "op", 0, &err));
+        if (err) throw std::runtime_error("op is required argument");
+        if (op >= sizeof selectors / sizeof selectors[0])
+            throw std::runtime_error("op is out of range.");
+
+        if (op != OP_SUPERRES)
+            d->scale = 1;
+        else {
+            double scale = vsapi->propGetFloat(in, "scale", 0, &err);
+            if (err) scale = 1;
+            if (scale < 1)
+                throw std::runtime_error("invalid scale parameter");
+            d->scale = scale;
+        }
+
+        double strength = vsapi->propGetFloat(in, "strength", 0, &err);
+        if (err) strength = 0;
+        d->strength = strength;
+
+        const char *modelDir = getenv("MODEL_DIR"); // TODO: configurable model directory?
+        if (modelDir == nullptr)
+            modelDir = "C:\\Program Files\\NVIDIA Corporation\\NVIDIA Video Effects\\models";
+        fprintf(stderr, "MODEL_DIR = %s\n", modelDir);
+
+        NvCV_Status r = NvVFX_CreateEffect(selectors[op], &d->vfx);
+        if (r != NVCV_SUCCESS) {
+            const char *err = NvCV_GetErrorStringFromCode(r);
+            fprintf(stderr, "NvVFX_CreateEffect failed: %x (%s)\n", r, err);
+            throw std::runtime_error("unable to create effect: " + std::string(err));
+        }
+
+        CK_VFX(NvVFX_CudaStreamCreate(&d->stream));
+        CK_VFX(NvVFX_SetCudaStream(d->vfx, NVVFX_CUDA_STREAM, d->stream));
+
+        if (op == OP_AR || op == OP_SUPERRES)
+            r = NvVFX_SetU32(d->vfx, NVVFX_STRENGTH, int(d->strength));
+        else if (op == OP_DENOISE)
+            r = NvVFX_SetF32(d->vfx, NVVFX_STRENGTH, d->strength);
+        else assert(false);
+        if (r != NVCV_SUCCESS) {
+            const char *err = NvCV_GetErrorStringFromCode(r);
+            fprintf(stderr, "NvVFX set strength failed: %x (%s)\n", r, err);
+            throw std::runtime_error("failed to set strength: " + std::string(err));
+        }
+
+        r = NvVFX_SetString(d->vfx, NVVFX_MODEL_DIRECTORY, modelDir);
+        if (r != NVCV_SUCCESS) {
+            fprintf(stderr, "NvVFX set model directory to %s failed: %x (%s)\n", modelDir, r, NvCV_GetErrorStringFromCode(r));
+            throw std::runtime_error("unable to set model directory " + std::string(modelDir));
+        }
+
+        if (op == OP_DENOISE) {
+            unsigned int stateSizeInBytes = 0;
+            CK_VFX(NvVFX_GetU32(d->vfx, NVVFX_STATE_SIZE, &stateSizeInBytes));
+            CK_CUDA(cuMemAlloc_v2(&d->state, stateSizeInBytes));
+            CK_CUDA(cuMemsetD8Async(d->state, 0, stateSizeInBytes, d->stream));
+            void *stateArray[1] = { d->state };
+            CK_VFX(NvVFX_SetObject(d->vfx, NVVFX_STATE, (void*)stateArray));
+        }
+    } catch (std::runtime_error &e) {
+        if (d->node)
+            vsapi->freeNode(d->node);
+        vsapi->setError(out, (std::string{ "DLVFX: " } + e.what()).c_str());
+        return;
+    }
+
+    d->in_width = d->vi.width;
+    d->in_height = d->vi.height;
+    d->vi.width *= d->scale;
+    d->vi.height *= d->scale;
+
+    CK_VFX(NvCVImage_Alloc(&d->srcCpuImg, d->in_image_width(), d->in_image_height(), NVCV_RGB, NVCV_F32, NVCV_PLANAR, NVCV_CPU, 1));
+    CK_VFX(NvCVImage_Alloc(&d->srcGpuImg, d->in_image_width(), d->in_image_height(), NVCV_BGR, NVCV_F32, NVCV_PLANAR, NVCV_GPU, 1));
+    CK_VFX(NvCVImage_Alloc(&d->dstCpuImg, d->out_image_width(), d->out_image_height(), NVCV_RGB, NVCV_F32, NVCV_PLANAR, NVCV_CPU, 1));
+    CK_VFX(NvCVImage_Alloc(&d->dstGpuImg, d->out_image_width(), d->out_image_height(), NVCV_BGR, NVCV_F32, NVCV_PLANAR, NVCV_GPU, 1));
+
+    CK_VFX(NvVFX_SetImage(d->vfx, NVVFX_INPUT_IMAGE, &d->srcGpuImg));
+    CK_VFX(NvVFX_SetImage(d->vfx, NVVFX_OUTPUT_IMAGE, &d->dstGpuImg));
+
+    CK_VFX(NvVFX_Load(d->vfx));
+
+    vsapi->createFilter(in, out, "DLVFX", vfxInit, vfxGetFrame, vfxFree, fmParallel, 0, d.release(), core);
+}
+
+//////////////////////////////////////////
+// Init
+
+#ifndef STANDALONE_VFX
+void VS_CC vfxInitialize(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin *plugin) {
+#else
+VS_EXTERNAL_API(void) VapourSynthPluginInit(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin *plugin) {
+    configFunc("info.akarin.plugin", "akarin2", "Experimental Nvidia Maxine plugin", VAPOURSYNTH_API_VERSION, 1, plugin);
+#endif
+    unsigned int version = 0;
+    if (NvVFX_GetVersion(&version) == NVCV_SUCCESS)
+        registerFunc("DLVFX", "clip:clip;op:int;scale:float:opt;strength:float:opt", vfxCreate, nullptr, plugin);
+}