diff --git a/avisynth/src/TCannyMod.vcxproj b/avisynth/src/TCannyMod.vcxproj index a412b34..f80c6a7 100644 --- a/avisynth/src/TCannyMod.vcxproj +++ b/avisynth/src/TCannyMod.vcxproj @@ -140,15 +140,15 @@ + + + - - - diff --git a/avisynth/src/edge_detection.h b/avisynth/src/edge_detection.cpp similarity index 86% rename from avisynth/src/edge_detection.h rename to avisynth/src/edge_detection.cpp index 69b7bb2..d43deff 100644 --- a/avisynth/src/edge_detection.h +++ b/avisynth/src/edge_detection.cpp @@ -1,5 +1,5 @@ /* - edge_detection.h + edge_detection.cpp This file is part of TCannyMod @@ -23,11 +23,12 @@ */ -#ifndef EDGE_DETECTION_H -#define EDGE_DETECTION_H #include #include +#include +#include +#include "tcannymod.h" #include "simd.h" @@ -273,10 +274,37 @@ non_max_suppress(const float* emaskp, const size_t em_pitch, } -void __stdcall -hysteresis(uint8_t* hystp, const size_t hpitch, float* blurp, - const size_t bpitch, const int width, const int height, - const float tmin, const float tmax) noexcept; +edge_detection_t +get_edge_detection(bool use_sobel, bool calc_dir, arch_t arch) noexcept +{ + using std::make_tuple; + std::map, edge_detection_t> func; + + func[make_tuple(false, false, HAS_SSE2)] = standard<__m128, __m128i, false>; + func[make_tuple(false, true, HAS_SSE2)] = standard<__m128, __m128i, true>; + func[make_tuple(true, false, HAS_SSE2)] = sobel<__m128, __m128i, false>; + func[make_tuple(true, true, HAS_SSE2)] = sobel<__m128, __m128i, true>; +#if defined(__AVX2__) + func[make_tuple(false, false, HAS_AVX2)] = standard<__m256, __m256i, false>; + func[make_tuple(false, true, HAS_AVX2)] = standard<__m256, __m256i, true>; + func[make_tuple(true, false, HAS_AVX2)] = sobel<__m256, __m256i, false>; + func[make_tuple(true, true, HAS_AVX2)] = sobel<__m256, __m256i, true>; +#endif + + arch_t a = arch == HAS_SSE41 ? HAS_SSE2 : arch; + + return func[make_tuple(use_sobel, calc_dir, a)]; +} + +non_max_suppress_t get_non_max_suppress(arch_t arch) noexcept +{ +#if defined(__AVX2__) + if (arch == HAS_AVX2) { + return non_max_suppress<__m256, __m256i>; + } #endif + return non_max_suppress<__m128, __m128i>; +} + diff --git a/avisynth/src/gaussian_blur.h b/avisynth/src/gaussian_blur.cpp similarity index 87% rename from avisynth/src/gaussian_blur.h rename to avisynth/src/gaussian_blur.cpp index 3504933..83dc361 100644 --- a/avisynth/src/gaussian_blur.h +++ b/avisynth/src/gaussian_blur.cpp @@ -1,5 +1,5 @@ /* - gaussian_blur.h + gaussian_blur.cpp This file is part of TCannyMod @@ -22,10 +22,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. */ -#ifndef GAUSSIAN_BLUR_H -#define GAUSSIAN_BLUR_H #include +#include "tcannymod.h" #include "simd.h" @@ -123,5 +122,15 @@ gaussian_blur(const int radius, const float* kernel, const float* hkernel, } +gaussian_blur_t get_gaussian_blur(arch_t arch) noexcept +{ +#if defined(__AVX2__) + if (arch == HAS_AVX2) { + return gaussian_blur<__m256, GB_MAX_LENGTH, HAS_AVX2>; + } #endif - + if (arch == HAS_SSE41) { + return gaussian_blur<__m128, GB_MAX_LENGTH, HAS_SSE41>; + } + return gaussian_blur<__m128, GB_MAX_LENGTH, HAS_SSE2>; +} diff --git a/avisynth/src/hysteresis.cpp b/avisynth/src/hysteresis.cpp index ca5ae2d..346dc65 100644 --- a/avisynth/src/hysteresis.cpp +++ b/avisynth/src/hysteresis.cpp @@ -27,7 +27,7 @@ #include #include #include -#include "edge_detection.h" +#include "tcannymod.h" struct Pos { diff --git a/avisynth/src/simd.h b/avisynth/src/simd.h index 4f5fc44..8bc3f17 100644 --- a/avisynth/src/simd.h +++ b/avisynth/src/simd.h @@ -33,14 +33,11 @@ #else #include #endif +#include "tcannymod.h" + #define SFINLINE static __forceinline -enum arch_t { - HAS_SSE2, - HAS_SSE41, - HAS_AVX2, -}; /* -----set-------------------------*/ diff --git a/avisynth/src/tcannymod.cpp b/avisynth/src/tcannymod.cpp index c0c36da..5c96402 100644 --- a/avisynth/src/tcannymod.cpp +++ b/avisynth/src/tcannymod.cpp @@ -26,60 +26,10 @@ #include #include #include -#include -#include #include #include "tcannymod.h" -#include "gaussian_blur.h" -#include "edge_detection.h" -#include "write_frame.h" -static edge_detection_t -get_edge_detection(bool use_sobel, bool calc_dir, arch_t arch) noexcept -{ - using std::make_tuple; - std::map, edge_detection_t> func; - - func[make_tuple(false, false, HAS_SSE2)] = standard<__m128, __m128i, false>; - func[make_tuple(false, true, HAS_SSE2)] = standard<__m128, __m128i, true>; - func[make_tuple(true, false, HAS_SSE2)] = sobel<__m128, __m128i, false>; - func[make_tuple(true, true, HAS_SSE2)] = sobel<__m128, __m128i, true>; -#if defined(__AVX2__) - func[make_tuple(false, false, HAS_AVX2)] = standard<__m256, __m256i, false>; - func[make_tuple(false, true, HAS_AVX2)] = standard<__m256, __m256i, true>; - func[make_tuple(true, false, HAS_AVX2)] = sobel<__m256, __m256i, false>; - func[make_tuple(true, true, HAS_AVX2)] = sobel<__m256, __m256i, true>; -#endif - - arch_t a = arch == HAS_SSE41 ? HAS_SSE2 : arch; - - return func[make_tuple(use_sobel, calc_dir, a)]; -} - - - -static write_gradient_mask_t -get_write_gradient_mask(bool scale, arch_t arch) noexcept -{ -#if defined(__AVX2__) - if (arch == HAS_AVX2) { - return scale ? write_gradient_mask<__m256, __m256i, true> - : write_gradient_mask<__m256, __m256i, false>; - } -#endif - return scale ? write_gradient_mask<__m128, __m128i, true> - : write_gradient_mask<__m128, __m128i, false>; - -} - - -static inline void validate(bool cond, const char* msg) -{ - if (cond) - throw std::runtime_error(msg); -} - template static inline T @@ -129,6 +79,13 @@ Buffers::~Buffers() }; +static inline void validate(bool cond, const char* msg) +{ + if (cond) + throw std::runtime_error(msg); +} + + static void __stdcall set_gb_kernel(float sigma, int& radius, float* kernel) { @@ -146,24 +103,8 @@ set_gb_kernel(float sigma, int& radius, float* kernel) } -static arch_t get_arch(int opt, bool is_plus) noexcept -{ - if (opt == 0 || !has_sse41()) { - return HAS_SSE2; - } -#if !defined(__AVX2__) - return HAS_SSE41; -#else - if (opt == 1 || !has_avx2()) { - return HAS_SSE41; - } - return HAS_AVX2; -#endif -} - - TCannyM::TCannyM(PClip ch, int m, float sigma, float tmin, float tmax, int c, - bool sobel, float s, int opt, const char* n, bool is_plus) : + bool sobel, float s, arch_t arch, const char* n, bool is_plus) : GenericVideoFilter(ch), mode(m), gbRadius(0), th_min(tmin), th_max(tmax), chroma(c), name(n), scale(s), isPlus(is_plus), buff(nullptr) { @@ -171,8 +112,6 @@ TCannyM::TCannyM(PClip ch, int m, float sigma, float tmin, float tmax, int c, numPlanes = (vi.IsY8() || chroma == 0) ? 1 : 3; - arch_t arch = get_arch(opt, isPlus); - align = (arch < HAS_AVX2) ? 16 : 32; if (sigma > 0.0f) { @@ -206,34 +145,20 @@ TCannyM::TCannyM(PClip ch, int m, float sigma, float tmin, float tmax, int c, emaskPitch /= sizeof(float); dirPitch /= sizeof(int32_t); - switch (arch) { -#if defined(__AVX2__) - case HAS_AVX2: - gaussianBlur = gaussian_blur<__m256, GB_MAX_LENGTH, HAS_AVX2>; - nonMaximumSuppression = non_max_suppress<__m256, __m256i>; - writeGradientDirection = write_gradient_direction<__m256i>; - writeEdgeDirection = write_edge_direction<__m256i>; - break; -#endif - case HAS_SSE41: - gaussianBlur = gaussian_blur<__m128, GB_MAX_LENGTH, HAS_SSE41>; - nonMaximumSuppression = non_max_suppress<__m128, __m128i>; - writeGradientDirection = write_gradient_direction<__m128i>; - writeEdgeDirection = write_edge_direction<__m128i>; - break; - default: - gaussianBlur = gaussian_blur<__m128, GB_MAX_LENGTH, HAS_SSE2>; - nonMaximumSuppression = non_max_suppress<__m128, __m128i>; - writeGradientDirection = write_gradient_direction<__m128i>; - writeEdgeDirection = write_edge_direction<__m128i>; - } + gaussianBlur = get_gaussian_blur(arch); edgeDetection = get_edge_detection(sobel, (mode != 1 && mode != 4), arch); + nonMaximumSuppression = get_non_max_suppress(arch); + writeBluredFrame = get_write_gradient_mask(false, arch); writeGradientMask = get_write_gradient_mask(scale != 1.0f, arch); + writeGradientDirection = get_write_gradient_direction(arch); + + writeEdgeDirection = get_write_edge_direction(arch); + if (!isPlus) { buff = new Buffers(buffSize, blurSize, emaskSize, dirSize, hystSize, align, false, nullptr); @@ -250,8 +175,6 @@ TCannyM::~TCannyM() } - - PVideoFrame __stdcall TCannyM::GetFrame(int n, ise_t* env) { PVideoFrame src = child->GetFrame(n, env); @@ -261,12 +184,12 @@ PVideoFrame __stdcall TCannyM::GetFrame(int n, ise_t* env) if (isPlus) { b = new Buffers(buffSize, blurSize, emaskSize, dirSize, hystSize, align, true, env); - if (b->orig == nullptr) { + if (!b || !b->orig) { env->ThrowError("%s: failed to allocate buffer.", name); } } - const int planes[] = { PLANAR_Y, PLANAR_U, PLANAR_V }; + static const int planes[] = { PLANAR_Y, PLANAR_U, PLANAR_V }; for (int i = 0; i < numPlanes; i++) { @@ -332,6 +255,21 @@ PVideoFrame __stdcall TCannyM::GetFrame(int n, ise_t* env) } +static arch_t get_arch(int opt, bool is_plus) noexcept +{ + if (opt == 0 || !has_sse41()) { + return HAS_SSE2; + } +#if !defined(__AVX2__) + return HAS_SSE41; +#else + if (opt == 1 || !has_avx2()) { + return HAS_SSE41; + } + return HAS_AVX2; +#endif +} + static float calc_scale(double gmmax) { @@ -364,10 +302,13 @@ create_tcannymod(AVSValue args, void* user_data, ise_t* env) float scale = calc_scale(args[7].AsFloat(255.0)); bool is_plus = user_data != nullptr; + + arch_t arch = get_arch(args[8].AsInt(HAS_AVX2), is_plus); return new TCannyM(args[0].AsClip(), mode, sigma, tmin, tmax, chroma, - args[5].AsBool(false), scale, args[8].AsInt(HAS_AVX2), - "TCannyMod", is_plus); + args[5].AsBool(false), scale, arch, "TCannyMod", + is_plus); + } catch (std::runtime_error& e) { env->ThrowError("TCannyMod: %s", e.what()); } @@ -389,9 +330,12 @@ create_gblur(AVSValue args, void* user_data, ise_t* env) "chroma must be set to 0, 1, 2, 3 or 4."); bool is_plus = user_data != nullptr; + + arch_t arch = get_arch(args[3].AsInt(HAS_AVX2), is_plus); - return new TCannyM(args[0].AsClip(), 4, sigma, 1.0f, 1.0f, chroma, false, - 1.0f, args[3].AsInt(HAS_AVX2), "GBlur", is_plus); + return new TCannyM(args[0].AsClip(), 4, sigma, 1.0f, 1.0f, chroma, + false, 1.0f, arch, "GBlur", is_plus); + } catch (std::runtime_error& e) { env->ThrowError("GBlur: %s", e.what()); } @@ -415,10 +359,12 @@ create_emask(AVSValue args, void* user_data, ise_t* env) float scale = calc_scale(args[2].AsFloat(50.0)); bool is_plus = user_data != nullptr; + + arch_t arch = get_arch(args[3].AsInt(HAS_AVX2), is_plus); return new TCannyM(args[0].AsClip(), 1, sigma, 1.0f, 1.0f, chroma, - args[5].AsBool(false), scale, args[3].AsInt(HAS_AVX2), - "EMask", is_plus); + args[5].AsBool(false), scale, arch, "EMask", is_plus); + } catch (std::runtime_error& e) { env->ThrowError("EMask: %s", e.what()); } @@ -449,6 +395,7 @@ AvisynthPluginInit3(ise_t* env, const AVS_Linkage* const vectors) env->AddFunction("GBlur", "c[sigma]f[chroma]i[opt]i", create_gblur, is_plus); + env->AddFunction("EMask", "c[sigma]f[gmmax]f[chroma]i[sobel]b[opt]i", create_emask, is_plus); diff --git a/avisynth/src/tcannymod.h b/avisynth/src/tcannymod.h index 714b6e3..9dc488b 100644 --- a/avisynth/src/tcannymod.h +++ b/avisynth/src/tcannymod.h @@ -34,48 +34,57 @@ #include #include -#define TCANNY_M_VERSION "1.2.0" +#define TCANNY_M_VERSION "1.3.0" -constexpr size_t GB_MAX_LENGTH = 17; typedef IScriptEnvironment ise_t; -using gaussian_blur_t = void(__stdcall *)( +typedef void(__stdcall *gaussian_blur_t)( const int radius, const float* kernel, const float* hkernel, float* buffp, float* blurp, const size_t blur_pitch, const uint8_t* srcp, const size_t src_pitch, const size_t width, const size_t height); -using edge_detection_t = void(__stdcall *)( +typedef void(__stdcall *edge_detection_t)( float* blurp, const size_t blur_pitch, float* emaskp, const size_t emask_pitch, int32_t* dirp, const size_t dir_pitch, const size_t width, const size_t height); -using non_max_suppress_t = void (__stdcall *)( +typedef void (__stdcall *non_max_suppress_t)( const float* emaskp, const size_t em_pitch, const int32_t* dirp, const size_t dir_pitch, float* blurp, const size_t blr_pitch, const size_t width, const size_t height); -using write_gradient_mask_t = void(__stdcall *)( +typedef void(__stdcall *write_gradient_mask_t)( const float* srcp, uint8_t* dstp, const size_t width, const size_t height, const size_t dst_pitch, const size_t src_pitch, const float scale); -using write_gradient_direction_t = void(__stdcall *)( +typedef void(__stdcall *write_gradient_direction_t)( const int32_t* dirp, uint8_t* dstp, const size_t dir_pitch, const size_t dst_pitch, const size_t width, const size_t height); -using write_edge_direction_t = void (__stdcall *)( +typedef void (__stdcall *write_edge_direction_t)( const int32_t* dirp, const uint8_t* hystp, uint8_t* dstp, const size_t dir_pitch, const size_t hyst_pitch, const size_t dst_pitch, const size_t width, const size_t height); +enum arch_t { + HAS_SSE2, + HAS_SSE41, + HAS_AVX2, +}; + + +constexpr size_t GB_MAX_LENGTH = 17; + + class Buffers { ise_t* env; bool isPlus; @@ -126,12 +135,30 @@ class TCannyM : public GenericVideoFilter { public: TCannyM(PClip child, int mode, float sigma, float th_min, float th_max, - int chroma, bool sobel, float scale, int opt, const char* name, + int chroma, bool sobel, float scale, arch_t arch, const char* name, bool is_plus); ~TCannyM(); PVideoFrame __stdcall GetFrame(int n, ise_t* env); }; + +gaussian_blur_t get_gaussian_blur(arch_t arch) noexcept; + +edge_detection_t get_edge_detection(bool use_sobel, bool calc_dir, arch_t arch) noexcept; + +non_max_suppress_t get_non_max_suppress(arch_t arch) noexcept; + +write_gradient_mask_t get_write_gradient_mask(bool scale, arch_t arch) noexcept; + +write_gradient_direction_t get_write_gradient_direction(arch_t arch) noexcept; + +write_edge_direction_t get_write_edge_direction(arch_t arch) noexcept; + +void __stdcall +hysteresis(uint8_t* hystp, const size_t hpitch, float* blurp, + const size_t bpitch, const int width, const int height, + const float tmin, const float tmax) noexcept; + extern bool has_sse2(); extern bool has_sse41(); extern bool has_avx(); diff --git a/avisynth/src/write_frame.h b/avisynth/src/write_frame.cpp similarity index 78% rename from avisynth/src/write_frame.h rename to avisynth/src/write_frame.cpp index 30f01b1..7c2e32d 100644 --- a/avisynth/src/write_frame.h +++ b/avisynth/src/write_frame.cpp @@ -1,5 +1,5 @@ /* - write_frame.h + write_frame.cpp This file is part of TCannyMod @@ -23,10 +23,9 @@ */ -#ifndef WRITE_FRAME_H -#define WRITE_FRAME_H #include +#include "tcannymod.h" #include "simd.h" @@ -92,7 +91,7 @@ write_gradient_direction(const int32_t* dirp, uint8_t* dstp, template -void __stdcall +static void __stdcall write_edge_direction(const int32_t* dirp, const uint8_t* hystp, uint8_t* dstp, const size_t dir_pitch, const size_t hyst_pitch, const size_t dst_pitch, const size_t width, @@ -119,5 +118,37 @@ write_edge_direction(const int32_t* dirp, const uint8_t* hystp, uint8_t* dstp, } +write_gradient_mask_t get_write_gradient_mask(bool scale, arch_t arch) noexcept +{ +#if defined(__AVX2__) + if (arch == HAS_AVX2) { + return scale ? write_gradient_mask<__m256, __m256i, true> + : write_gradient_mask<__m256, __m256i, false>; + } +#endif + return scale ? write_gradient_mask<__m128, __m128i, true> + : write_gradient_mask<__m128, __m128i, false>; + +} + + +write_gradient_direction_t get_write_gradient_direction(arch_t arch) noexcept +{ +#if defined(__AVX2__) + if (arch == HAS_AVX2) { + return write_gradient_direction<__m256i>; + } +#endif + return write_gradient_direction<__m128i>; +} + +write_edge_direction_t get_write_edge_direction(arch_t arch) noexcept +{ +#if defined(__AVX2__) + if (arch == HAS_AVX2) { + return write_edge_direction<__m256i>; + } #endif + return write_edge_direction<__m128i>; +}