Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add resize output option #56

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ Other recommended projects:<br>

## :hourglass_flowing_sand: TODO List

- [ ] Support further cheap arbitrary resize (*e.g.*, bicubic, bilinear) for the model outputs
- [ ] Bug: Some PCs will output black images
- [ ] Add the guidance for ncnn model conversion
- [ ] Support face restoration - GFPGAN
Expand All @@ -63,6 +62,7 @@ Usage: realesrgan-ncnn-vulkan.exe -i infile -o outfile [options]...
-i input-path input image path (jpg/png/webp) or directory"
-o output-path output image path (jpg/png/webp) or directory"
-s scale upscale ratio (can be 2, 3, 4. default=4)"
-r resize resize output to dimension (default=WxH:default), use '-r help' for more details
-t tile-size tile size (>=32/0=auto, default=0) can be 0,0,0 for multi-gpu"
-m model-path folder path to the pre-trained models. default=models"
-n model-name model name (default=realesr-animevideov3, can be realesr-animevideov3 | realesrgan-x4plus | realesrgan-x4plus-anime | realesrnet-x4plus)"
Expand All @@ -78,6 +78,16 @@ Usage: realesrgan-ncnn-vulkan.exe -i infile -o outfile [options]...
- `tile-size` = tile size, use smaller value to reduce GPU memory usage, default selects automatically
- `load:proc:save` = thread count for the three stages (image decoding + model upscaling + image encoding), using larger values may increase GPU usage and consume more GPU memory. You can tune this configuration with "4:4:4" for many small-size images, and "2:2:2" for large-size images. The default setting usually works fine for most situations. If you find that your GPU is hungry, try increasing thread count to achieve faster processing.
- `format` = the format of the image to be output, png is better supported, however webp generally yields smaller file sizes, both are losslessly encoded
- `resize` = the forced output dimensions. \
For example '1920x1080' or '1920x1080:default' will force all output images to be resized to 1920x1080 with the default filter if they aren't already. \
Available filters:
- default - Automatically decide
- box - A trapezoid w/1-pixel wide ramps, same result as box for integer scale ratios
- triangle - On upsampling, produces same results as bilinear texture filtering
- cubicbspline - The cubic b-spline (aka Mitchell-Netravali with B=1,C=0), gaussian-esque
- catmullrom - An interpolating cubic spline
- mitchell - Mitchell-Netravali filter with B=1/3, C=1/3
- pointsample - Simple point sampling

If you encounter crash or error, try to upgrade your GPU driver

Expand Down
164 changes: 162 additions & 2 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,18 @@ namespace fs = std::filesystem;
#include "stb_image_write.h"
#endif // _WIN32
#include "webp_image.h"
#define STB_IMAGE_RESIZE2_IMPLEMENTATION
#include "stb_image_resize2.h"

// Filter names accepted after the ':' in the '-r WxH:filter' option.
// The position of a name in this table is stored as the resize mode and is
// later cast straight to stbir_filter, so the order has to follow that enum.
static const char* resizemodes[] = {
    /* STBIR_FILTER_DEFAULT      */ "default",
    /* STBIR_FILTER_BOX          */ "box",
    /* STBIR_FILTER_TRIANGLE     */ "triangle",
    /* STBIR_FILTER_CUBICBSPLINE */ "cubicbspline",
    /* STBIR_FILTER_CATMULLROM   */ "catmullrom",
    /* STBIR_FILTER_MITCHELL     */ "mitchell",
    /* STBIR_FILTER_POINT_SAMPLE */ "pointsample"
};

#if _WIN32
#include <wchar.h>
Expand Down Expand Up @@ -72,6 +84,51 @@ static std::vector<int> parse_optarg_int_array(const wchar_t* optarg)

return array;
}

// Compare a wide string against a narrow string, code unit by code unit.
// Returns true only when both strings have the same length and every wide
// character has the same numeric value as the narrow character at that index.
static bool ascii_string_equals(const wchar_t* wide, const char* narrow)
{
    size_t pos = 0;

    // walk both strings in lockstep until either one runs out
    while (wide[pos] != L'\0' && narrow[pos] != '\0')
    {
        if (wide[pos] != narrow[pos])
            return false;

        ++pos;
    }

    // equal only if both terminators were reached together
    return wide[pos] == L'\0' && narrow[pos] == '\0';
}

// Parse the '-r' option value, formatted as "WIDTHxHEIGHT" or
// "WIDTHxHEIGHT:FILTER" (wide-character variant).
//
// optarg  option text as received from getopt
// width   receives the parsed width
// height  receives the parsed height
// mode    receives the index of FILTER inside resizemodes, 0 when no filter
//         is given
//
// Returns true when the text is well formed, both dimensions are positive and
// the filter name (if any) is known. On failure the outputs may be partially
// written and must not be used.
static bool parse_optarg_resize(const wchar_t* optarg, int* width, int* height, int* mode)
{
    *mode = 0; // default

    const wchar_t* colon = wcschr(optarg, L':');
    if (colon)
    {
        bool found = false;
        const wchar_t* modestr = colon + 1;
        for (int i = 0; i < (int)(sizeof(resizemodes) / sizeof(resizemodes[0])); i++)
        {
            if (ascii_string_equals(modestr, resizemodes[i]))
            {
                *mode = i;
                found = true;
                break;
            }
        }
        if (!found)
        {
            // %ls is a wide string in every wprintf implementation, whereas
            // plain %s only means "wide" in Microsoft's CRT
            fwprintf(stderr, L"invalid resize mode '%ls'\n", modestr);
            return false;
        }
    }

    // %n reports how many characters the two numbers consumed, which lets us
    // reject trailing garbage such as "1920x1080foo"
    int consumed = 0;
    if (swscanf(optarg, L"%dx%d%n", width, height, &consumed) != 2)
        return false;

    const wchar_t* dims_end = colon ? colon : optarg + wcslen(optarg);
    if (optarg + consumed != dims_end)
        return false;

    // a zero or negative target size can never be resized to
    return *width > 0 && *height > 0;
}
#else // _WIN32
#include <unistd.h> // getopt()

Expand All @@ -90,6 +147,34 @@ static std::vector<int> parse_optarg_int_array(const char* optarg)

return array;
}

// Parse the '-r' option value, formatted as "WIDTHxHEIGHT" or
// "WIDTHxHEIGHT:FILTER".
//
// optarg  option text as received from getopt
// width   receives the parsed width
// height  receives the parsed height
// mode    receives the index of FILTER inside resizemodes, 0 when no filter
//         is given
//
// Returns true when the text is well formed, both dimensions are positive and
// the filter name (if any) is known. On failure the outputs may be partially
// written and must not be used.
static bool parse_optarg_resize(const char* optarg, int* width, int* height, int* mode)
{
    *mode = 0; // default

    const char* colon = strchr(optarg, ':');
    if (colon)
    {
        bool found = false;
        const char* modestr = colon + 1;
        for (int i = 0; i < (int)(sizeof(resizemodes) / sizeof(resizemodes[0])); i++)
        {
            if (strcmp(modestr, resizemodes[i]) == 0)
            {
                *mode = i;
                found = true;
                break;
            }
        }
        if (!found)
        {
            fprintf(stderr, "invalid resize mode '%s'\n", modestr);
            return false;
        }
    }

    // %n reports how many characters the two numbers consumed, which lets us
    // reject trailing garbage such as "1920x1080foo"
    int consumed = 0;
    if (sscanf(optarg, "%dx%d%n", width, height, &consumed) != 2)
        return false;

    const char* dims_end = colon ? colon : optarg + strlen(optarg);
    if (optarg + consumed != dims_end)
        return false;

    // a zero or negative target size can never be resized to
    return *width > 0 && *height > 0;
}
#endif // _WIN32

// ncnn
Expand All @@ -108,6 +193,7 @@ static void print_usage()
fprintf(stderr, " -i input-path input image path (jpg/png/webp) or directory\n");
fprintf(stderr, " -o output-path output image path (jpg/png/webp) or directory\n");
fprintf(stderr, " -s scale upscale ratio (can be 2, 3, 4. default=4)\n");
fprintf(stderr, " -r resize resize output to dimension (default=WxH:default), use '-r help' for more details\n");
fprintf(stderr, " -t tile-size tile size (>=32/0=auto, default=0) can be 0,0,0 for multi-gpu\n");
fprintf(stderr, " -m model-path folder path to the pre-trained models. default=models\n");
fprintf(stderr, " -n model-name model name (default=realesr-animevideov3, can be realesr-animevideov3 | realesrgan-x4plus | realesrgan-x4plus-anime | realesrnet-x4plus)\n");
Expand All @@ -118,6 +204,23 @@ static void print_usage()
fprintf(stderr, " -v verbose output\n");
}

// Print the detailed help for the '-r' option (shown for '-r help') to stdout:
// the argument syntax, one example and the list of accepted filter names.
static void print_resize_usage()
{
    printf("'-r widthxheight:filter' argument usage:\n\n");

    printf("For example '-r 1920x1080' or '-r 1920x1080:default' will force all output images to be\n");
    printf("resized to 1920x1080 with the default filter if they aren't already.\n\n");

    // the names below must stay in sync with the resizemodes table
    printf("Available filters:\n");
    printf(" default - Automatically decide\n");
    printf(" box - A trapezoid w/1-pixel wide ramps, same result as box for integer scale ratios\n");
    printf(" triangle - On upsampling, produces same results as bilinear texture filtering\n");
    printf(" cubicbspline - The cubic b-spline (aka Mitchell-Netravali with B=1,C=0), gaussian-esque\n");
    printf(" catmullrom - An interpolating cubic spline\n");
    printf(" mitchell - Mitchell-Netravali filter with B=1/3, C=1/3\n");
    printf(" pointsample - Simple point sampling\n");
}

class Task
{
public:
Expand Down Expand Up @@ -337,9 +440,31 @@ void* proc(void* args)
// Settings read by save(), which receives a pointer to this object through
// its void* thread argument.
class SaveThreadParams
{
public:
// requested output width; only used when resizeProvided is true
int resizeWidth;
// requested output height; only used when resizeProvided is true
int resizeHeight;
// index into resizemodes; resize_output_image casts it to stbir_filter
int resizeMode;
// false means resize_output_image leaves the image untouched
bool resizeProvided;
// copied from main's `verbose` variable
int verbose;
};

// Resize v.outimage in place to the dimensions requested with '-r'.
//
// v    task whose outimage is replaced by the resized pixels on success
// stp  save-thread settings carrying the requested size, filter and the
//      "resize requested" flag
//
// Nothing happens when no resize was requested or the image already has the
// requested size. If the target size is invalid, the destination buffer cannot
// be allocated, or stbir_resize reports failure, an error is printed and
// v.outimage keeps its original contents so the image can still be saved.
void resize_output_image(Task& v, const SaveThreadParams* stp)
{
    // check the flag first: the other fields are only meaningful when it is set
    if (!stp->resizeProvided)
        return;

    const int targetWidth = stp->resizeWidth;
    const int targetHeight = stp->resizeHeight;

    if (targetWidth <= 0 || targetHeight <= 0)
    {
        fprintf(stderr, "invalid resize dimensions %dx%d, output is not resized\n", targetWidth, targetHeight);
        return;
    }

    if (v.outimage.w == targetWidth && v.outimage.h == targetHeight)
        return;

    // NOTE(review): assumes outimage stores interleaved 8-bit pixels with
    // elempack == channel count, as the webp_save call in save() suggests
    const int c = v.outimage.elempack;
    ncnn::Mat resizedmat(targetWidth, targetHeight, (size_t)c, c);
    if (resizedmat.empty())
    {
        fprintf(stderr, "failed to allocate %dx%d resize buffer, output is not resized\n", targetWidth, targetHeight);
        return;
    }

    // the channel count is cast directly to stbir_pixel_layout and the mode
    // index directly to stbir_filter; a stride of 0 means tightly packed rows
    void* resized = stbir_resize(v.outimage.data, v.outimage.w, v.outimage.h, 0, resizedmat.data, targetWidth, targetHeight, 0, (stbir_pixel_layout)c, STBIR_TYPE_UINT8, STBIR_EDGE_CLAMP, (stbir_filter)stp->resizeMode);
    if (!resized)
    {
        fprintf(stderr, "resize to %dx%d failed, output is not resized\n", targetWidth, targetHeight);
        return;
    }

    v.outimage = std::move(resizedmat);
}

void* save(void* args)
{
const SaveThreadParams* stp = (const SaveThreadParams*)args;
Expand Down Expand Up @@ -383,6 +508,8 @@ void* save(void* args)
fs::create_directories(parent_path);
}

resize_output_image(v, stp);

if (ext == PATHSTR("webp") || ext == PATHSTR("WEBP"))
{
success = webp_save(v.outpath.c_str(), v.outimage.w, v.outimage.h, v.outimage.elempack, (const unsigned char*)v.outimage.data);
Expand Down Expand Up @@ -437,6 +564,10 @@ int main(int argc, char** argv)
path_t inputpath;
path_t outputpath;
int scale = 4;
// '-r' settings; zero-initialised because they are copied into the
// SaveThreadParams even when '-r' was never given
int resizeWidth = 0;
int resizeHeight = 0;
int resizeMode = 0;
bool resizeProvided = false;
std::vector<int> tilesize;
path_t model = PATHSTR("models");
path_t modelname = PATHSTR("realesr-animevideov3");
Expand All @@ -451,7 +582,7 @@ int main(int argc, char** argv)
#if _WIN32
setlocale(LC_ALL, "");
wchar_t opt;
while ((opt = getopt(argc, argv, L"i:o:s:t:m:n:g:j:f:vxh")) != (wchar_t)-1)
while ((opt = getopt(argc, argv, L"i:o:s:r:t:m:n:g:j:f:vxh")) != (wchar_t)-1)
{
switch (opt)
{
Expand All @@ -464,6 +595,19 @@ int main(int argc, char** argv)
case L's':
scale = _wtoi(optarg);
break;
case L'r':
if (wcscmp(optarg, L"help") == 0)
{
print_resize_usage();
return -1;
}
if (!parse_optarg_resize(optarg, &resizeWidth, &resizeHeight, &resizeMode))
{
fwprintf(stderr, L"invalid resize argument\n");
return -1;
}
resizeProvided = true;
break;
case L't':
tilesize = parse_optarg_int_array(optarg);
break;
Expand Down Expand Up @@ -497,7 +641,7 @@ int main(int argc, char** argv)
}
#else // _WIN32
int opt;
while ((opt = getopt(argc, argv, "i:o:s:t:m:n:g:j:f:vxh")) != -1)
while ((opt = getopt(argc, argv, "i:o:s:r:t:m:n:g:j:f:vxh")) != -1)
{
switch (opt)
{
Expand All @@ -510,6 +654,18 @@ int main(int argc, char** argv)
case 's':
scale = atoi(optarg);
break;
case 'r':
if (strcmp(optarg, "help") == 0)
{
print_resize_usage();
return -1;
}
if (!parse_optarg_resize(optarg, &resizeWidth, &resizeHeight, &resizeMode))
{
fprintf(stderr, "invalid resize argument\n");
return -1;
}
break;
case 't':
tilesize = parse_optarg_int_array(optarg);
break;
Expand Down Expand Up @@ -840,6 +996,10 @@ int main(int argc, char** argv)

// save image
SaveThreadParams stp;
stp.resizeWidth = resizeWidth;
stp.resizeHeight = resizeHeight;
stp.resizeMode = resizeMode;
stp.resizeProvided = resizeProvided;
stp.verbose = verbose;

std::vector<ncnn::Thread*> save_threads(jobs_save);
Expand Down
Loading