ffmpeg_phqm.diff

@@ -3487,6 +3496,8 @@ nlmeans_opencl_filter_deps="opencl"
 nnedi_filter_deps="gpl"
 ocr_filter_deps="libtesseract"
 ocv_filter_deps="libopencv"
+phqm_filter_deps="libopencv"
+phqm_filter_extralibs="-lstdc++ -lopencv_img_hash"
 openclsrc_filter_deps="opencl"
 overlay_opencl_filter_deps="opencl"
 overlay_qsv_filter_deps="libmfx"
diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index dc4b4b100d..5527ffab67 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -195,24 +195,76 @@ static void reconfig_encoder(AVCodecContext *ctx, const AVFrame *frame)
         x264_encoder_reconfig(x4->enc, &x4->params);
     }
 
-    if (x4->params.rc.i_vbv_buffer_size != ctx->rc_buffer_size / 1000 ||
-        x4->params.rc.i_vbv_max_bitrate != ctx->rc_max_rate    / 1000) {
-        x4->params.rc.i_vbv_buffer_size = ctx->rc_buffer_size / 1000;
-        x4->params.rc.i_vbv_max_bitrate = ctx->rc_max_rate    / 1000;
-        x264_encoder_reconfig(x4->enc, &x4->params);
+    if (frame->perceptual_score == -1) {
+        if (x4->params.rc.i_vbv_buffer_size != ctx->rc_buffer_size / 1000 ||
+            x4->params.rc.i_vbv_max_bitrate != ctx->rc_max_rate    / 1000) {
+            x4->params.rc.i_vbv_buffer_size = ctx->rc_buffer_size / 1000;
+            x4->params.rc.i_vbv_max_bitrate = ctx->rc_max_rate    / 1000;
+            x264_encoder_reconfig(x4->enc, &x4->params);
+        }
     }
 
     if (x4->params.rc.i_rc_method == X264_RC_ABR &&
-        x4->params.rc.i_bitrate != ctx->bit_rate / 1000) {
-        x4->params.rc.i_bitrate = ctx->bit_rate / 1000;
-        x264_encoder_reconfig(x4->enc, &x4->params);
+        (frame->perceptual_score > -1 ||
+         x4->params.rc.i_bitrate != ctx->bit_rate / 1000)) {
+        if (frame->perceptual_score > -1) {
+            int bitrate = 0;
+            /* set ABR bitrate value from perceptual score */
+            /* decrease compression by raising the avg bitrate up to N times */
+            bitrate = (ctx->bit_rate / 1000) + ((frame->perceptual_score * frame->perceptual_score_factor) * (ctx->bit_rate / 1000.0));
+            x4->params.rc.i_bitrate = bitrate;
+            x4->params.rc.i_vbv_max_bitrate = bitrate * 1.5;
+            x4->params.rc.i_vbv_buffer_size = bitrate * 1.5 * 1.5;
+            av_log(ctx, AV_LOG_DEBUG,
+               "Perceptual: [%0.2f] bitrate %d maxbitrate %d from %"PRIu64"\n",
+               frame->perceptual_score,
+               x4->params.rc.i_bitrate,
+               x4->params.rc.i_vbv_max_bitrate,
+               ctx->bit_rate / 1000);
+
+            /* tag this frame with this specific config */
+            x4->pic.param = &x4->params;
+            x264_encoder_reconfig(x4->enc, &x4->params);
+        } else {
+            x4->params.rc.i_bitrate = ctx->bit_rate / 1000;
+            x264_encoder_reconfig(x4->enc, &x4->params);
+        }
     }
 
     if (x4->crf >= 0 &&
         x4->params.rc.i_rc_method == X264_RC_CRF &&
-        x4->params.rc.f_rf_constant != x4->crf) {
-        x4->params.rc.f_rf_constant = x4->crf;
-        x264_encoder_reconfig(x4->enc, &x4->params);
+        (frame->perceptual_score > -1 ||
+         x4->params.rc.f_rf_constant != x4->crf)) {
+        if (frame->perceptual_score > -1) {
+            float crf_value = 0.0;
+
+            /* set crf value from perceptual score */
+            /* decrease compression by lowering the score by up to N CRF points */
+            crf_value = x4->crf - ((frame->perceptual_score * 100.0) / (frame->perceptual_score_factor * 2.0));
+            x4->params.rc.f_rf_constant = crf_value;
+
+            if (ctx->rc_max_rate) {
+                int bitrate = 0;
+                /* set ABR bitrate value from perceptual score */
+                /* decrease compression by raising the avg bitrate up to N times */
+                bitrate = (ctx->rc_max_rate / 1000) + ((frame->perceptual_score * frame->perceptual_score_factor) * (ctx->rc_max_rate / 1000.0));
+                x4->params.rc.i_vbv_max_bitrate = bitrate;
+                x4->params.rc.i_vbv_buffer_size = bitrate * 1.5 * 1.5;
+            }
+            av_log(ctx, AV_LOG_DEBUG,
+               "Perceptual: [%0.2f] crf: %0.2f bitrate %d maxbitrate %d from %"PRIu64"\n",
+               frame->perceptual_score,
+               x4->params.rc.f_rf_constant,
+               x4->params.rc.i_bitrate,
+               x4->params.rc.i_vbv_max_bitrate,
+               ctx->rc_max_rate / 1000);
+
+            /* tag this frame with this specific config */
+            x4->pic.param = &x4->params;
+        } else {
+            x4->params.rc.f_rf_constant = x4->crf;
+            x264_encoder_reconfig(x4->enc, &x4->params);
+        }
     }
 
     if (x4->params.rc.i_rc_method == X264_RC_CQP &&
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 455c809b15..c819ad672a 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -319,6 +319,7 @@ OBJS-$(CONFIG_PALETTEUSE_FILTER)             += vf_paletteuse.o framesync.o
 OBJS-$(CONFIG_PERMS_FILTER)                  += f_perms.o
 OBJS-$(CONFIG_PERSPECTIVE_FILTER)            += vf_perspective.o
 OBJS-$(CONFIG_PHASE_FILTER)                  += vf_phase.o
+OBJS-$(CONFIG_PHQM_FILTER)                   += vf_phqm.o img_hash.o
 OBJS-$(CONFIG_PIXDESCTEST_FILTER)            += vf_pixdesctest.o
 OBJS-$(CONFIG_PIXSCOPE_FILTER)               += vf_datascope.o
 OBJS-$(CONFIG_PP_FILTER)                     += vf_pp.o
@@ -489,6 +490,7 @@ OBJS-$(CONFIG_SHARED)                        += log2_tab.o
 SKIPHEADERS-$(CONFIG_QSVVPP)                 += qsvvpp.h
 SKIPHEADERS-$(CONFIG_OPENCL)                 += opencl.h
 SKIPHEADERS-$(CONFIG_VAAPI)                  += vaapi_vpp.h
+SKIPHEADERS-$(CONFIG_LIBOPENCV)              += img_hash.h
 
 TOOLS     = graph2dot
 TESTPROGS = drawutils filtfmts formats integral
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 04a3df7d56..378c553d66 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -307,6 +307,7 @@ extern AVFilter ff_vf_pixdesctest;
 extern AVFilter ff_vf_pixscope;
 extern AVFilter ff_vf_pp;
 extern AVFilter ff_vf_pp7;
+extern AVFilter ff_vf_phqm;
 extern AVFilter ff_vf_premultiply;
 extern AVFilter ff_vf_prewitt;
 extern AVFilter ff_vf_prewitt_opencl;
diff --git a/libavfilter/img_hash.cpp b/libavfilter/img_hash.cpp
new file mode 100644
index 0000000000..4d5843da22
--- /dev/null
+++ b/libavfilter/img_hash.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2019 Christopher Kennedy
+ *
+ * OpenCV img_hash
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <opencv2/core.hpp>
+#include <opencv2/core/ocl.hpp>
+#include <opencv2/highgui.hpp>
+#include <opencv2/img_hash.hpp>
+#include <opencv2/imgproc.hpp>
+
+#include <iostream>
+
+#include "img_hash.h"
+#include "libavutil/pixdesc.h"
+extern "C" {
+#include "avfilter.h"
+}
+
+// convert from avframe to iplimage format
+static int fill_iplimage_from_frame(IplImage *img, const AVFrame *frame, enum AVPixelFormat pixfmt)
+{
+    IplImage *tmpimg;
+    int depth = IPL_DEPTH_8U, channels_nb;
+
+    switch (pixfmt) {
+        case AV_PIX_FMT_GRAY8:      channels_nb = 1; break;
+        case AV_PIX_FMT_BGRA:       channels_nb = 4; break;
+        case AV_PIX_FMT_BGR24:      channels_nb = 3; break;
+        default: return -1;
+    }
+
+    tmpimg = cvCreateImageHeader((CvSize){frame->width, frame->height}, depth, channels_nb);
+    *img = *tmpimg;
+    img->imageData = img->imageDataOrigin = (char *) frame->data[0];
+    img->dataOrder = IPL_DATA_ORDER_PIXEL;
+    img->origin    = IPL_ORIGIN_TL;
+    img->widthStep = frame->linesize[0];
+
+    return 0;
+}
+
+// Get the score of two Video Frames by comparing the perceptual hashes and deriving a hamming distance
+// showing how similar they are or different. lower score is better for most algorithms
+extern "C" double getScore(const AVFrame *frame1, const AVFrame *frame2, enum AVPixelFormat pixfmt, int hash_type) {
+    cv::Ptr<cv::img_hash::ImgHashBase> algo;
+    IplImage ipl1, ipl2;
+    cv::Mat h1;
+    cv::Mat h2;
+    cv::Mat m1;
+    cv::Mat m2;
+
+    // Take FFmpeg video frame and convert into an IplImage for OpenCV
+    if (fill_iplimage_from_frame(&ipl1, frame1, pixfmt) != 0 ||
+        fill_iplimage_from_frame(&ipl2, frame2, pixfmt) != 0)
+        return DBL_MAX; // Return an invalid value if either fails
+
+    // Convert an IplImage to an Mat Image for OpenCV (newer format)
+    m1 = cv::cvarrToMat(&ipl1);
+    m2 = cv::cvarrToMat(&ipl2);
+
+    // substantiate the hash type algorithm
+    switch (hash_type) {
+        case PHASH:             algo = cv::img_hash::PHash::create();               break;
+        case AVERAGE:           algo = cv::img_hash::AverageHash::create();         break;
+        case MARRHILDRETH:      algo = cv::img_hash::MarrHildrethHash::create();    break;
+        case RADIALVARIANCE:    algo = cv::img_hash::RadialVarianceHash::create();  break;
+        // BlockMeanHash support mode 0 and mode 1, they associate to
+        // mode 1 and mode 2 of PHash library
+        case BLOCKMEAN1:        algo = cv::img_hash::BlockMeanHash::create(0);      break;
+        case BLOCKMEAN2:        algo = cv::img_hash::BlockMeanHash::create(1);      break;
+        case COLORMOMENT:       algo = cv::img_hash::ColorMomentHash::create();     break;
+    }
+
+    // Compute the hash
+    algo->compute(m1, h1);
+    algo->compute(m2, h2);
+
+    // Compare the hashes and return the hamming distance
+    return algo->compare(h1, h2);
+}
diff --git a/libavfilter/img_hash.h b/libavfilter/img_hash.h
new file mode 100644
index 0000000000..76f55c3013
--- /dev/null
+++ b/libavfilter/img_hash.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Christopher Kennedy
+ *
+ * PHQM Perceptual Hash Quality Metric
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_IMG_HASH_H
+#define AVFILTER_IMG_HASH_H
+
+#include "avfilter.h"
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#define AVERAGE 0
+#define BLOCKMEAN1 1
+#define BLOCKMEAN2 2
+#define COLORMOMENT 3
+#define MARRHILDRETH 4
+#define PHASH 5
+#define RADIALVARIANCE 6
+
+double getScore(const AVFrame *frame1, const AVFrame *frame2, enum AVPixelFormat pixfmt, int hash_type);
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/libavfilter/vf_phqm.c b/libavfilter/vf_phqm.c
new file mode 100644
index 0000000000..a7b73cf061
--- /dev/null
+++ b/libavfilter/vf_phqm.c
@@ -0,0 +1,382 @@
+/*
+ * Copyright (c) 2019 Christopher Kennedy
+ *
+ * PHQM Perceptual Hash Quality Metric
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * PHQM: Calculate the Image Hash Hamming Difference between two input videos.
+ */
+
+#include <float.h>
+#include "libavutil/avstring.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "drawutils.h"
+#include "formats.h"
+#include "framesync.h"
+#include "internal.h"
+#include "video.h"
+
+#include "img_hash.h"
+#include "scene_sad.h"
+
+typedef struct PHQMContext {
+    const AVClass *class;
+    FFFrameSync fs;
+    double shd, hd, min_hd, max_hd, smin_hd, smax_hd;
+    double hft, sft, phd, psad;
+    uint64_t nb_shd;
+    uint64_t nb_frames;
+    FILE *stats_file;
+    char *stats_file_str;
+    int hash_type;
+    ff_scene_sad_fn sad_ref;            ///< Sum of the absolute difference function (scene detect only)
+    ff_scene_sad_fn sad_enc;            ///< Sum of the absolute difference function (scene detect only)
+    double prev_mafd_ref;               ///< previous MAFD                           (scene detect only)
+    double prev_mafd_enc;               ///< previous MAFD                           (scene detect only)
+    AVFrame *prev_pic_ref;           ///< ref previous frame                          (scene detect only)
+    AVFrame *prev_pic_enc;           ///< enc previous frame                          (scene detect only)
+    double scd_thresh;
+    double scene_score_ref;
+    double scene_score_enc;
+    double prev_hamm_ref;
+    double prev_hamm_enc;
+} PHQMContext;
+
+#define OFFSET(x) offsetof(PHQMContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+static const AVOption phqm_options[] = {
+    { "stats_file", "Set file where to store per-frame difference information.", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
+    { "f",          "Set file where to store per-frame difference information.", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
+    { "scd_thresh", "Scene Change Detection Threshold.",                         OFFSET(scd_thresh),     AV_OPT_TYPE_DOUBLE, {.dbl=0.5},  0, 1, FLAGS },
+    { "hash_type",  "Type of Image Hash to use from OpenCV.",                    OFFSET(hash_type),      AV_OPT_TYPE_INT,    {.i64 = PHASH}, 0, 6, FLAGS, "hash_type" },
+    {     "average",        "Average Hash",             0, AV_OPT_TYPE_CONST, {.i64 = AVERAGE},        0, 0, FLAGS, "hash_type" },
+    {     "blockmean1",     "Block Mean Hash 1",        0, AV_OPT_TYPE_CONST, {.i64 = BLOCKMEAN1},     0, 0, FLAGS, "hash_type" },
+    {     "blockmean2",     "Block Mean Hash 2",        0, AV_OPT_TYPE_CONST, {.i64 = BLOCKMEAN2},     0, 0, FLAGS, "hash_type" },
+    {     "colormoment",    "Color Moment Hash",        0, AV_OPT_TYPE_CONST, {.i64 = COLORMOMENT},    0, 0, FLAGS, "hash_type" },
+    {     "marrhildreth",   "Marr Hildreth Hash",       0, AV_OPT_TYPE_CONST, {.i64 = MARRHILDRETH},   0, 0, FLAGS, "hash_type" },
+    {     "phash",          "Perceptual Hash (PHash)",  0, AV_OPT_TYPE_CONST, {.i64 = PHASH},          0, 0, FLAGS, "hash_type" },
+    {     "radialvariance", "Radial Variance Hash",     0, AV_OPT_TYPE_CONST, {.i64 = RADIALVARIANCE}, 0, 0, FLAGS, "hash_type" },
+    { NULL }
+};
+
+FRAMESYNC_DEFINE_CLASS(phqm, PHQMContext, fs);
+
+static void set_meta(AVDictionary **metadata, const char *key, char comp, float d)
+{
+    char value[128];
+    snprintf(value, sizeof(value), "%0.2f", d);
+    if (comp) {
+        char key2[128];
+        snprintf(key2, sizeof(key2), "%s%c", key, comp);
+        av_dict_set(metadata, key2, value, 0);
+    } else {
+        av_dict_set(metadata, key, value, 0);
+    }
+}
+
+static void get_scene_score(AVFilterContext *ctx, AVFrame *ref, AVFrame *enc)
+{
+    PHQMContext *s = ctx->priv;
+    AVFrame *prev_pic_ref = s->prev_pic_ref;
+    AVFrame *prev_pic_enc = s->prev_pic_enc;
+
+    /* reference */
+    if (prev_pic_ref &&
+        ref->height == prev_pic_ref->height &&
+        ref->width  == prev_pic_ref->width) {
+        uint64_t sad;
+        double mafd, diff;
+
+        /* scene change sad score */
+        s->sad_ref(prev_pic_ref->data[0], prev_pic_ref->linesize[0], ref->data[0], ref->linesize[0], ref->width * 3, ref->height, &sad);
+        emms_c();
+        mafd = (double)sad / (ref->width * 3 * ref->height);
+        diff = fabs(mafd - s->prev_mafd_ref);
+        s->scene_score_ref  = av_clipf(FFMIN(mafd, diff) / 100., 0, 1);
+        s->prev_mafd_ref = mafd;
+
+        /* get prev/current frame hamming difference */
+        s->prev_hamm_ref = getScore(s->prev_pic_ref, ref, ref->format, s->hash_type);
+
+        av_frame_free(&prev_pic_ref);
+    }
+    s->prev_pic_ref = av_frame_clone(ref);
+
+    if (prev_pic_enc &&
+        enc->height == prev_pic_enc->height &&
+        enc->width  == prev_pic_enc->width) {
+        uint64_t sad;
+        double mafd, diff;
+
+        /* scene change sad score */
+        s->sad_enc(prev_pic_enc->data[0], prev_pic_enc->linesize[0], enc->data[0], enc->linesize[0], enc->width * 3, enc->height, &sad);
+        emms_c();
+        mafd = (double)sad / (enc->width * 3 * enc->height);
+        diff = fabs(mafd - s->prev_mafd_enc);
+        s->scene_score_enc  = av_clipf(FFMIN(mafd, diff) / 100., 0, 1);
+        s->prev_mafd_enc = mafd;
+
+        /* get prev/current frame hamming difference */
+        s->prev_hamm_enc = getScore(s->prev_pic_enc, enc, enc->format, s->hash_type);
+
+        av_frame_free(&prev_pic_enc);
+    }
+    s->prev_pic_enc = av_frame_clone(enc);
+}
+
+static int do_phqm(FFFrameSync *fs)
+{
+    AVFilterContext *ctx = fs->parent;
+    PHQMContext *s = ctx->priv;
+    AVFrame *master, *ref;
+    double hd = 0.;
+    int ret;
+    double hd_limit = 1000000.;
+    AVDictionary **metadata;
+
+    ret = ff_framesync_dualinput_get(fs, &master, &ref);
+    if (ret < 0)
+        return ret;
+    if (!ref)
+        return ff_filter_frame(ctx->outputs[0], master);
+    metadata = &master->metadata;
+
+    s->nb_frames++;
+
+    /* scene change detection score */
+    get_scene_score(ctx, ref, master);
+    if (s->scene_score_ref >= s->scd_thresh && s->nb_shd >= 48) {
+        av_log(s, AV_LOG_WARNING, "ImgHashScene: n:%"PRId64"-%"PRId64" hd_avg:%0.3lf hd_min:%0.3lf hd_max:%0.3lf scd:%0.2lf hft:%0.3lf sft:%0.3lf\n",
+               (s->nb_frames - s->nb_shd), s->nb_frames - 1, (s->shd / s->nb_shd), s->smin_hd, s->smax_hd, s->scene_score_ref, (s->hft / s->nb_shd), (s->sft / s->nb_shd));
+        s->shd = 0.;
+        s->sft = 0.;
+        s->hft = 0.;
+        s->nb_shd = 0;
+        s->smin_hd = 0.;
+        s->smax_hd = 0.;
+    }
+
+    /* frame perceptual score, normalize to percentage, read by x264 for crf/vbr */
+    master->perceptual_score           = ref->perceptual_score        = .01 * FFMIN((s->prev_hamm_ref * 2.0), 100);
+    master->perceptual_score_factor    = ref->perceptual_score_factor = 2.0;
+    set_meta(metadata, "lavfi.phqm.hamm", 0, s->prev_hamm_ref);
+
+    /* limit the highest value so we cut off at perceptual difference match */
+    switch (s->hash_type) {
+        case PHASH:
+        case AVERAGE:           hd_limit = 5;   break;
+        case MARRHILDRETH:      hd_limit = 30;  break;
+        case RADIALVARIANCE:    hd_limit = 0.9; break;
+        case BLOCKMEAN1:        hd_limit = 12;  break;
+        case BLOCKMEAN2:        hd_limit = 48;  break;
+        case COLORMOMENT:       hd_limit = 8;   break;
+    }
+
+    /* get ref / enc perceptual hashes and calc hamming distance difference value */
+    hd = getScore(ref, master, ref->format, s->hash_type);
+    if (hd == DBL_MAX) {
+        av_log(s, AV_LOG_ERROR, "Failure with handling pix_fmt of AVFrame for conversion to IPLimage.\n");
+        return AVERROR(EINVAL);
+    }
+    s->hd += FFMIN(hd, hd_limit);
+    s->phd += FFMIN(s->prev_hamm_ref, hd_limit);
+    s->psad += FFMIN(s->scene_score_ref, hd_limit);
+    set_meta(metadata, "lavfi.phqm.phqm", 0, hd);
+
+    /* scene hamming distance avg */
+    s->shd += FFMIN(hd, hd_limit);
+    s->hft += s->prev_hamm_ref;
+    s->sft += s->scene_score_ref;
+    s->nb_shd++;
+    av_log(s, AV_LOG_DEBUG, "ImgHashFrame: hd:%0.3lf sad:%0.2lf hamm:%0.3lf\n", hd, s->scene_score_ref, s->prev_hamm_ref);
+
+    s->min_hd = FFMIN(s->min_hd, hd);
+    s->max_hd = FFMAX(s->max_hd, hd);
+    s->smin_hd = FFMIN(s->smin_hd, hd);
+    s->smax_hd = FFMAX(s->smax_hd, hd);
+
+    if (s->stats_file) {
+        fprintf(s->stats_file,
+                "n:%"PRId64" phqm:%0.3f phqm_min:%0.3f phqm_max:%0.3f sad:%0.2f ref_hamm:%0.2f enc_hamm:%0.2f",
+                s->nb_frames, hd, s->min_hd, s->max_hd, s->scene_score_ref, s->prev_hamm_ref, s->prev_hamm_enc);
+        fprintf(s->stats_file, "\n");
+    }
+
+    return ff_filter_frame(ctx->outputs[0], master);
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    PHQMContext *s = ctx->priv;
+
+    if (s->stats_file_str) {
+        if (!strcmp(s->stats_file_str, "-")) {
+            s->stats_file = stdout;
+        } else {
+            s->stats_file = fopen(s->stats_file_str, "w");
+            if (!s->stats_file) {
+                int err = AVERROR(errno);
+                char buf[128];
+                av_strerror(err, buf, sizeof(buf));
+                av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
+                       s->stats_file_str, buf);
+                return err;
+            }
+        }
+    }
+
+    s->sad_ref = ff_scene_sad_get_fn(8);
+    if (!s->sad_ref)
+        return AVERROR(EINVAL);
+    s->sad_enc = ff_scene_sad_get_fn(8);
+    if (!s->sad_enc)
+        return AVERROR(EINVAL);
+
+    s->fs.on_event = do_phqm;
+    return 0;
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    PHQMContext *s = ctx->priv;
+    AVFilterFormats *fmts_list = NULL;
+    static const enum AVPixelFormat gray8_pix_fmts[] = {
+        AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_NONE
+    };
+    static const enum AVPixelFormat bgr24_pix_fmts[] = {
+        AV_PIX_FMT_BGR24,
+        AV_PIX_FMT_NONE
+    };
+    static const enum AVPixelFormat bgra_pix_fmts[] = {
+        AV_PIX_FMT_BGRA,
+        AV_PIX_FMT_NONE
+    };
+
+    switch (s->hash_type) {
+        case COLORMOMENT: fmts_list = ff_make_format_list(bgr24_pix_fmts); break;
+        case MARRHILDRETH: fmts_list = ff_make_format_list(bgra_pix_fmts); break;
+        /* all other hashes take the gray8 format */
+        default: fmts_list = ff_make_format_list(gray8_pix_fmts); break;
+    }
+    if (!fmts_list)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+static int config_input_ref(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx  = inlink->dst;
+
+    if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
+        ctx->inputs[0]->h != ctx->inputs[1]->h) {
+        av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
+        return AVERROR(EINVAL);
+    }
+    if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
+        av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel format.\n");
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    PHQMContext *s = ctx->priv;
+    AVFilterLink *mainlink = ctx->inputs[0];
+    int ret;
+
+    ret = ff_framesync_init_dualinput(&s->fs, ctx);
+    if (ret < 0)
+        return ret;
+    outlink->w = mainlink->w;
+    outlink->h = mainlink->h;
+    outlink->time_base = mainlink->time_base;
+    outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
+    outlink->frame_rate = mainlink->frame_rate;
+    if ((ret = ff_framesync_configure(&s->fs)) < 0)
+        return ret;
+
+    return 0;
+}
+
+static int activate(AVFilterContext *ctx)
+{
+    PHQMContext *s = ctx->priv;
+    return ff_framesync_activate(&s->fs);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    PHQMContext *s = ctx->priv;
+
+    if (s->nb_frames > 0)
+        av_log(ctx, AV_LOG_WARNING, "PHQM average:%f min:%f max:%f hamm:%f sad:%f\n",
+               s->hd / s->nb_frames, s->min_hd, s->max_hd,
+               s->phd / s->nb_frames, s->psad / s->nb_frames);
+
+    ff_framesync_uninit(&s->fs);
+
+    if (s->stats_file && s->stats_file != stdout)
+        fclose(s->stats_file);
+    av_frame_free(&s->prev_pic_ref);
+    av_frame_free(&s->prev_pic_enc);
+}
+
+static const AVFilterPad phqm_inputs[] = {
+    {
+        .name         = "main",
+        .type         = AVMEDIA_TYPE_VIDEO,
+    },{
+        .name         = "reference",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input_ref,
+    },
+    { NULL }
+};
+
+static const AVFilterPad phqm_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_phqm= {
+    .name          = "phqm",
+    .description   = NULL_IF_CONFIG_SMALL("PHQM: Calculate the Perceptual Hash Hamming Difference between two video streams."),
+    .preinit       = phqm_framesync_preinit,
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .activate      = activate,
+    .priv_size     = sizeof(PHQMContext),
+    .priv_class    = &phqm_class,
+    .inputs        = phqm_inputs,
+    .outputs       = phqm_outputs,
+};
diff --git a/libavformat/mpegenc.c b/libavformat/mpegenc.c
index 43ebc46e0e..f6aa705e96 100644
--- a/libavformat/mpegenc.c
+++ b/libavformat/mpegenc.c
@@ -984,7 +984,7 @@ static int remove_decoded_packets(AVFormatContext *ctx, int64_t scr)
                scr > pkt_desc->dts) { // FIXME: > vs >=
             if (stream->buffer_index < pkt_desc->size ||
                 stream->predecode_packet == stream->premux_packet) {
-                av_log(ctx, AV_LOG_ERROR,
+                av_log(ctx, AV_LOG_WARNING,
                        "buffer underflow st=%d bufi=%d size=%d\n",
                        i, stream->buffer_index, pkt_desc->size);
                 break;
@@ -1063,7 +1063,7 @@ retry:
                     scr / 90000.0, best_dts / 90000.0);
 
             if (scr >= best_dts + 1 && !ignore_constraints) {
-                av_log(ctx, AV_LOG_ERROR,
+                av_log(ctx, AV_LOG_WARNING,
                     "packet too large, ignoring buffer limits to mux it\n");
                 ignore_constraints = 1;
             }
diff --git a/libavutil/frame.c b/libavutil/frame.c
index dcf1fc3d17..94e7644d1f 100644
--- a/libavutil/frame.c
+++ b/libavutil/frame.c
@@ -163,6 +163,8 @@ FF_ENABLE_DEPRECATION_WARNINGS
     frame->color_range         = AVCOL_RANGE_UNSPECIFIED;
     frame->chroma_location     = AVCHROMA_LOC_UNSPECIFIED;
     frame->flags               = 0;
+    frame->perceptual_score    = -1;
+    frame->perceptual_score_factor    = 2.0;
 }
 
 static void free_side_data(AVFrameSideData **ptr_sd)
@@ -373,6 +375,8 @@ FF_ENABLE_DEPRECATION_WARNINGS
     dst->colorspace             = src->colorspace;
     dst->color_range            = src->color_range;
     dst->chroma_location        = src->chroma_location;
+    dst->perceptual_score       = src->perceptual_score;
+    dst->perceptual_score_factor       = src->perceptual_score_factor;
 
     av_dict_copy(&dst->metadata, src->metadata, 0);
 
@@ -453,6 +457,8 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src)
     dst->channels       = src->channels;
     dst->channel_layout = src->channel_layout;
     dst->nb_samples     = src->nb_samples;
+    dst->perceptual_score = src->perceptual_score;
+    dst->perceptual_score_factor = src->perceptual_score_factor;
 
     ret = frame_copy_props(dst, src, 0);
     if (ret < 0)
diff --git a/libavutil/frame.h b/libavutil/frame.h
index 5d3231e7bb..c9df011aaa 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -672,6 +672,13 @@ typedef struct AVFrame {
      * for the target frame's private_ref field.
      */
     AVBufferRef *private_ref;
+
+    /**
+     * perceptual score
+     * 0.00 - 1.00 percentage of perceptual match to the previous frame
+     */
+    float perceptual_score;
+    float perceptual_score_factor;
 } AVFrame;
 
 #if FF_API_FRAME_GET_SET