diff options
author | Yaowu Xu <yaowu@google.com> | 2016-06-27 19:18:00 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2016-06-27 19:18:00 +0000 |
commit | 7676defca9f2c1fbb7b82e16471858d8d133d81e (patch) | |
tree | f336bfa4704b602c8380974e3e55995102c27f41 /vpx_dsp | |
parent | b9ec759bc23fdbafaf8266badb72a65d201ad315 (diff) | |
parent | 003a9d20add4f736ff16fa58e51b5ca1bbf50ce0 (diff) | |
download | libvpx-7676defca9f2c1fbb7b82e16471858d8d133d81e.tar.gz libvpx-7676defca9f2c1fbb7b82e16471858d8d133d81e.tar.bz2 libvpx-7676defca9f2c1fbb7b82e16471858d8d133d81e.zip |
Merge "Port metric computation changes from nextgenv2"
Diffstat (limited to 'vpx_dsp')
-rw-r--r-- | vpx_dsp/fastssim.c | 144 | ||||
-rw-r--r-- | vpx_dsp/psnr.c | 296 | ||||
-rw-r--r-- | vpx_dsp/psnr.h | 63 | ||||
-rw-r--r-- | vpx_dsp/psnrhvs.c | 135 | ||||
-rw-r--r-- | vpx_dsp/ssim.c | 111 | ||||
-rw-r--r-- | vpx_dsp/ssim.h | 22 | ||||
-rw-r--r-- | vpx_dsp/vpx_dsp.mk | 2 |
7 files changed, 598 insertions, 175 deletions
diff --git a/vpx_dsp/fastssim.c b/vpx_dsp/fastssim.c index 1405a30e0..7d9089171 100644 --- a/vpx_dsp/fastssim.c +++ b/vpx_dsp/fastssim.c @@ -10,6 +10,7 @@ * This code was originally written by: Nathan E. Egge, at the Daala * project. */ +#include <assert.h> #include <math.h> #include <stdlib.h> #include <string.h> @@ -17,19 +18,24 @@ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/ssim.h" #include "vpx_ports/system_state.h" -/* TODO(jbb): High bit depth version of this code needed */ + typedef struct fs_level fs_level; typedef struct fs_ctx fs_ctx; #define SSIM_C1 (255 * 255 * 0.01 * 0.01) #define SSIM_C2 (255 * 255 * 0.03 * 0.03) - +#if CONFIG_VP9_HIGHBITDEPTH +#define SSIM_C1_10 (1023 * 1023 * 0.01 * 0.01) +#define SSIM_C1_12 (4095 * 4095 * 0.01 * 0.01) +#define SSIM_C2_10 (1023 * 1023 * 0.03 * 0.03) +#define SSIM_C2_12 (4095 * 4095 * 0.03 * 0.03) +#endif #define FS_MINI(_a, _b) ((_a) < (_b) ? (_a) : (_b)) #define FS_MAXI(_a, _b) ((_a) > (_b) ? (_a) : (_b)) struct fs_level { - uint16_t *im1; - uint16_t *im2; + uint32_t *im1; + uint32_t *im2; double *ssim; int w; int h; @@ -80,7 +86,7 @@ static void fs_ctx_init(fs_ctx *_ctx, int _w, int _h, int _nlevels) { level_size += sizeof(*_ctx->level[l].ssim) - 1; level_size /= sizeof(*_ctx->level[l].ssim); level_size *= sizeof(*_ctx->level[l].ssim); - _ctx->level[l].im1 = (uint16_t *) data; + _ctx->level[l].im1 = (uint32_t *)data; _ctx->level[l].im2 = _ctx->level[l].im1 + im_size; data += level_size; _ctx->level[l].ssim = (double *) data; @@ -96,10 +102,10 @@ static void fs_ctx_clear(fs_ctx *_ctx) { } static void fs_downsample_level(fs_ctx *_ctx, int _l) { - const uint16_t *src1; - const uint16_t *src2; - uint16_t *dst1; - uint16_t *dst2; + const uint32_t *src1; + const uint32_t *src2; + uint32_t *dst1; + uint32_t *dst2; int w2; int h2; int w; @@ -132,11 +138,12 @@ static void fs_downsample_level(fs_ctx *_ctx, int _l) { } } -static void fs_downsample_level0(fs_ctx *_ctx, const unsigned char *_src1, - int _s1ystride, const unsigned char *_src2, - int _s2ystride, int _w, int _h) { - uint16_t *dst1; - uint16_t *dst2; +static void fs_downsample_level0(fs_ctx *_ctx, const uint8_t *_src1, + int _s1ystride, const uint8_t *_src2, + int _s2ystride, int _w, int _h, + uint32_t bd, uint32_t shift) { + uint32_t *dst1; + uint32_t *dst2; int w; int h; int i; @@ -155,21 +162,34 @@ static void fs_downsample_level0(fs_ctx *_ctx, const unsigned char *_src1, int i1; i0 = 2 * i; i1 = FS_MINI(i0 + 1, _w); - dst1[j * w + i] = _src1[j0 * _s1ystride + i0] - + _src1[j0 * _s1ystride + i1] + _src1[j1 * _s1ystride + i0] - + _src1[j1 * _s1ystride + i1]; - dst2[j * w + i] = _src2[j0 * _s2ystride + i0] - + _src2[j0 * _s2ystride + i1] + _src2[j1 * _s2ystride + i0] - + _src2[j1 * _s2ystride + i1]; + if (bd == 8 && shift == 0) { + dst1[j * w + i] = _src1[j0 * _s1ystride + i0] + + _src1[j0 * _s1ystride + i1] + _src1[j1 * _s1ystride + i0] + + _src1[j1 * _s1ystride + i1]; + dst2[j * w + i] = _src2[j0 * _s2ystride + i0] + + _src2[j0 * _s2ystride + i1] + _src2[j1 * _s2ystride + i0] + + _src2[j1 * _s2ystride + i1]; + } else { + uint16_t * src1s = CONVERT_TO_SHORTPTR(_src1); + uint16_t * src2s = CONVERT_TO_SHORTPTR(_src2); + dst1[j * w + i] = (src1s[j0 * _s1ystride + i0] >> shift) + + (src1s[j0 * _s1ystride + i1] >> shift) + + (src1s[j1 * _s1ystride + i0] >> shift) + + (src1s[j1 * _s1ystride + i1] >> shift); + dst2[j * w + i] = (src2s[j0 * _s2ystride + i0] >> shift) + + (src2s[j0 * _s2ystride + i1] >> shift) + + (src2s[j1 * _s2ystride + i0] >> shift) + + (src2s[j1 * _s2ystride + i1] >> shift); + } } } } -static void fs_apply_luminance(fs_ctx *_ctx, int _l) { +static void fs_apply_luminance(fs_ctx *_ctx, int _l, int bit_depth) { unsigned *col_sums_x; unsigned *col_sums_y; - uint16_t *im1; - uint16_t *im2; + uint32_t *im1; + uint32_t *im2; double *ssim; double c1; int w; @@ -178,6 +198,15 @@ static void fs_apply_luminance(fs_ctx *_ctx, int _l) { int j1offs; int i; int j; + double ssim_c1 = SSIM_C1; +#if CONFIG_VP9_HIGHBITDEPTH + if (bit_depth == 10) + ssim_c1 = SSIM_C1_10; + if (bit_depth == 12) + ssim_c1 = SSIM_C1_12; +#else + assert(bit_depth == 8); +#endif w = _ctx->level[_l].w; h = _ctx->level[_l].h; col_sums_x = _ctx->col_buf; @@ -196,7 +225,7 @@ static void fs_apply_luminance(fs_ctx *_ctx, int _l) { col_sums_y[i] += im2[j1offs + i]; } ssim = _ctx->level[_l].ssim; - c1 = (double) (SSIM_C1 * 4096 * (1 << 4 * _l)); + c1 = (double) (ssim_c1 * 4096 * (1 << 4 * _l)); for (j = 0; j < h; j++) { unsigned mux; unsigned muy; @@ -294,9 +323,9 @@ static void fs_apply_luminance(fs_ctx *_ctx, int _l) { } \ while (0) -static void fs_calc_structure(fs_ctx *_ctx, int _l) { - uint16_t *im1; - uint16_t *im2; +static void fs_calc_structure(fs_ctx *_ctx, int _l, int bit_depth) { + uint32_t *im1; + uint32_t *im2; unsigned *gx_buf; unsigned *gy_buf; double *ssim; @@ -309,6 +338,16 @@ static void fs_calc_structure(fs_ctx *_ctx, int _l) { int h; int i; int j; + double ssim_c2 = SSIM_C2; +#if CONFIG_VP9_HIGHBITDEPTH + if (bit_depth == 10) + ssim_c2 = SSIM_C2_10; + if (bit_depth == 12) + ssim_c2 = SSIM_C2_12; +#else + assert(bit_depth == 8); +#endif + w = _ctx->level[_l].w; h = _ctx->level[_l].h; im1 = _ctx->level[_l].im1; @@ -318,7 +357,7 @@ static void fs_calc_structure(fs_ctx *_ctx, int _l) { stride = w + 8; gy_buf = gx_buf + 8 * stride; memset(gx_buf, 0, 2 * 8 * stride * sizeof(*gx_buf)); - c2 = SSIM_C2 * (1 << 4 * _l) * 16 * 104; + c2 = ssim_c2 * (1 << 4 * _l) * 16 * 104; for (j = 0; j < h + 4; j++) { if (j < h - 1) { for (i = 0; i < w - 1; i++) { @@ -326,11 +365,11 @@ static void fs_calc_structure(fs_ctx *_ctx, int _l) { unsigned g2; unsigned gx; unsigned gy; - g1 = abs(im1[(j + 1) * w + i + 1] - im1[j * w + i]); - g2 = abs(im1[(j + 1) * w + i] - im1[j * w + i + 1]); + g1 = abs((int)im1[(j + 1) * w + i + 1] - (int)im1[j * w + i]); + g2 = abs((int)im1[(j + 1) * w + i] - (int)im1[j * w + i + 1]); gx = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2); - g1 = abs(im2[(j + 1) * w + i + 1] - im2[j * w + i]); - g2 = abs(im2[(j + 1) * w + i] - im2[j * w + i + 1]); + g1 = abs((int)im2[(j + 1) * w + i + 1] - (int)im2[j * w + i]); + g2 = abs((int)im2[(j + 1) * w + i] - (int)im2[j * w + i + 1]); gy = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2); gx_buf[(j & 7) * stride + i + 4] = gx; gy_buf[(j & 7) * stride + i + 4] = gy; @@ -421,48 +460,55 @@ static double fs_average(fs_ctx *_ctx, int _l) { return pow(ret / (w * h), FS_WEIGHTS[_l]); } -static double calc_ssim(const unsigned char *_src, int _systride, - const unsigned char *_dst, int _dystride, int _w, int _h) { +static double convert_ssim_db(double _ssim, double _weight) { + assert(_weight >= _ssim); + if ((_weight - _ssim) < 1e-10) + return MAX_SSIM_DB; + return 10 * (log10(_weight) - log10(_weight - _ssim)); +} + +static double calc_ssim(const uint8_t *_src, int _systride, + const uint8_t *_dst, int _dystride, + int _w, int _h, uint32_t _bd, uint32_t _shift) { fs_ctx ctx; double ret; int l; ret = 1; fs_ctx_init(&ctx, _w, _h, FS_NLEVELS); - fs_downsample_level0(&ctx, _src, _systride, _dst, _dystride, _w, _h); + fs_downsample_level0(&ctx, _src, _systride, _dst, _dystride, + _w, _h, _bd, _shift); for (l = 0; l < FS_NLEVELS - 1; l++) { - fs_calc_structure(&ctx, l); + fs_calc_structure(&ctx, l, _bd); ret *= fs_average(&ctx, l); fs_downsample_level(&ctx, l + 1); } - fs_calc_structure(&ctx, l); - fs_apply_luminance(&ctx, l); + fs_calc_structure(&ctx, l, _bd); + fs_apply_luminance(&ctx, l, _bd); ret *= fs_average(&ctx, l); fs_ctx_clear(&ctx); return ret; } -static double convert_ssim_db(double _ssim, double _weight) { - return 10 * (log10(_weight) - log10(_weight - _ssim)); -} - double vpx_calc_fastssim(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *dest, - double *ssim_y, double *ssim_u, double *ssim_v) { + double *ssim_y, double *ssim_u, double *ssim_v, + uint32_t bd, uint32_t in_bd) { double ssimv; + uint32_t bd_shift = 0; vpx_clear_system_state(); + assert(bd >= in_bd); + bd_shift = bd - in_bd; *ssim_y = calc_ssim(source->y_buffer, source->y_stride, dest->y_buffer, dest->y_stride, source->y_crop_width, - source->y_crop_height); - + source->y_crop_height, in_bd, bd_shift); *ssim_u = calc_ssim(source->u_buffer, source->uv_stride, dest->u_buffer, dest->uv_stride, source->uv_crop_width, - source->uv_crop_height); - + source->uv_crop_height, in_bd, bd_shift); *ssim_v = calc_ssim(source->v_buffer, source->uv_stride, dest->v_buffer, dest->uv_stride, source->uv_crop_width, - source->uv_crop_height); - ssimv = (*ssim_y) * .8 + .1 * ((*ssim_u) + (*ssim_v)); + source->uv_crop_height, in_bd, bd_shift); + ssimv = (*ssim_y) * .8 + .1 * ((*ssim_u) + (*ssim_v)); return convert_ssim_db(ssimv, 1.0); } diff --git a/vpx_dsp/psnr.c b/vpx_dsp/psnr.c new file mode 100644 index 000000000..1655f116c --- /dev/null +++ b/vpx_dsp/psnr.c @@ -0,0 +1,296 @@ +/* +* Copyright (c) 2016 The WebM project authors. All Rights Reserved. +* +* Use of this source code is governed by a BSD-style license +* that can be found in the LICENSE file in the root of the source +* tree. An additional intellectual property rights grant can be found +* in the file PATENTS. All contributing project authors may +* be found in the AUTHORS file in the root of the source tree. +*/ + +#include <math.h> +#include <assert.h> +#include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/psnr.h" +#include "vpx_scale/yv12config.h" + + +double vpx_sse_to_psnr(double samples, double peak, double sse) { + if (sse > 0.0) { + const double psnr = 10.0 * log10(samples * peak * peak / sse); + return psnr > MAX_PSNR ? MAX_PSNR : psnr; + } else { + return MAX_PSNR; + } +} + +/* TODO(yaowu): The block_variance calls the unoptimized versions of variance() +* and highbd_8_variance(). It should not. +*/ +static void encoder_variance(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int w, int h, unsigned int *sse, int *sum) { + int i, j; + + *sum = 0; + *sse = 0; + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + const int diff = a[j] - b[j]; + *sum += diff; + *sse += diff * diff; + } + + a += a_stride; + b += b_stride; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void encoder_highbd_variance64(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, uint64_t *sse, + uint64_t *sum) { + int i, j; + + uint16_t *a = CONVERT_TO_SHORTPTR(a8); + uint16_t *b = CONVERT_TO_SHORTPTR(b8); + *sum = 0; + *sse = 0; + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + const int diff = a[j] - b[j]; + *sum += diff; + *sse += diff * diff; + } + a += a_stride; + b += b_stride; + } +} + +static void encoder_highbd_8_variance(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, + unsigned int *sse, int *sum) { + uint64_t sse_long = 0; + uint64_t sum_long = 0; + encoder_highbd_variance64(a8, a_stride, b8, b_stride, w, h, + &sse_long, &sum_long); + *sse = (unsigned int)sse_long; + *sum = (int)sum_long; +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +static int64_t get_sse(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int width, int height) { + const int dw = width % 16; + const int dh = height % 16; + int64_t total_sse = 0; + unsigned int sse = 0; + int sum = 0; + int x, y; + + if (dw > 0) { + encoder_variance(&a[width - dw], a_stride, &b[width - dw], b_stride, + dw, height, &sse, &sum); + total_sse += sse; + } + + if (dh > 0) { + encoder_variance(&a[(height - dh) * a_stride], a_stride, + &b[(height - dh) * b_stride], b_stride, + width - dw, dh, &sse, &sum); + total_sse += sse; + } + + for (y = 0; y < height / 16; ++y) { + const uint8_t *pa = a; + const uint8_t *pb = b; + for (x = 0; x < width / 16; ++x) { + vpx_mse16x16(pa, a_stride, pb, b_stride, &sse); + total_sse += sse; + + pa += 16; + pb += 16; + } + + a += 16 * a_stride; + b += 16 * b_stride; + } + + return total_sse; +} + +#if CONFIG_VP9_HIGHBITDEPTH +static int64_t highbd_get_sse_shift(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int width, int height, + unsigned int input_shift) { + const uint16_t *a = CONVERT_TO_SHORTPTR(a8); + const uint16_t *b = CONVERT_TO_SHORTPTR(b8); + int64_t total_sse = 0; + int x, y; + for (y = 0; y < height; ++y) { + for (x = 0; x < width; ++x) { + int64_t diff; + diff = (a[x] >> input_shift) - (b[x] >> input_shift); + total_sse += diff * diff; + } + a += a_stride; + b += b_stride; + } + return total_sse; +} + +static int64_t highbd_get_sse(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int width, int height) { + int64_t total_sse = 0; + int x, y; + const int dw = width % 16; + const int dh = height % 16; + unsigned int sse = 0; + int sum = 0; + if (dw > 0) { + encoder_highbd_8_variance(&a[width - dw], a_stride, + &b[width - dw], b_stride, + dw, height, &sse, &sum); + total_sse += sse; + } + if (dh > 0) { + encoder_highbd_8_variance(&a[(height - dh) * a_stride], a_stride, + &b[(height - dh) * b_stride], b_stride, + width - dw, dh, &sse, &sum); + total_sse += sse; + } + for (y = 0; y < height / 16; ++y) { + const uint8_t *pa = a; + const uint8_t *pb = b; + for (x = 0; x < width / 16; ++x) { + vpx_highbd_8_mse16x16(pa, a_stride, pb, b_stride, &sse); + total_sse += sse; + pa += 16; + pb += 16; + } + a += 16 * a_stride; + b += 16 * b_stride; + } + return total_sse; +} +#endif // CONFIG_VP9_HIGHBITDEPTH + + +int64_t vpx_get_y_sse(const YV12_BUFFER_CONFIG *a, + const YV12_BUFFER_CONFIG *b) { + assert(a->y_crop_width == b->y_crop_width); + assert(a->y_crop_height == b->y_crop_height); + + return get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, + a->y_crop_width, a->y_crop_height); +} + +#if CONFIG_VP9_HIGHBITDEPTH +int64_t vpx_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a, + const YV12_BUFFER_CONFIG *b) { + assert(a->y_crop_width == b->y_crop_width); + assert(a->y_crop_height == b->y_crop_height); + assert((a->flags & YV12_FLAG_HIGHBITDEPTH) != 0); + assert((b->flags & YV12_FLAG_HIGHBITDEPTH) != 0); + + return highbd_get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, + a->y_crop_width, a->y_crop_height); +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +#if CONFIG_VP9_HIGHBITDEPTH +void vpx_calc_highbd_psnr(const YV12_BUFFER_CONFIG *a, + const YV12_BUFFER_CONFIG *b, + PSNR_STATS *psnr, uint32_t bit_depth, + uint32_t in_bit_depth) { + const int widths[3] = + { a->y_crop_width, a->uv_crop_width, a->uv_crop_width }; + const int heights[3] = + { a->y_crop_height, a->uv_crop_height, a->uv_crop_height }; + const uint8_t *a_planes[3] = { a->y_buffer, a->u_buffer, a->v_buffer }; + const int a_strides[3] = { a->y_stride, a->uv_stride, a->uv_stride }; + const uint8_t *b_planes[3] = { b->y_buffer, b->u_buffer, b->v_buffer }; + const int b_strides[3] = { b->y_stride, b->uv_stride, b->uv_stride }; + int i; + uint64_t total_sse = 0; + uint32_t total_samples = 0; + const double peak = (double)((1 << in_bit_depth) - 1); + const unsigned int input_shift = bit_depth - in_bit_depth; + + for (i = 0; i < 3; ++i) { + const int w = widths[i]; + const int h = heights[i]; + const uint32_t samples = w * h; + uint64_t sse; + if (a->flags & YV12_FLAG_HIGHBITDEPTH) { + if (input_shift) { + sse = highbd_get_sse_shift(a_planes[i], a_strides[i], + b_planes[i], b_strides[i], w, h, + input_shift); + } else { + sse = highbd_get_sse(a_planes[i], a_strides[i], + b_planes[i], b_strides[i], w, h); + } + } else { + sse = get_sse(a_planes[i], a_strides[i], + b_planes[i], b_strides[i], + w, h); + } + psnr->sse[1 + i] = sse; + psnr->samples[1 + i] = samples; + psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse); + + total_sse += sse; + total_samples += samples; + } + + psnr->sse[0] = total_sse; + psnr->samples[0] = total_samples; + psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak, + (double)total_sse); +} + +#endif // !CONFIG_VP9_HIGHBITDEPTH + +void vpx_calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, + PSNR_STATS *psnr) { + static const double peak = 255.0; + const int widths[3] = { + a->y_crop_width, a->uv_crop_width, a->uv_crop_width }; + const int heights[3] = { + a->y_crop_height, a->uv_crop_height, a->uv_crop_height }; + const uint8_t *a_planes[3] = { a->y_buffer, a->u_buffer, a->v_buffer }; + const int a_strides[3] = { a->y_stride, a->uv_stride, a->uv_stride }; + const uint8_t *b_planes[3] = { b->y_buffer, b->u_buffer, b->v_buffer }; + const int b_strides[3] = { b->y_stride, b->uv_stride, b->uv_stride }; + int i; + uint64_t total_sse = 0; + uint32_t total_samples = 0; + + for (i = 0; i < 3; ++i) { + const int w = widths[i]; + const int h = heights[i]; + const uint32_t samples = w * h; + const uint64_t sse = get_sse(a_planes[i], a_strides[i], + b_planes[i], b_strides[i], + w, h); + psnr->sse[1 + i] = sse; + psnr->samples[1 + i] = samples; + psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse); + + total_sse += sse; + total_samples += samples; + } + + psnr->sse[0] = total_sse; + psnr->samples[0] = total_samples; + psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak, + (double)total_sse); +} diff --git a/vpx_dsp/psnr.h b/vpx_dsp/psnr.h new file mode 100644 index 000000000..e25b4504a --- /dev/null +++ b/vpx_dsp/psnr.h @@ -0,0 +1,63 @@ +/* +* Copyright (c) 2016 The WebM project authors. All Rights Reserved. +* +* Use of this source code is governed by a BSD-style license +* that can be found in the LICENSE file in the root of the source +* tree. An additional intellectual property rights grant can be found +* in the file PATENTS. All contributing project authors may +* be found in the AUTHORS file in the root of the source tree. +*/ + +#ifndef VPX_DSP_PSNR_H_ +#define VPX_DSP_PSNR_H_ + + +#include "vpx_scale/yv12config.h" + +#define MAX_PSNR 100.0 + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + double psnr[4]; // total/y/u/v + uint64_t sse[4]; // total/y/u/v + uint32_t samples[4]; // total/y/u/v +} PSNR_STATS; + +// TODO(dkovalev) change vpx_sse_to_psnr signature: double -> int64_t + +/*!\brief Converts SSE to PSNR +* +* Converts sum of squared errros (SSE) to peak signal-to-noise ratio (PNSR). +* +* \param[in] samples Number of samples +* \param[in] peak Max sample value +* \param[in] sse Sum of squared errors +*/ +double vpx_sse_to_psnr(double samples, double peak, double sse); +int64_t vpx_get_y_sse(const YV12_BUFFER_CONFIG *a, + const YV12_BUFFER_CONFIG *b); +#if CONFIG_VP9_HIGHBITDEPTH +int64_t vpx_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a, + const YV12_BUFFER_CONFIG *b); +void vpx_calc_highbd_psnr(const YV12_BUFFER_CONFIG *a, + const YV12_BUFFER_CONFIG *b, + PSNR_STATS *psnr, + unsigned int bit_depth, + unsigned int in_bit_depth); +#endif +void vpx_calc_psnr(const YV12_BUFFER_CONFIG *a, + const YV12_BUFFER_CONFIG *b, + PSNR_STATS *psnr); + +double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *dest, + double *phvs_y, double *phvs_u, + double *phvs_v, uint32_t bd, uint32_t in_bd); + +#ifdef __cplusplus +} // extern "C" +#endif +#endif // VPX_DSP_PSNR_H_ diff --git a/vpx_dsp/psnrhvs.c b/vpx_dsp/psnrhvs.c index 300170579..095ba5d13 100644 --- a/vpx_dsp/psnrhvs.c +++ b/vpx_dsp/psnrhvs.c @@ -10,6 +10,7 @@ * This code was originally written by: Gregory Maxwell, at the Daala * project. */ +#include <assert.h> #include <stdio.h> #include <stdlib.h> #include <math.h> @@ -18,6 +19,7 @@ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/ssim.h" #include "vpx_ports/system_state.h" +#include "vpx_dsp/psnr.h" #if !defined(M_PI) # define M_PI (3.141592653589793238462643) @@ -26,14 +28,29 @@ static void od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x, int xstride) { + int i, j; (void) xstride; vpx_fdct8x8(x, y, ystride); + for (i = 0; i < 8; i++) + for (j = 0; j< 8; j++) + *(y + ystride*i + j) = (*(y + ystride*i + j) + 4) >> 3; } +#if CONFIG_VP9_HIGHBITDEPTH +static void hbd_od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x, + int xstride) { + int i, j; + (void) xstride; + vpx_highbd_fdct8x8(x, y, ystride); + for (i = 0; i < 8; i++) + for (j = 0; j< 8; j++) + *(y + ystride*i + j) = (*(y + ystride*i + j) + 4) >> 3; +} +#endif /* Normalized inverse quantization matrix for 8x8 DCT at the point of * transparency. This is not the JPEG based matrix from the paper, this one gives a slightly higher MOS agreement.*/ -static const float csf_y[8][8] = { +static const double csf_y[8][8] = { {1.6193873005, 2.2901594831, 2.08509755623, 1.48366094411, 1.00227514334, 0.678296995242, 0.466224900598, 0.3265091542}, {2.2901594831, 1.94321815382, 2.04793073064, 1.68731108984, 1.2305666963, @@ -50,7 +67,7 @@ static const float csf_y[8][8] = { 0.283006984131, 0.215017739696, 0.168869545842, 0.136153931001}, {0.3265091542, 0.436405793551, 0.372504254596, 0.295774038565, 0.226951348204, 0.17408067321, 0.136153931001, 0.109083846276}}; -static const float csf_cb420[8][8] = { +static const double csf_cb420[8][8] = { {1.91113096927, 2.46074210438, 1.18284184739, 1.14982565193, 1.05017074788, 0.898018824055, 0.74725392039, 0.615105596242}, {2.46074210438, 1.58529308355, 1.21363250036, 1.38190029285, 1.33100189972, @@ -67,7 +84,7 @@ static const float csf_cb420[8][8] = { 0.55002013668, 0.454353482512, 0.389234902883, 0.342353999733}, {0.615105596242, 0.830890433625, 0.731221236837, 0.608694761374, 0.495804539034, 0.407050308965, 0.342353999733, 0.295530605237}}; -static const float csf_cr420[8][8] = { +static const double csf_cr420[8][8] = { {2.03871978502, 2.62502345193, 1.26180942886, 1.11019789803, 1.01397751469, 0.867069376285, 0.721500455585, 0.593906509971}, {2.62502345193, 1.69112867013, 1.17180569821, 1.3342742857, 1.28513006198, @@ -85,23 +102,38 @@ static const float csf_cr420[8][8] = { {0.593906509971, 0.802254508198, 0.706020324706, 0.587716619023, 0.478717061273, 0.393021669543, 0.330555063063, 0.285345396658}}; -static double convert_score_db(double _score, double _weight) { - return 10 * (log10(255 * 255) - log10(_weight * _score)); +static double convert_score_db(double _score, double _weight, int bit_depth) { + int16_t pix_max = 255; + assert(_score * _weight >= 0.0); + if (bit_depth == 10) + pix_max = 1023; + else if (bit_depth == 12) + pix_max = 4095; + + if (_weight * _score < pix_max * pix_max * 1e-10) + return MAX_PSNR; + return 10 * (log10(pix_max * pix_max) - log10(_weight * _score)); } -static double calc_psnrhvs(const unsigned char *_src, int _systride, - const unsigned char *_dst, int _dystride, - double _par, int _w, int _h, int _step, - const float _csf[8][8]) { - float ret; +static double calc_psnrhvs(const unsigned char *src, int _systride, + const unsigned char *dst, int _dystride, + double _par, int _w, int _h, int _step, + const double _csf[8][8], uint32_t bit_depth, + uint32_t _shift) { + double ret; + const uint8_t *_src8 = src; + const uint8_t *_dst8 = dst; + const uint16_t *_src16 = CONVERT_TO_SHORTPTR(src); + const uint16_t *_dst16 = CONVERT_TO_SHORTPTR(dst); int16_t dct_s[8 * 8], dct_d[8 * 8]; tran_low_t dct_s_coef[8 * 8], dct_d_coef[8 * 8]; - float mask[8][8]; + double mask[8][8]; int pixels; int x; int y; (void) _par; ret = pixels = 0; + /*In the PSNR-HVS-M paper[1] the authors describe the construction of their masking table as "we have used the quantization table for the color component Y of JPEG [6] that has been also obtained on the @@ -126,23 +158,28 @@ static double calc_psnrhvs(const unsigned char *_src, int _systride, for (x = 0; x < _w - 7; x += _step) { int i; int j; - float s_means[4]; - float d_means[4]; - float s_vars[4]; - float d_vars[4]; - float s_gmean = 0; - float d_gmean = 0; - float s_gvar = 0; - float d_gvar = 0; - float s_mask = 0; - float d_mask = 0; + double s_means[4]; + double d_means[4]; + double s_vars[4]; + double d_vars[4]; + double s_gmean = 0; + double d_gmean = 0; + double s_gvar = 0; + double d_gvar = 0; + double s_mask = 0; + double d_mask = 0; for (i = 0; i < 4; i++) s_means[i] = d_means[i] = s_vars[i] = d_vars[i] = 0; for (i = 0; i < 8; i++) { for (j = 0; j < 8; j++) { int sub = ((i & 12) >> 2) + ((j & 12) >> 1); - dct_s[i * 8 + j] = _src[(y + i) * _systride + (j + x)]; - dct_d[i * 8 + j] = _dst[(y + i) * _dystride + (j + x)]; + if (bit_depth == 8 && _shift == 0) { + dct_s[i * 8 + j] = _src8[(y + i) * _systride + (j + x)]; + dct_d[i * 8 + j] = _dst8[(y + i) * _dystride + (j + x)]; + } else if (bit_depth == 10 || bit_depth == 12) { + dct_s[i * 8 + j] = _src16[(y + i) * _systride + (j + x)] >> _shift; + dct_d[i * 8 + j] = _dst16[(y + i) * _dystride + (j + x)] >> _shift; + } s_gmean += dct_s[i * 8 + j]; d_gmean += dct_d[i * 8 + j]; s_means[sub] += dct_s[i * 8 + j]; @@ -176,8 +213,16 @@ static double calc_psnrhvs(const unsigned char *_src, int _systride, s_gvar = (s_vars[0] + s_vars[1] + s_vars[2] + s_vars[3]) / s_gvar; if (d_gvar > 0) d_gvar = (d_vars[0] + d_vars[1] + d_vars[2] + d_vars[3]) / d_gvar; - od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8); - od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8); +#if CONFIG_VP9_HIGHBITDEPTH + if (bit_depth == 10 || bit_depth == 12) { + hbd_od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8); + hbd_od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8); + } +#endif + if (bit_depth == 8) { + od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8); + od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8); + } for (i = 0; i < 8; i++) for (j = (i == 0); j < 8; j++) s_mask += dct_s_coef[i * 8 + j] * dct_s_coef[i * 8 + j] * mask[i][j]; @@ -190,8 +235,8 @@ static double calc_psnrhvs(const unsigned char *_src, int _systride, s_mask = d_mask; for (i = 0; i < 8; i++) { for (j = 0; j < 8; j++) { - float err; - err = fabs((float)(dct_s_coef[i * 8 + j] - dct_d_coef[i * 8 + j])); + double err; + err = fabs((double)(dct_s_coef[i * 8 + j] - dct_d_coef[i * 8 + j])); if (i != 0 || j != 0) err = err < s_mask / mask[i][j] ? 0 : err - s_mask / mask[i][j]; ret += (err * _csf[i][j]) * (err * _csf[i][j]); @@ -203,25 +248,35 @@ static double calc_psnrhvs(const unsigned char *_src, int _systride, ret /= pixels; return ret; } -double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source, + +double vpx_psnrhvs(const YV12_BUFFER_CONFIG *src, const YV12_BUFFER_CONFIG *dest, double *y_psnrhvs, - double *u_psnrhvs, double *v_psnrhvs) { + double *u_psnrhvs, double *v_psnrhvs, + uint32_t bd, uint32_t in_bd) { double psnrhvs; const double par = 1.0; const int step = 7; + uint32_t bd_shift = 0; vpx_clear_system_state(); - *y_psnrhvs = calc_psnrhvs(source->y_buffer, source->y_stride, dest->y_buffer, - dest->y_stride, par, source->y_crop_width, - source->y_crop_height, step, csf_y); - *u_psnrhvs = calc_psnrhvs(source->u_buffer, source->uv_stride, dest->u_buffer, - dest->uv_stride, par, source->uv_crop_width, - source->uv_crop_height, step, csf_cb420); + assert(bd == 8 || bd == 10 || bd == 12); + assert(bd >= in_bd); - *v_psnrhvs = calc_psnrhvs(source->v_buffer, source->uv_stride, dest->v_buffer, - dest->uv_stride, par, source->uv_crop_width, - source->uv_crop_height, step, csf_cr420); - psnrhvs = (*y_psnrhvs) * .8 + .1 * ((*u_psnrhvs) + (*v_psnrhvs)); + bd_shift = bd - in_bd; - return convert_score_db(psnrhvs, 1.0); + *y_psnrhvs = calc_psnrhvs(src->y_buffer, src->y_stride, dest->y_buffer, + dest->y_stride, par, src->y_crop_width, + src->y_crop_height, step, csf_y, bd, + bd_shift); + *u_psnrhvs = calc_psnrhvs(src->u_buffer, src->uv_stride, dest->u_buffer, + dest->uv_stride, par, src->uv_crop_width, + src->uv_crop_height, step, csf_cb420, bd, + bd_shift); + *v_psnrhvs = calc_psnrhvs(src->v_buffer, src->uv_stride, dest->v_buffer, + dest->uv_stride, par, src->uv_crop_width, + src->uv_crop_height, step, csf_cr420, bd, + bd_shift); + psnrhvs = (*y_psnrhvs) * .8 + .1 * ((*u_psnrhvs) + (*v_psnrhvs)); + return convert_score_db(psnrhvs, 1.0, in_bd); } + diff --git a/vpx_dsp/ssim.c b/vpx_dsp/ssim.c index cfe5bb331..632e272dc 100644 --- a/vpx_dsp/ssim.c +++ b/vpx_dsp/ssim.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include <assert.h> #include <math.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/ssim.h" @@ -66,16 +67,31 @@ void vpx_highbd_ssim_parms_8x8_c(const uint16_t *s, int sp, static const int64_t cc1 = 26634; // (64^2*(.01*255)^2 static const int64_t cc2 = 239708; // (64^2*(.03*255)^2 +static const int64_t cc1_10 = 428658; // (64^2*(.01*1023)^2 +static const int64_t cc2_10 = 3857925; // (64^2*(.03*1023)^2 +static const int64_t cc1_12 = 6868593; // (64^2*(.01*4095)^2 +static const int64_t cc2_12 = 61817334; // (64^2*(.03*4095)^2 static double similarity(uint32_t sum_s, uint32_t sum_r, uint32_t sum_sq_s, uint32_t sum_sq_r, - uint32_t sum_sxr, int count) { + uint32_t sum_sxr, int count, + uint32_t bd) { int64_t ssim_n, ssim_d; int64_t c1, c2; - - // scale the constants by number of pixels - c1 = (cc1 * count * count) >> 12; - c2 = (cc2 * count * count) >> 12; + if (bd == 8) { + // scale the constants by number of pixels + c1 = (cc1 * count * count) >> 12; + c2 = (cc2 * count * count) >> 12; + } else if (bd == 10) { + c1 = (cc1_10 * count * count) >> 12; + c2 = (cc2_10 * count * count) >> 12; + } else if (bd == 12) { + c1 = (cc1_12 * count * count) >> 12; + c2 = (cc2_12 * count * count) >> 12; + } else { + c1 = c2 = 0; + assert(0); + } ssim_n = (2 * sum_s * sum_r + c1) * ((int64_t) 2 * count * sum_sxr - (int64_t) 2 * sum_s * sum_r + c2); @@ -91,22 +107,21 @@ static double ssim_8x8(const uint8_t *s, int sp, const uint8_t *r, int rp) { uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; vpx_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); - return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64); + return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64, 8); } #if CONFIG_VP9_HIGHBITDEPTH static double highbd_ssim_8x8(const uint16_t *s, int sp, const uint16_t *r, - int rp, unsigned int bd) { + int rp, uint32_t bd, uint32_t shift) { uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; - const int oshift = bd - 8; vpx_highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); - return similarity(sum_s >> oshift, - sum_r >> oshift, - sum_sq_s >> (2 * oshift), - sum_sq_r >> (2 * oshift), - sum_sxr >> (2 * oshift), - 64); + return similarity(sum_s >> shift, + sum_r >> shift, + sum_sq_s >> (2 * shift), + sum_sq_r >> (2 * shift), + sum_sxr >> (2 * shift), + 64, bd); } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -136,7 +151,7 @@ static double vpx_ssim2(const uint8_t *img1, const uint8_t *img2, #if CONFIG_VP9_HIGHBITDEPTH static double vpx_highbd_ssim2(const uint8_t *img1, const uint8_t *img2, int stride_img1, int stride_img2, int width, - int height, unsigned int bd) { + int height, uint32_t bd, uint32_t shift) { int i, j; int samples = 0; double ssim_total = 0; @@ -147,7 +162,7 @@ static double vpx_highbd_ssim2(const uint8_t *img1, const uint8_t *img2, for (j = 0; j <= width - 8; j += 4) { double v = highbd_ssim_8x8(CONVERT_TO_SHORTPTR(img1 + j), stride_img1, CONVERT_TO_SHORTPTR(img2 + j), stride_img2, - bd); + bd, shift); ssim_total += v; samples++; } @@ -182,31 +197,6 @@ double vpx_calc_ssim(const YV12_BUFFER_CONFIG *source, return ssimv; } -double vpx_calc_ssimg(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *dest, - double *ssim_y, double *ssim_u, double *ssim_v) { - double ssim_all = 0; - double a, b, c; - - a = vpx_ssim2(source->y_buffer, dest->y_buffer, - source->y_stride, dest->y_stride, - source->y_crop_width, source->y_crop_height); - - b = vpx_ssim2(source->u_buffer, dest->u_buffer, - source->uv_stride, dest->uv_stride, - source->uv_crop_width, source->uv_crop_height); - - c = vpx_ssim2(source->v_buffer, dest->v_buffer, - source->uv_stride, dest->uv_stride, - source->uv_crop_width, source->uv_crop_height); - *ssim_y = a; - *ssim_u = b; - *ssim_v = c; - ssim_all = (a * 4 + b + c) / 6; - - return ssim_all; -} - // traditional ssim as per: http://en.wikipedia.org/wiki/Structural_similarity // // Re working out the math -> @@ -455,21 +445,28 @@ double vpx_get_ssim_metrics(uint8_t *img1, int img1_pitch, #if CONFIG_VP9_HIGHBITDEPTH double vpx_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *dest, - double *weight, unsigned int bd) { + double *weight, uint32_t bd, uint32_t in_bd) { double a, b, c; double ssimv; + uint32_t shift = 0; + + assert(bd >= in_bd); + shift = bd - in_bd; a = vpx_highbd_ssim2(source->y_buffer, dest->y_buffer, source->y_stride, dest->y_stride, - source->y_crop_width, source->y_crop_height, bd); + source->y_crop_width, source->y_crop_height, + in_bd, shift); b = vpx_highbd_ssim2(source->u_buffer, dest->u_buffer, source->uv_stride, dest->uv_stride, - source->uv_crop_width, source->uv_crop_height, bd); + source->uv_crop_width, source->uv_crop_height, + in_bd, shift); c = vpx_highbd_ssim2(source->v_buffer, dest->v_buffer, source->uv_stride, dest->uv_stride, - source->uv_crop_width, source->uv_crop_height, bd); + source->uv_crop_width, source->uv_crop_height, + in_bd, shift); ssimv = a * .8 + .1 * (b + c); @@ -478,28 +475,4 @@ double vpx_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source, return ssimv; } -double vpx_highbd_calc_ssimg(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *dest, double *ssim_y, - double *ssim_u, double *ssim_v, unsigned int bd) { - double ssim_all = 0; - double a, b, c; - - a = vpx_highbd_ssim2(source->y_buffer, dest->y_buffer, - source->y_stride, dest->y_stride, - source->y_crop_width, source->y_crop_height, bd); - - b = vpx_highbd_ssim2(source->u_buffer, dest->u_buffer, - source->uv_stride, dest->uv_stride, - source->uv_crop_width, source->uv_crop_height, bd); - - c = vpx_highbd_ssim2(source->v_buffer, dest->v_buffer, - source->uv_stride, dest->uv_stride, - source->uv_crop_width, source->uv_crop_height, bd); - *ssim_y = a; - *ssim_u = b; - *ssim_v = c; - ssim_all = (a * 4 + b + c) / 6; - - return ssim_all; -} #endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/vpx_dsp/ssim.h b/vpx_dsp/ssim.h index 132f7f9e1..d4d6b0d8a 100644 --- a/vpx_dsp/ssim.h +++ b/vpx_dsp/ssim.h @@ -11,6 +11,8 @@ #ifndef VPX_DSP_SSIM_H_ #define VPX_DSP_SSIM_H_ +#define MAX_SSIM_DB 100.0; + #ifdef __cplusplus extern "C" { #endif @@ -68,30 +70,16 @@ double vpx_calc_ssim(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *dest, double *weight); -double vpx_calc_ssimg(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *dest, - double *ssim_y, double *ssim_u, double *ssim_v); - double vpx_calc_fastssim(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *dest, - double *ssim_y, double *ssim_u, double *ssim_v); - -double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *dest, - double *ssim_y, double *ssim_u, double *ssim_v); + double *ssim_y, double *ssim_u, + double *ssim_v, uint32_t bd, uint32_t in_bd); #if CONFIG_VP9_HIGHBITDEPTH double vpx_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *dest, double *weight, - unsigned int bd); - -double vpx_highbd_calc_ssimg(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *dest, - double *ssim_y, - double *ssim_u, - double *ssim_v, - unsigned int bd); + uint32_t bd, uint32_t in_bd); #endif // CONFIG_VP9_HIGHBITDEPTH #ifdef __cplusplus diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk index 84b529136..018126d4b 100644 --- a/vpx_dsp/vpx_dsp.mk +++ b/vpx_dsp/vpx_dsp.mk @@ -22,6 +22,8 @@ DSP_SRCS-yes += bitwriter.h DSP_SRCS-yes += bitwriter.c DSP_SRCS-yes += bitwriter_buffer.c DSP_SRCS-yes += bitwriter_buffer.h +DSP_SRCS-yes += psnr.c +DSP_SRCS-yes += psnr.h DSP_SRCS-$(CONFIG_INTERNAL_STATS) += ssim.c DSP_SRCS-$(CONFIG_INTERNAL_STATS) += ssim.h DSP_SRCS-$(CONFIG_INTERNAL_STATS) += psnrhvs.c |