contrib/patch-x264-aq.patch

   1 Index: encoder/encoder.c\r
   2 ===================================================================\r
   3 --- encoder/encoder.c   (revisione 634)\r
   4 +++ encoder/encoder.c   (copia locale)\r
   5 @@ -470,6 +470,8 @@\r
   6      if( !h->param.b_cabac )
   7          h->param.analyse.i_trellis = 0;
   8      h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
   9 +    if( h->param.analyse.b_aq && h->param.analyse.f_aq_strength <= 0 )
  10 +        h->param.analyse.b_aq = 0;
  11      h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
  12
  13      {
  14 Index: encoder/analyse.c\r
  15 ===================================================================\r
  16 --- encoder/analyse.c   (revisione 634)\r
  17 +++ encoder/analyse.c   (copia locale)\r
  18 @@ -29,6 +29,7 @@\r
  19  #include <unistd.h>
  20
  21  #include "common/common.h"
  22 +#include "common/cpu.h"
  23  #include "macroblock.h"
  24  #include "me.h"
  25  #include "ratecontrol.h"
  26 @@ -2026,8 +2027,73 @@\r
  27      }
  28  }
  29
   30 +static void x264_add_dctq(int16_t dct[8][8], int *p_total) { /* adds the weighted sum of absolute coefficient values of one 8x8 block to *p_total */
   31 +    int     i, t = 0;
   32 +    int16_t *p = &dct[0][0]; /* walk the 8x8 array as 64 consecutive values */
   33
   34 +    for (i = 1; i < 64; ++i) /* index 0 is skipped -- presumably the DC term, TODO confirm */
   35 +        t += abs(p[i]) * x264_dct8_weight_tab[i]; /* per-position weight taken from x264_dct8_weight_tab */
   36 +
   37 +    *p_total += t; /* accumulates: the caller's running total is not reset here */
   38 +}
  39 +
   40  /*****************************************************************************
   41 + * x264_adaptive_quant:
   42 + * lower the QP of the current macroblock by an amount that grows as the mb gets
   43 + * "flatter" (smaller weighted DCT sum); writes h->mb.i_qp, h->mb.i_chroma_qp, a->i_qp
   44 + *****************************************************************************/
   45 +void x264_adaptive_quant( x264_t *h, x264_mb_analysis_t *a )
   46 +{
   47 +    DECLARE_ALIGNED( static uint8_t, zero[FDEC_STRIDE*8], 16 ); /* never written in this function; used as the second operand of the SAD and DCT calls below */
   48 +
   49 +    int16_t dct[8][8];
   50 +    int     total = 0; /* running sum filled by x264_add_dctq over four 8x8 blocks */
   51 +    int     i_qp = h->mb.i_qp, i_qp_adj;
   52 +    float   fc;
   53 +
   54 +    if( i_qp <= 10 ) /* AQ is probably not needed at such low QP */
   55 +        return;
   56 +
   57 +    if( h->pixf.sad[PIXEL_16x16](h->mb.pic.p_fenc[0], FENC_STRIDE, zero, 16) > 64*16*16 )
   58 +    {   /* bright macroblock: the 16x16 SAD of p_fenc[0] against the zero block exceeds 64 per sample on average */
   59 +        if( h->pixf.count_8x8(h->mb.pic.p_fenc[1], FENC_STRIDE, 0x81818181) < 40 )
   60 +            /* count_8x8 on p_fenc[1] with threshold byte 0x81 is below 40: not enough "blue" pixels */
   61 +            return;
   62 +
   63 +        if( h->pixf.count_8x8(h->mb.pic.p_fenc[2], FENC_STRIDE, 0x87878787) > 24 )
   64 +            /* count_8x8 on p_fenc[2] with threshold byte 0x87 is above 24: too many "red" pixels */
   65 +            return;
   66 +    }
   67 +
   68 +    h->dctf.sub8x8_dct8( dct, h->mb.pic.p_fenc[0], zero ); /* top-left 8x8 block */
   69 +    x264_add_dctq( dct, &total );
   70 +    h->dctf.sub8x8_dct8( dct, h->mb.pic.p_fenc[0] + 8, zero ); /* top-right 8x8 block */
   71 +    x264_add_dctq( dct, &total );
   72 +    h->dctf.sub8x8_dct8( dct, h->mb.pic.p_fenc[0] + 8*FENC_STRIDE, zero ); /* bottom-left 8x8 block */
   73 +    x264_add_dctq( dct, &total );
   74 +    h->dctf.sub8x8_dct8( dct, h->mb.pic.p_fenc[0] + 8*FENC_STRIDE + 8, zero ); /* bottom-right 8x8 block */
   75 +    x264_add_dctq( dct, &total );
   76 +
   77 +    if( total == 0 ) /* sum is 0, nothing to do */
   78 +        return;
   79 +
   80 +    x264_cpu_restore( h->param.cpu ); /* NOTE(review): presumably resets FPU/MMX state before the float math below -- confirm */
   81 +
   82 +    fc = (float)expf(-5e-13 * total * total); /* between 0 and 1: close to 1 for small totals, falling towards 0 as total grows */
   83 +
   84 +    //printf("AQ: %d %.3f\n", total, fc);
   85 +    /* 1/pow(2 - fc, sensitivity) stays close to 0 over almost all of fc's 0..1
   86 +      * range (for large sensitivity) and rises rapidly to 1 as fc approaches 1.0 */
   87 +    i_qp_adj = (int)(i_qp * h->param.analyse.f_aq_strength / pow(2 - fc, h->param.analyse.f_aq_sensitivity));
   88 +
   89 +    /* never lower the QP by more than half of its incoming value */
   90 +    i_qp_adj = X264_MIN(i_qp_adj, i_qp/2);
   91 +
   92 +    h->mb.i_qp = a->i_qp = i_qp - i_qp_adj; /* the same reduced QP is stored in both the mb state and the analysis struct */
   93 +    h->mb.i_chroma_qp = i_chroma_qp_table[x264_clip3( h->mb.i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )]; /* chroma QP re-derived from the new QP plus the PPS offset, clipped to 0..51 */
   94 +}
  95 +
  96 +/*****************************************************************************
  97   * x264_macroblock_analyse:
  98   *****************************************************************************/
  99  void x264_macroblock_analyse( x264_t *h )
 100 @@ -2039,6 +2105,10 @@\r
 101      /* init analysis */
 102      x264_mb_analyse_init( h, &analysis, x264_ratecontrol_qp( h ) );
 103
 104 +    /* try to do adaptive quantization */
 105 +    if( h->param.analyse.b_aq )
 106 +        x264_adaptive_quant( h, &analysis);
 107 +
 108      /*--------------------------- Do the analysis ---------------------------*/
 109      if( h->sh.i_type == SLICE_TYPE_I )
 110      {
 111 Index: x264.c\r
 112 ===================================================================\r
 113 --- x264.c      (revisione 634)\r
 114 +++ x264.c      (copia locale)\r
 115 @@ -253,6 +253,12 @@\r
 116          "                                  - 2: enabled on all mode decisions\n", defaults->analyse.i_trellis );
 117      H0( "      --no-fast-pskip         Disables early SKIP detection on P-frames\n" );
 118      H0( "      --no-dct-decimate       Disables coefficient thresholding on P-frames\n" );
 119 +    H0( "      --aq-strength <float>   Amount to adjust QP per MB [%.1f]\n"
 120 +        "                                  0.0: no AQ\n"
 121 +        "                                  1.1: strong AQ\n", defaults->analyse.f_aq_strength );
 122 +    H0( "      --aq-sensitivity <float> \"Flatness\" threshold to trigger AQ [%.1f]\n"
 123 +        "                                    5: applies to almost all blocks\n"
 124 +        "                                   22: only flat blocks\n", defaults->analyse.f_aq_sensitivity );
 125      H0( "      --nr <integer>          Noise reduction [%d]\n", defaults->analyse.i_noise_reduction );
 126      H1( "\n" );
 127      H1( "      --deadzone-inter <int>  Set the size of the inter luma quantization deadzone [%d]\n", defaults->analyse.i_luma_deadzone[0] );
 128 @@ -416,6 +422,8 @@\r
 129              { "trellis", required_argument, NULL, 't' },
 130              { "no-fast-pskip", no_argument, NULL, 0 },
 131              { "no-dct-decimate", no_argument, NULL, 0 },
 132 +            { "aq-strength", required_argument, NULL, 0 },
 133 +            { "aq-sensitivity", required_argument, NULL, 0 },
 134              { "deadzone-inter", required_argument, NULL, '0' },
 135              { "deadzone-intra", required_argument, NULL, '0' },
 136              { "level",   required_argument, NULL, 0 },
 137 Index: common/pixel.c\r
 138 ===================================================================\r
 139 --- common/pixel.c      (revisione 634)\r
 140 +++ common/pixel.c      (copia locale)\r
 141 @@ -215,7 +215,17 @@\r
 142  PIXEL_SATD_C( x264_pixel_satd_4x8,   4, 8 )
 143  PIXEL_SATD_C( x264_pixel_satd_4x4,   4, 4 )
 144
  145 +static int x264_pixel_count_8x8( uint8_t *pix1, int i_pix1, uint32_t threshold ) /* returns how many of the 8x8 samples at pix1 (row stride i_pix1) are strictly greater than the low byte of threshold */
  146 +{
  147 +  int i, j, sum = 0;
  148 +
  149 +  for ( i = 0; i < 8; ++i, pix1 += i_pix1 ) /* one row per iteration; pix1 advances by the stride */
  150 +      for ( j = 0; j < 8; ++j )
  151 +          sum += pix1[j] > (uint8_t)threshold; /* the cast keeps only the low 8 bits; NOTE(review): callers pass the byte replicated four times, presumably for SIMD variants -- confirm */
  152
  153 +  return sum; /* result is in 0..64 */
  154 +}
 155 +
 156  /****************************************************************************
 157   * pixel_sa8d_WxH: sum of 8x8 Hadamard transformed differences
 158   ****************************************************************************/
 159 @@ -469,6 +479,8 @@\r
 160      pixf->ads[PIXEL_16x16] = pixel_ads4;
 161      pixf->ads[PIXEL_16x8] = pixel_ads2;
 162      pixf->ads[PIXEL_8x8] = pixel_ads1;
 163 +
 164 +    pixf->count_8x8 = x264_pixel_count_8x8;
 165
 166  #ifdef HAVE_MMX
 167      if( cpu&X264_CPU_MMX )
 168 Index: common/pixel.h\r
 169 ===================================================================\r
 170 --- common/pixel.h      (revisione 634)\r
 171 +++ common/pixel.h      (copia locale)\r
 172 @@ -26,6 +26,7 @@\r
 173
 174  typedef int  (*x264_pixel_cmp_t) ( uint8_t *, int, uint8_t *, int );
 175  typedef int  (*x264_pixel_cmp_pde_t) ( uint8_t *, int, uint8_t *, int, int );
  176 +typedef int  (*x264_pixel_count_t) ( uint8_t *, int, uint32_t ); /* pixel-counting function: (pixels, stride, threshold) -> count, the signature of x264_pixel_count_8x8 */
 177  typedef void (*x264_pixel_cmp_x3_t) ( uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int[3] );
 178  typedef void (*x264_pixel_cmp_x4_t) ( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int[4] );
 179
 180 @@ -84,6 +85,7 @@\r
 181      /* multiple parallel calls to sad. */
 182      x264_pixel_cmp_x3_t sad_x3[7];
 183      x264_pixel_cmp_x4_t sad_x4[7];
 184 +    x264_pixel_count_t count_8x8;
 185
 186      /* abs-diff-sum for successive elimination.
 187       * may round width up to a multiple of 8. */
 188 Index: common/common.c\r
 189 ===================================================================\r
 190 --- common/common.c     (revisione 634)\r
 191 +++ common/common.c     (copia locale)\r
 192 @@ -125,6 +125,9 @@\r
 193      param->analyse.i_chroma_qp_offset = 0;
 194      param->analyse.b_fast_pskip = 1;
 195      param->analyse.b_dct_decimate = 1;
 196 +    param->analyse.b_aq = 0;
 197 +    param->analyse.f_aq_strength = 0.0;
 198 +    param->analyse.f_aq_sensitivity = 15;
 199      param->analyse.i_luma_deadzone[0] = 21;
 200      param->analyse.i_luma_deadzone[1] = 11;
 201      param->analyse.b_psnr = 1;
 202 @@ -457,6 +460,13 @@\r
 203          p->analyse.b_fast_pskip = atobool(value);
 204      OPT("dct-decimate")
 205          p->analyse.b_dct_decimate = atobool(value);
 206 +    OPT("aq-strength")
 207 +    {
 208 +        p->analyse.f_aq_strength = atof(value);
 209 +        p->analyse.b_aq = (p->analyse.f_aq_strength > 0.0);
 210 +    }
 211 +    OPT("aq-sensitivity")
 212 +        p->analyse.f_aq_sensitivity = atof(value);
 213      OPT("deadzone-inter")
 214          p->analyse.i_luma_deadzone[0] = atoi(value);
 215      OPT("deadzone-intra")
 216 @@ -936,6 +946,9 @@\r
 217              s += sprintf( s, " zones" );
 218      }
 219
 220 +    if( p->analyse.b_aq )
 221 +        s += sprintf( s, " aq=1:%.1f:%.1f", p->analyse.f_aq_strength, p->analyse.f_aq_sensitivity );
 222 +
 223      return buf;
 224  }
 225
 226 Index: x264.h\r
 227 ===================================================================\r
 228 --- x264.h      (revisione 634)\r
 229 +++ x264.h      (copia locale)\r
 230 @@ -223,6 +223,9 @@\r
 231          int          i_trellis;  /* trellis RD quantization */
 232          int          b_fast_pskip; /* early SKIP detection on P-frames */
 233          int          b_dct_decimate; /* transform coefficient thresholding on P-frames */
 234 +        int          b_aq; /* psy adaptive QP */
 235 +        float        f_aq_strength;
 236 +        float        f_aq_sensitivity;
 237          int          i_noise_reduction; /* adaptive pseudo-deadzone */
 238
 239          /* the deadzone size that will be used in luma quantization */