libhb/encx264.c

   1 /* $Id: encx264.c,v 1.21 2005/11/04 13:09:41 titer Exp $
   2
   3    This file is part of the HandBrake source code.
   4    Homepage: <http://handbrake.fr/>.
   5    It may be used under the terms of the GNU General Public License. */
   6
   7 #include <stdarg.h>
   8
   9 #include "hb.h"
  10
  11 #include "x264.h"
  12
  13 int  encx264Init( hb_work_object_t *, hb_job_t * );
  14 int  encx264Work( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
  15 void encx264Close( hb_work_object_t * );
  16
  17 hb_work_object_t hb_encx264 =
  18 {
  19     WORK_ENCX264,
  20     "H.264/AVC encoder (libx264)",
  21     encx264Init,
  22     encx264Work,
  23     encx264Close
  24 };
  25
  26 #define DTS_BUFFER_SIZE 32
  27
  28 /*
  29  * The frame info struct remembers information about each frame across calls
  30  * to x264_encoder_encode. Since frames are uniquely identified by their
  31  * timestamp, we use some bits of the timestamp as an index. The LSB is
  32  * chosen so that two successive frames will have different values in the
  33  * bits over any plausible range of frame rates. (Starting with bit 8 allows
  34  * any frame rate slower than 352fps.) The MSB determines the size of the array.
  35  * It is chosen so that two frames can't use the same slot during the
  36  * encoder's max frame delay (set by the standard as 16 frames) and so
  37  * that, up to some minimum frame rate, frames are guaranteed to map to
  38  * different slots. (An MSB of 17 which is 2^(17-8+1) = 1024 slots guarantees
  39  * no collisions down to a rate of .7 fps).
  40  */
  41 #define FRAME_INFO_MAX2 (8)     // 2^8 = 256; 90000/256 = 352 frames/sec
  42 #define FRAME_INFO_MIN2 (17)    // 2^17 = 128K; 90000/131072 = 1.4 frames/sec
  43 #define FRAME_INFO_SIZE (1 << (FRAME_INFO_MIN2 - FRAME_INFO_MAX2 + 1))
  44 #define FRAME_INFO_MASK (FRAME_INFO_SIZE - 1)
  45
  46 struct hb_work_private_s
  47 {
  48     hb_job_t       * job;
  49     x264_t         * x264;
  50     x264_picture_t   pic_in;
  51     uint8_t         *x264_allocated_pic;
  52
  53     uint32_t       frames_in;
  54     uint32_t       frames_out;
  55     uint32_t       frames_split; // number of frames we had to split
  56     int            chap_mark;   // saved chap mark when we're propagating it
  57     int64_t        last_stop;   // Debugging - stop time of previous input frame
  58     int64_t        next_chap;
  59
  60     struct {
  61         int64_t duration;
  62     } frame_info[FRAME_INFO_SIZE];
  63
  64     char             filename[1024];
  65 };
  66
  67 /***********************************************************************
  68  * hb_work_encx264_init
  69  ***********************************************************************
  70  *
  71  **********************************************************************/
  72 int encx264Init( hb_work_object_t * w, hb_job_t * job )
  73 {
  74     x264_param_t       param;
  75     x264_nal_t       * nal;
  76     int                nal_count;
  77
  78     hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
  79     w->private_data = pv;
  80
  81     pv->job = job;
  82
  83     memset( pv->filename, 0, 1024 );
  84     hb_get_tempory_filename( job->h, pv->filename, "x264.log" );
  85
  86     x264_param_default( &param );
  87
  88     /* Enable metrics */
  89     param.analyse.b_psnr = 1;
  90     param.analyse.b_ssim = 1;
  91
  92     param.i_threads    = ( hb_get_cpu_count() * 3 / 2 );
  93     param.i_width      = job->width;
  94     param.i_height     = job->height;
  95     param.i_fps_num    = job->vrate;
  96     param.i_fps_den    = job->vrate_base;
  97     if ( job->cfr == 1 )
  98     {
  99         param.i_timebase_num   = 0;
 100         param.i_timebase_den   = 0;
 101         param.b_vfr_input = 0;
 102     }
 103     else
 104     {
 105         param.i_timebase_num   = 1;
 106         param.i_timebase_den   = 90000;
 107     }
 108
 109     /* Disable annexb. Inserts size into nal header instead of start code */
 110     param.b_annexb     = 0;
 111
 112     /* Set min:max key intervals ratio to 1:10 of fps.
 113      * This section is skipped if fps=25 (default).
 114      */
 115     if (job->vrate_base != 1080000)
 116     {
 117         if (job->pass == 2 && !job->cfr )
 118         {
 119             /* Even though the framerate might be different due to VFR,
 120                we still want the same keyframe intervals as the 1st pass,
 121                so the 1st pass stats won't conflict on frame decisions.    */
 122             hb_interjob_t * interjob = hb_interjob_get( job->h );
 123             param.i_keyint_max = 10 * (int)( (double)interjob->vrate / (double)interjob->vrate_base + 0.5 );
 124         }
 125         else
 126         {
 127             /* adjust +0.5 for when fps has remainder to bump
 128                { 23.976, 29.976, 59.94 } to { 24, 30, 60 } */
 129             param.i_keyint_max = 10 * (int)( (double)job->vrate / (double)job->vrate_base + 0.5 );
 130         }
 131     }
 132
 133     param.i_log_level  = X264_LOG_INFO;
 134
 135     /*
 136         This section passes the string x264opts to libx264 for parsing into
 137         parameter names and values.
 138
 139         The string is set up like this:
 140         option1=value1:option2=value 2
 141
 142         So, you have to iterate through based on the colons, and then put
 143         the left side of the equals sign in "name" and the right side into
 144         "value." Then you hand those strings off to x264 for interpretation.
 145
 146         This is all based on the universal x264 option handling Loren
 147         Merritt implemented in the Mplayer/Mencoder project.
 148      */
 149
 150     if( job->x264opts != NULL && *job->x264opts != '\0' )
 151     {
 152         char *x264opts, *x264opts_start;
 153
 154         x264opts = x264opts_start = strdup(job->x264opts);
 155
 156         while( x264opts_start && *x264opts )
 157         {
 158             char *name = x264opts;
 159             char *value;
 160             int ret;
 161
 162             x264opts += strcspn( x264opts, ":" );
 163             if( *x264opts )
 164             {
 165                 *x264opts = 0;
 166                 x264opts++;
 167             }
 168
 169             value = strchr( name, '=' );
 170             if( value )
 171             {
 172                 *value = 0;
 173                 value++;
 174             }
 175
 176             /* Here's where the strings are passed to libx264 for parsing. */
 177             ret = x264_param_parse( &param, name, value );
 178
 179             /*  Let x264 sanity check the options for us*/
 180             if( ret == X264_PARAM_BAD_NAME )
 181                 hb_log( "x264 options: Unknown suboption %s", name );
 182             if( ret == X264_PARAM_BAD_VALUE )
 183                 hb_log( "x264 options: Bad argument %s=%s", name, value ? value : "(null)" );
 184         }
 185         free(x264opts_start);
 186     }
 187
 188     /* B-frames are on by default.*/
 189     job->areBframes = 1;
 190
 191     if( param.i_bframe && param.i_bframe_pyramid )
 192     {
 193         /* Note b-pyramid here, so the initial delay can be doubled */
 194         job->areBframes = 2;
 195     }
 196     else if( !param.i_bframe )
 197     {
 198         /*
 199          When B-frames are enabled, the max frame count increments
 200          by 1 (regardless of the number of B-frames). If you don't
 201          change the duration of the video track when you mux, libmp4
 202          barfs.  So, check if the x264opts aren't using B-frames, and
 203          when they aren't, set the boolean job->areBframes as false.
 204          */
 205         job->areBframes = 0;
 206     }
 207
 208     if( param.i_keyint_min != X264_KEYINT_MIN_AUTO || param.i_keyint_max != 250 )
 209     {
 210         int min_auto;
 211
 212         if ( param.i_fps_num / param.i_fps_den < param.i_keyint_max / 10 )
 213             min_auto = param.i_fps_num / param.i_fps_den;
 214         else
 215             min_auto = param.i_keyint_max / 10;
 216
 217         char min[40], max[40];
 218         param.i_keyint_min == X264_KEYINT_MIN_AUTO ?
 219             snprintf( min, 40, "auto (%d)", min_auto ) :
 220             snprintf( min, 40, "%d", param.i_keyint_min );
 221
 222         param.i_keyint_max == X264_KEYINT_MAX_INFINITE ?
 223             snprintf( max, 40, "infinite" ) :
 224             snprintf( max, 40, "%d", param.i_keyint_max );
 225
 226         hb_log( "encx264: min-keyint: %s, keyint: %s", min, max );
 227     }
 228
 229     /* set up the VUI color model & gamma to match what the COLR atom
 230      * set in muxmp4.c says. See libhb/muxmp4.c for notes. */
 231     if( job->color_matrix == 1 )
 232     {
 233         // ITU BT.601 DVD or SD TV content
 234         param.vui.i_colorprim = 6;
 235         param.vui.i_transfer = 1;
 236         param.vui.i_colmatrix = 6;
 237     }
 238     else if( job->color_matrix == 2 )
 239     {
 240         // ITU BT.709 HD content
 241         param.vui.i_colorprim = 1;
 242         param.vui.i_transfer = 1;
 243         param.vui.i_colmatrix = 1;
 244     }
 245     else if ( job->title->width >= 1280 || job->title->height >= 720 )
 246     {
 247         // we guess that 720p or above is ITU BT.709 HD content
 248         param.vui.i_colorprim = 1;
 249         param.vui.i_transfer = 1;
 250         param.vui.i_colmatrix = 1;
 251     }
 252     else
 253     {
 254         // ITU BT.601 DVD or SD TV content
 255         param.vui.i_colorprim = 6;
 256         param.vui.i_transfer = 1;
 257         param.vui.i_colmatrix = 6;
 258     }
 259
 260     if( job->anamorphic.mode )
 261     {
 262         param.vui.i_sar_width  = job->anamorphic.par_width;
 263         param.vui.i_sar_height = job->anamorphic.par_height;
 264
 265         hb_log( "encx264: encoding with stored aspect %d/%d",
 266                 param.vui.i_sar_width, param.vui.i_sar_height );
 267     }
 268
 269
 270     if( job->vquality > 0.0 && job->vquality < 1.0 )
 271     {
 272         /*Constant RF*/
 273         param.rc.i_rc_method = X264_RC_CRF;
 274         param.rc.f_rf_constant = 51 - job->vquality * 51;
 275         hb_log( "encx264: Encoding at constant RF %f", param.rc.f_rf_constant );
 276     }
 277     else if( job->vquality == 0 || job->vquality >= 1.0 )
 278     {
 279         /* Use the vquality as a raw RF or QP
 280           instead of treating it like a percentage. */
 281         /*Constant RF*/
 282         param.rc.i_rc_method = X264_RC_CRF;
 283         param.rc.f_rf_constant = job->vquality;
 284         hb_log( "encx264: Encoding at constant RF %f", param.rc.f_rf_constant );
 285     }
 286     else
 287     {
 288         /* Rate control */
 289         param.rc.i_rc_method = X264_RC_ABR;
 290         param.rc.i_bitrate = job->vbitrate;
 291         switch( job->pass )
 292         {
 293             case 1:
 294                 param.rc.b_stat_write  = 1;
 295                 param.rc.psz_stat_out = pv->filename;
 296                 break;
 297             case 2:
 298                 param.rc.b_stat_read = 1;
 299                 param.rc.psz_stat_in = pv->filename;
 300                 break;
 301         }
 302     }
 303
 304     hb_deep_log( 2, "encx264: opening libx264 (pass %d)", job->pass );
 305     pv->x264 = x264_encoder_open( &param );
 306
 307     x264_encoder_headers( pv->x264, &nal, &nal_count );
 308
 309     /* Sequence Parameter Set */
 310     memcpy(w->config->h264.sps, nal[0].p_payload + 4, nal[0].i_payload - 4);
 311     w->config->h264.sps_length = nal[0].i_payload - 4;
 312
 313     /* Picture Parameter Set */
 314     memcpy(w->config->h264.pps, nal[1].p_payload + 4, nal[1].i_payload - 4);
 315     w->config->h264.pps_length = nal[1].i_payload - 4;
 316
 317     x264_picture_alloc( &pv->pic_in, X264_CSP_I420,
 318                         job->width, job->height );
 319
 320     pv->pic_in.img.i_stride[2] = pv->pic_in.img.i_stride[1] = ( ( job->width + 1 ) >> 1 );
 321     pv->x264_allocated_pic = pv->pic_in.img.plane[0];
 322
 323     return 0;
 324 }
 325
 326 void encx264Close( hb_work_object_t * w )
 327 {
 328     hb_work_private_t * pv = w->private_data;
 329
 330     if ( pv->frames_split )
 331     {
 332         hb_log( "encx264: %u frames had to be split (%u in, %u out)",
 333                 pv->frames_split, pv->frames_in, pv->frames_out );
 334     }
 335     /*
 336      * Patch the x264 allocated data back in so that x264 can free it
 337      * we have been using our own buffers during the encode to avoid copying.
 338      */
 339     pv->pic_in.img.plane[0] = pv->x264_allocated_pic;
 340     x264_picture_clean( &pv->pic_in );
 341     x264_encoder_close( pv->x264 );
 342     free( pv );
 343     w->private_data = NULL;
 344
 345     /* TODO */
 346 }
 347
 348 /*
 349  * see comments in definition of 'frame_info' in pv struct for description
 350  * of what these routines are doing.
 351  */
 352 static void save_frame_info( hb_work_private_t * pv, hb_buffer_t * in )
 353 {
 354     int i = (in->start >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
 355     pv->frame_info[i].duration = in->stop - in->start;
 356 }
 357
 358 static int64_t get_frame_duration( hb_work_private_t * pv, int64_t pts )
 359 {
 360     int i = (pts >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
 361     return pv->frame_info[i].duration;
 362 }
 363
 364 static hb_buffer_t *nal_encode( hb_work_object_t *w, x264_picture_t *pic_out,
 365                                 int i_nal, x264_nal_t *nal )
 366 {
 367     hb_buffer_t *buf = NULL;
 368     hb_work_private_t *pv = w->private_data;
 369     hb_job_t *job = pv->job;
 370
 371     /* Should be way too large */
 372     buf = hb_video_buffer_init( job->width, job->height );
 373     buf->size = 0;
 374     buf->frametype = 0;
 375
 376     // use the pts to get the original frame's duration.
 377     int64_t duration  = get_frame_duration( pv, pic_out->i_pts );
 378     buf->start = pic_out->i_pts;
 379     buf->stop  = pic_out->i_pts + duration;
 380     buf->renderOffset = pic_out->i_dts;
 381     if ( !w->config->h264.init_delay && pic_out->i_dts < 0 )
 382     {
 383         w->config->h264.init_delay = -pic_out->i_dts;
 384     }
 385
 386     /* Encode all the NALs we were given into buf.
 387        NOTE: This code assumes one video frame per NAL (but there can
 388              be other stuff like SPS and/or PPS). If there are multiple
 389              frames we only get the duration of the first which will
 390              eventually screw up the muxer & decoder. */
 391     int i;
 392     for( i = 0; i < i_nal; i++ )
 393     {
 394         int size = nal[i].i_payload;
 395         memcpy(buf->data + buf->size, nal[i].p_payload, size);
 396         if( size < 1 )
 397         {
 398             continue;
 399         }
 400
 401         /* H.264 in .mp4 or .mkv */
 402         switch( nal[i].i_type )
 403         {
 404             /* Sequence Parameter Set & Program Parameter Set go in the
 405              * mp4 header so skip them here
 406              */
 407             case NAL_SPS:
 408             case NAL_PPS:
 409                 continue;
 410
 411             case NAL_SLICE:
 412             case NAL_SLICE_IDR:
 413             case NAL_SEI:
 414             default:
 415                 break;
 416         }
 417
 418         /* Decide what type of frame we have. */
 419         switch( pic_out->i_type )
 420         {
 421             case X264_TYPE_IDR:
 422                 buf->frametype = HB_FRAME_IDR;
 423                 /* if we have a chapter marker pending and this
 424                    frame's presentation time stamp is at or after
 425                    the marker's time stamp, use this as the
 426                    chapter start. */
 427                 if( pv->next_chap != 0 && pv->next_chap <= pic_out->i_pts )
 428                 {
 429                     pv->next_chap = 0;
 430                     buf->new_chap = pv->chap_mark;
 431                 }
 432                 break;
 433
 434             case X264_TYPE_I:
 435                 buf->frametype = HB_FRAME_I;
 436                 break;
 437
 438             case X264_TYPE_P:
 439                 buf->frametype = HB_FRAME_P;
 440                 break;
 441
 442             case X264_TYPE_B:
 443                 buf->frametype = HB_FRAME_B;
 444                 break;
 445
 446         /*  This is for b-pyramid, which has reference b-frames
 447             However, it doesn't seem to ever be used... */
 448             case X264_TYPE_BREF:
 449                 buf->frametype = HB_FRAME_BREF;
 450                 break;
 451
 452             // If it isn't the above, what type of frame is it??
 453             default:
 454                 buf->frametype = 0;
 455                 break;
 456         }
 457
 458         /* Since libx264 doesn't tell us when b-frames are
 459            themselves reference frames, figure it out on our own. */
 460         if( (buf->frametype == HB_FRAME_B) &&
 461             (nal[i].i_ref_idc != NAL_PRIORITY_DISPOSABLE) )
 462             buf->frametype = HB_FRAME_BREF;
 463
 464         /* Expose disposable bit to muxer. */
 465         if( nal[i].i_ref_idc == NAL_PRIORITY_DISPOSABLE )
 466             buf->flags &= ~HB_FRAME_REF;
 467         else
 468             buf->flags |= HB_FRAME_REF;
 469
 470         buf->size += size;
 471     }
 472     // make sure we found at least one video frame
 473     if ( buf->size <= 0 )
 474     {
 475         // no video - discard the buf
 476         hb_buffer_close( &buf );
 477     }
 478     return buf;
 479 }
 480
 481 static hb_buffer_t *x264_encode( hb_work_object_t *w, hb_buffer_t *in )
 482 {
 483     hb_work_private_t *pv = w->private_data;
 484     hb_job_t *job = pv->job;
 485
 486     /* Point x264 at our current buffers Y(UV) data.  */
 487     pv->pic_in.img.plane[0] = in->data;
 488
 489     int uvsize = ( (job->width + 1) >> 1 ) * ( (job->height + 1) >> 1 );
 490     if( job->grayscale )
 491     {
 492         /* XXX x264 has currently no option for grayscale encoding */
 493         memset( pv->pic_in.img.plane[1], 0x80, uvsize );
 494         memset( pv->pic_in.img.plane[2], 0x80, uvsize );
 495     }
 496     else
 497     {
 498         /* Point x264 at our buffers (Y)UV data */
 499         pv->pic_in.img.plane[1] = in->data + job->width * job->height;
 500         pv->pic_in.img.plane[2] = pv->pic_in.img.plane[1] + uvsize;
 501     }
 502     if( in->new_chap && job->chapter_markers )
 503     {
 504         /* chapters have to start with an IDR frame so request that this
 505            frame be coded as IDR. Since there may be up to 16 frames
 506            currently buffered in the encoder remember the timestamp so
 507            when this frame finally pops out of the encoder we'll mark
 508            its buffer as the start of a chapter. */
 509         pv->pic_in.i_type = X264_TYPE_IDR;
 510         if( pv->next_chap == 0 )
 511         {
 512             pv->next_chap = in->start;
 513             pv->chap_mark = in->new_chap;
 514         }
 515         /* don't let 'work_loop' put a chapter mark on the wrong buffer */
 516         in->new_chap = 0;
 517     }
 518     else
 519     {
 520         pv->pic_in.i_type = X264_TYPE_AUTO;
 521     }
 522     pv->pic_in.i_qpplus1 = 0;
 523
 524     /* XXX this is temporary debugging code to check that the upstream
 525      * modules (render & sync) have generated a continuous, self-consistent
 526      * frame stream with the current frame's start time equal to the
 527      * previous frame's stop time.
 528      */
 529     if( pv->last_stop != in->start )
 530     {
 531         hb_log("encx264 input continuity err: last stop %"PRId64"  start %"PRId64,
 532                 pv->last_stop, in->start);
 533     }
 534     pv->last_stop = in->stop;
 535
 536     // Remember info about this frame that we need to pass across
 537     // the x264_encoder_encode call (since it reorders frames).
 538     save_frame_info( pv, in );
 539
 540     /* Feed the input PTS to x264 so it can figure out proper output PTS */
 541     pv->pic_in.i_pts = in->start;
 542
 543     x264_picture_t pic_out;
 544     int i_nal;
 545     x264_nal_t *nal;
 546
 547     x264_encoder_encode( pv->x264, &nal, &i_nal, &pv->pic_in, &pic_out );
 548     if ( i_nal > 0 )
 549     {
 550         return nal_encode( w, &pic_out, i_nal, nal );
 551     }
 552     return NULL;
 553 }
 554
 555 int encx264Work( hb_work_object_t * w, hb_buffer_t ** buf_in,
 556                   hb_buffer_t ** buf_out )
 557 {
 558     hb_work_private_t *pv = w->private_data;
 559     hb_buffer_t *in = *buf_in;
 560
 561     *buf_out = NULL;
 562
 563     if( in->size <= 0 )
 564     {
 565         // EOF on input. Flush any frames still in the decoder then
 566         // send the eof downstream to tell the muxer we're done.
 567         x264_picture_t pic_out;
 568         int i_nal;
 569         x264_nal_t *nal;
 570         hb_buffer_t *last_buf = NULL;
 571
 572         while ( x264_encoder_delayed_frames( pv->x264 ) )
 573         {
 574             x264_encoder_encode( pv->x264, &nal, &i_nal, NULL, &pic_out );
 575             if ( i_nal == 0 )
 576                 continue;
 577             if ( i_nal < 0 )
 578                 break;
 579
 580             hb_buffer_t *buf = nal_encode( w, &pic_out, i_nal, nal );
 581             if ( buf )
 582             {
 583                 ++pv->frames_out;
 584                 if ( last_buf == NULL )
 585                     *buf_out = buf;
 586                 else
 587                     last_buf->next = buf;
 588                 last_buf = buf;
 589             }
 590         }
 591         // Flushed everything - add the eof to the end of the chain.
 592         if ( last_buf == NULL )
 593             *buf_out = in;
 594         else
 595             last_buf->next = in;
 596
 597         *buf_in = NULL;
 598         return HB_WORK_DONE;
 599     }
 600
 601     // Not EOF - encode the packet & wrap it in a NAL
 602     ++pv->frames_in;
 603     ++pv->frames_out;
 604     *buf_out = x264_encode( w, in );
 605     return HB_WORK_OK;
 606 }