libhb/encx264.c

   1 /* $Id: encx264.c,v 1.21 2005/11/04 13:09:41 titer Exp $
   2
   3    This file is part of the HandBrake source code.
   4    Homepage: <http://handbrake.fr/>.
   5    It may be used under the terms of the GNU General Public License. */
   6
   7 #include <stdarg.h>
   8
   9 #include "hb.h"
  10
  11 #include "x264.h"
  12
  13 int  encx264Init( hb_work_object_t *, hb_job_t * );
  14 int  encx264Work( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
  15 void encx264Close( hb_work_object_t * );
  16
  17 hb_work_object_t hb_encx264 =
  18 {
  19     WORK_ENCX264,
  20     "H.264/AVC encoder (libx264)",
  21     encx264Init,
  22     encx264Work,
  23     encx264Close
  24 };
  25
  26 #define DTS_BUFFER_SIZE 32
  27
  28 /*
  29  * The frame info struct remembers information about each frame across calls
  30  * to x264_encoder_encode. Since frames are uniquely identified by their
  31  * timestamp, we use some bits of the timestamp as an index. The LSB is
  32  * chosen so that two successive frames will have different values in the
  33  * bits over any plausible range of frame rates. (Starting with bit 8 allows
  34  * any frame rate slower than 352fps.) The MSB determines the size of the array.
  35  * It is chosen so that two frames can't use the same slot during the
  36  * encoder's max frame delay (set by the standard as 16 frames) and so
  37  * that, up to some minimum frame rate, frames are guaranteed to map to
  38  * different slots. (An MSB of 17 which is 2^(17-8+1) = 1024 slots guarantees
  39  * no collisions down to a rate of .7 fps).
  40  */
  41 #define FRAME_INFO_MAX2 (8)     // 2^8 = 256; 90000/256 = 352 frames/sec
  42 #define FRAME_INFO_MIN2 (17)    // 2^17 = 128K; 90000/131072 = 1.4 frames/sec
  43 #define FRAME_INFO_SIZE (1 << (FRAME_INFO_MIN2 - FRAME_INFO_MAX2 + 1))
  44 #define FRAME_INFO_MASK (FRAME_INFO_SIZE - 1)
  45
  46 struct hb_work_private_s
  47 {
  48     hb_job_t       * job;
  49     x264_t         * x264;
  50     x264_picture_t   pic_in;
  51     uint8_t         *x264_allocated_pic;
  52
  53     uint32_t       frames_in;
  54     uint32_t       frames_out;
  55     uint32_t       frames_split; // number of frames we had to split
  56     int            chap_mark;   // saved chap mark when we're propagating it
  57     int64_t        last_stop;   // Debugging - stop time of previous input frame
  58     int64_t        next_chap;
  59
  60     struct {
  61         int64_t duration;
  62     } frame_info[FRAME_INFO_SIZE];
  63
  64     char             filename[1024];
  65 };
  66
  67 /***********************************************************************
  68  * hb_work_encx264_init
  69  ***********************************************************************
  70  *
  71  **********************************************************************/
  72 int encx264Init( hb_work_object_t * w, hb_job_t * job )
  73 {
  74     x264_param_t       param;
  75     x264_nal_t       * nal;
  76     int                nal_count;
  77
  78     hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
  79     w->private_data = pv;
  80
  81     pv->job = job;
  82
  83     memset( pv->filename, 0, 1024 );
  84     hb_get_tempory_filename( job->h, pv->filename, "x264.log" );
  85
  86     x264_param_default( &param );
  87
  88     /* Default weightp to off for baseline,
  89        overridable through x264 option strings. */
  90     if( job->x264opts != NULL && *job->x264opts != '\0' )
  91     {
  92         char *x264opts, *x264opts_start;
  93
  94         x264opts = x264opts_start = strdup(job->x264opts);
  95
  96         while( x264opts_start && *x264opts )
  97         {
  98             char *name = x264opts;
  99             char *value;
 100
 101             x264opts += strcspn( x264opts, ":" );
 102             if( *x264opts )
 103             {
 104                 *x264opts = 0;
 105                 x264opts++;
 106             }
 107
 108             value = strchr( name, '=' );
 109             if( value )
 110             {
 111                 *value = 0;
 112                 value++;
 113             }
 114
 115             if( !( strcmp( name, "bframes" ) ) )
 116             {
 117                 if( atoi( value ) == 0 )
 118                 {
 119                     param.analyse.i_weighted_pred = X264_WEIGHTP_NONE;
 120                     hb_log("encx264: no bframes, disabling weight-p unless told otherwise");
 121                 }
 122             }
 123         }
 124     }
 125
 126     /* Temporary hack to use old b-pyramid default */
 127     param.i_bframe_pyramid = X264_B_PYRAMID_NONE;
 128
 129     /* Enable metrics */
 130     param.analyse.b_psnr = 1;
 131     param.analyse.b_ssim = 1;
 132
 133     param.i_threads    = ( hb_get_cpu_count() * 3 / 2 );
 134     param.i_width      = job->width;
 135     param.i_height     = job->height;
 136     param.i_fps_num    = job->vrate;
 137     param.i_fps_den    = job->vrate_base;
 138     if ( job->cfr == 1 )
 139     {
 140         param.i_timebase_num   = 0;
 141         param.i_timebase_den   = 0;
 142         param.b_vfr_input = 0;
 143     }
 144     else
 145     {
 146         param.i_timebase_num   = 1;
 147         param.i_timebase_den   = 90000;
 148     }
 149
 150     /* Disable annexb. Inserts size into nal header instead of start code */
 151     param.b_annexb     = 0;
 152
 153     /* Set min:max key intervals ratio to 1:10 of fps.
 154      * This section is skipped if fps=25 (default).
 155      */
 156     if (job->vrate_base != 1080000)
 157     {
 158         if (job->pass == 2 && !job->cfr )
 159         {
 160             /* Even though the framerate might be different due to VFR,
 161                we still want the same keyframe intervals as the 1st pass,
 162                so the 1st pass stats won't conflict on frame decisions.    */
 163             hb_interjob_t * interjob = hb_interjob_get( job->h );
 164             param.i_keyint_max = ( ( 10 * (double)interjob->vrate / (double)interjob->vrate_base ) + 0.5 );
 165         }
 166         else
 167         {
 168             /* adjust +0.5 for when fps has remainder to bump
 169                { 23.976, 29.976, 59.94 } to { 24, 30, 60 } */
 170             param.i_keyint_max = ( ( 10 * (double)job->vrate / (double)job->vrate_base ) + 0.5 );
 171         }
 172     }
 173
 174     param.i_log_level  = X264_LOG_INFO;
 175     if( job->h264_level )
 176     {
 177         param.b_cabac     = 0;
 178         param.i_level_idc = job->h264_level;
 179         hb_log( "encx264: encoding at level %i",
 180                 param.i_level_idc );
 181     }
 182
 183     /*
 184         This section passes the string x264opts to libx264 for parsing into
 185         parameter names and values.
 186
 187         The string is set up like this:
 188         option1=value1:option2=value 2
 189
 190         So, you have to iterate through based on the colons, and then put
 191         the left side of the equals sign in "name" and the right side into
 192         "value." Then you hand those strings off to x264 for interpretation.
 193
 194         This is all based on the universal x264 option handling Loren
 195         Merritt implemented in the Mplayer/Mencoder project.
 196      */
 197
 198     if( job->x264opts != NULL && *job->x264opts != '\0' )
 199     {
 200         char *x264opts, *x264opts_start;
 201
 202         x264opts = x264opts_start = strdup(job->x264opts);
 203
 204         while( x264opts_start && *x264opts )
 205         {
 206             char *name = x264opts;
 207             char *value;
 208             int ret;
 209
 210             x264opts += strcspn( x264opts, ":" );
 211             if( *x264opts )
 212             {
 213                 *x264opts = 0;
 214                 x264opts++;
 215             }
 216
 217             value = strchr( name, '=' );
 218             if( value )
 219             {
 220                 *value = 0;
 221                 value++;
 222             }
 223
 224             if( !( strcmp( name, "b-pyramid" ) ) )
 225             {
 226                 if( value == NULL || !strcmp( value, "1" ) )
 227                 {
 228                     value = "normal";
 229                 }
 230                 else if( !strcmp( value, "0" ) )
 231                 {
 232                     value = "none";
 233                 }
 234             }
 235
 236             /* Here's where the strings are passed to libx264 for parsing. */
 237             ret = x264_param_parse( &param, name, value );
 238
 239             /*  Let x264 sanity check the options for us*/
 240             if( ret == X264_PARAM_BAD_NAME )
 241                 hb_log( "x264 options: Unknown suboption %s", name );
 242             if( ret == X264_PARAM_BAD_VALUE )
 243                 hb_log( "x264 options: Bad argument %s=%s", name, value ? value : "(null)" );
 244         }
 245         free(x264opts_start);
 246     }
 247
 248     /* B-frames are on by default.*/
 249     job->areBframes = 1;
 250
 251     if( param.i_bframe && param.i_bframe_pyramid )
 252     {
 253         /* Note b-pyramid here, so the initial delay can be doubled */
 254         job->areBframes = 2;
 255     }
 256     else if( !param.i_bframe )
 257     {
 258         /*
 259          When B-frames are enabled, the max frame count increments
 260          by 1 (regardless of the number of B-frames). If you don't
 261          change the duration of the video track when you mux, libmp4
 262          barfs.  So, check if the x264opts aren't using B-frames, and
 263          when they aren't, set the boolean job->areBframes as false.
 264          */
 265         job->areBframes = 0;
 266     }
 267
 268     if( param.i_keyint_min != X264_KEYINT_MIN_AUTO || param.i_keyint_max != 250 )
 269         hb_log("encx264: min-keyint: %i, keyint: %i", param.i_keyint_min == X264_KEYINT_MIN_AUTO ? param.i_keyint_max / 10 : param.i_keyint_min,
 270                                                       param.i_keyint_max);
 271
 272     /* set up the VUI color model & gamma to match what the COLR atom
 273      * set in muxmp4.c says. See libhb/muxmp4.c for notes. */
 274     if( job->color_matrix == 1 )
 275     {
 276         // ITU BT.601 DVD or SD TV content
 277         param.vui.i_colorprim = 6;
 278         param.vui.i_transfer = 1;
 279         param.vui.i_colmatrix = 6;
 280     }
 281     else if( job->color_matrix == 2 )
 282     {
 283         // ITU BT.709 HD content
 284         param.vui.i_colorprim = 1;
 285         param.vui.i_transfer = 1;
 286         param.vui.i_colmatrix = 1;
 287     }
 288     else if ( job->title->width >= 1280 || job->title->height >= 720 )
 289     {
 290         // we guess that 720p or above is ITU BT.709 HD content
 291         param.vui.i_colorprim = 1;
 292         param.vui.i_transfer = 1;
 293         param.vui.i_colmatrix = 1;
 294     }
 295     else
 296     {
 297         // ITU BT.601 DVD or SD TV content
 298         param.vui.i_colorprim = 6;
 299         param.vui.i_transfer = 1;
 300         param.vui.i_colmatrix = 6;
 301     }
 302
 303     if( job->anamorphic.mode )
 304     {
 305         param.vui.i_sar_width  = job->anamorphic.par_width;
 306         param.vui.i_sar_height = job->anamorphic.par_height;
 307
 308         hb_log( "encx264: encoding with stored aspect %d/%d",
 309                 param.vui.i_sar_width, param.vui.i_sar_height );
 310     }
 311
 312
 313     if( job->vquality > 0.0 && job->vquality < 1.0 )
 314     {
 315         /*Constant RF*/
 316         param.rc.i_rc_method = X264_RC_CRF;
 317         param.rc.f_rf_constant = 51 - job->vquality * 51;
 318         hb_log( "encx264: Encoding at constant RF %f", param.rc.f_rf_constant );
 319     }
 320     else if( job->vquality == 0 || job->vquality >= 1.0 )
 321     {
 322         /* Use the vquality as a raw RF or QP
 323           instead of treating it like a percentage. */
 324         /*Constant RF*/
 325         param.rc.i_rc_method = X264_RC_CRF;
 326         param.rc.f_rf_constant = job->vquality;
 327         hb_log( "encx264: Encoding at constant RF %f", param.rc.f_rf_constant );
 328     }
 329     else
 330     {
 331         /* Rate control */
 332         param.rc.i_rc_method = X264_RC_ABR;
 333         param.rc.i_bitrate = job->vbitrate;
 334         switch( job->pass )
 335         {
 336             case 1:
 337                 param.rc.b_stat_write  = 1;
 338                 param.rc.psz_stat_out = pv->filename;
 339                 break;
 340             case 2:
 341                 param.rc.b_stat_read = 1;
 342                 param.rc.psz_stat_in = pv->filename;
 343                 break;
 344         }
 345     }
 346
 347     hb_deep_log( 2, "encx264: opening libx264 (pass %d)", job->pass );
 348     pv->x264 = x264_encoder_open( &param );
 349
 350     x264_encoder_headers( pv->x264, &nal, &nal_count );
 351
 352     /* Sequence Parameter Set */
 353     memcpy(w->config->h264.sps, nal[0].p_payload + 4, nal[0].i_payload - 4);
 354     w->config->h264.sps_length = nal[0].i_payload - 4;
 355
 356     /* Picture Parameter Set */
 357     memcpy(w->config->h264.pps, nal[1].p_payload + 4, nal[1].i_payload - 4);
 358     w->config->h264.pps_length = nal[1].i_payload - 4;
 359
 360     x264_picture_alloc( &pv->pic_in, X264_CSP_I420,
 361                         job->width, job->height );
 362
 363     pv->pic_in.img.i_stride[2] = pv->pic_in.img.i_stride[1] = ( ( job->width + 1 ) >> 1 );
 364     pv->x264_allocated_pic = pv->pic_in.img.plane[0];
 365
 366     return 0;
 367 }
 368
 369 void encx264Close( hb_work_object_t * w )
 370 {
 371     hb_work_private_t * pv = w->private_data;
 372
 373     if ( pv->frames_split )
 374     {
 375         hb_log( "encx264: %u frames had to be split (%u in, %u out)",
 376                 pv->frames_split, pv->frames_in, pv->frames_out );
 377     }
 378     /*
 379      * Patch the x264 allocated data back in so that x264 can free it
 380      * we have been using our own buffers during the encode to avoid copying.
 381      */
 382     pv->pic_in.img.plane[0] = pv->x264_allocated_pic;
 383     x264_picture_clean( &pv->pic_in );
 384     x264_encoder_close( pv->x264 );
 385     free( pv );
 386     w->private_data = NULL;
 387
 388     /* TODO */
 389 }
 390
 391 /*
 392  * see comments in definition of 'frame_info' in pv struct for description
 393  * of what these routines are doing.
 394  */
 395 static void save_frame_info( hb_work_private_t * pv, hb_buffer_t * in )
 396 {
 397     int i = (in->start >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
 398     pv->frame_info[i].duration = in->stop - in->start;
 399 }
 400
 401 static int64_t get_frame_duration( hb_work_private_t * pv, int64_t pts )
 402 {
 403     int i = (pts >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
 404     return pv->frame_info[i].duration;
 405 }
 406
 407 static hb_buffer_t *nal_encode( hb_work_object_t *w, x264_picture_t *pic_out,
 408                                 int i_nal, x264_nal_t *nal )
 409 {
 410     hb_buffer_t *buf = NULL;
 411     hb_work_private_t *pv = w->private_data;
 412     hb_job_t *job = pv->job;
 413
 414     /* Should be way too large */
 415     buf = hb_video_buffer_init( job->width, job->height );
 416     buf->size = 0;
 417     buf->frametype = 0;
 418
 419     // use the pts to get the original frame's duration.
 420     int64_t duration  = get_frame_duration( pv, pic_out->i_pts );
 421     buf->start = pic_out->i_pts;
 422     buf->stop  = pic_out->i_pts + duration;
 423     buf->renderOffset = pic_out->i_dts;
 424     if ( !w->config->h264.init_delay && pic_out->i_dts < 0 )
 425     {
 426         w->config->h264.init_delay = -pic_out->i_dts;
 427     }
 428
 429     /* Encode all the NALs we were given into buf.
 430        NOTE: This code assumes one video frame per NAL (but there can
 431              be other stuff like SPS and/or PPS). If there are multiple
 432              frames we only get the duration of the first which will
 433              eventually screw up the muxer & decoder. */
 434     int i;
 435     for( i = 0; i < i_nal; i++ )
 436     {
 437         int size = nal[i].i_payload;
 438         memcpy(buf->data + buf->size, nal[i].p_payload, size);
 439         if( size < 1 )
 440         {
 441             continue;
 442         }
 443
 444         if( job->mux & HB_MUX_AVI )
 445         {
 446             if( nal[i].i_ref_idc == NAL_PRIORITY_HIGHEST )
 447             {
 448                 buf->frametype = HB_FRAME_KEY;
 449             }
 450             buf->size += size;
 451             continue;
 452         }
 453
 454         /* H.264 in .mp4 or .mkv */
 455         switch( nal[i].i_type )
 456         {
 457             /* Sequence Parameter Set & Program Parameter Set go in the
 458              * mp4 header so skip them here
 459              */
 460             case NAL_SPS:
 461             case NAL_PPS:
 462                 continue;
 463
 464             case NAL_SLICE:
 465             case NAL_SLICE_IDR:
 466             case NAL_SEI:
 467             default:
 468                 break;
 469         }
 470
 471         /* Decide what type of frame we have. */
 472         switch( pic_out->i_type )
 473         {
 474             case X264_TYPE_IDR:
 475                 buf->frametype = HB_FRAME_IDR;
 476                 /* if we have a chapter marker pending and this
 477                    frame's presentation time stamp is at or after
 478                    the marker's time stamp, use this as the
 479                    chapter start. */
 480                 if( pv->next_chap != 0 && pv->next_chap <= pic_out->i_pts )
 481                 {
 482                     pv->next_chap = 0;
 483                     buf->new_chap = pv->chap_mark;
 484                 }
 485                 break;
 486
 487             case X264_TYPE_I:
 488                 buf->frametype = HB_FRAME_I;
 489                 break;
 490
 491             case X264_TYPE_P:
 492                 buf->frametype = HB_FRAME_P;
 493                 break;
 494
 495             case X264_TYPE_B:
 496                 buf->frametype = HB_FRAME_B;
 497                 break;
 498
 499         /*  This is for b-pyramid, which has reference b-frames
 500             However, it doesn't seem to ever be used... */
 501             case X264_TYPE_BREF:
 502                 buf->frametype = HB_FRAME_BREF;
 503                 break;
 504
 505             // If it isn't the above, what type of frame is it??
 506             default:
 507                 buf->frametype = 0;
 508                 break;
 509         }
 510
 511         /* Since libx264 doesn't tell us when b-frames are
 512            themselves reference frames, figure it out on our own. */
 513         if( (buf->frametype == HB_FRAME_B) &&
 514             (nal[i].i_ref_idc != NAL_PRIORITY_DISPOSABLE) )
 515             buf->frametype = HB_FRAME_BREF;
 516
 517         /* Expose disposable bit to muxer. */
 518         if( nal[i].i_ref_idc == NAL_PRIORITY_DISPOSABLE )
 519             buf->flags &= ~HB_FRAME_REF;
 520         else
 521             buf->flags |= HB_FRAME_REF;
 522
 523         buf->size += size;
 524     }
 525     // make sure we found at least one video frame
 526     if ( buf->size <= 0 )
 527     {
 528         // no video - discard the buf
 529         hb_buffer_close( &buf );
 530     }
 531     return buf;
 532 }
 533
 534 static hb_buffer_t *x264_encode( hb_work_object_t *w, hb_buffer_t *in )
 535 {
 536     hb_work_private_t *pv = w->private_data;
 537     hb_job_t *job = pv->job;
 538
 539     /* Point x264 at our current buffers Y(UV) data.  */
 540     pv->pic_in.img.plane[0] = in->data;
 541
 542     int uvsize = ( (job->width + 1) >> 1 ) * ( (job->height + 1) >> 1 );
 543     if( job->grayscale )
 544     {
 545         /* XXX x264 has currently no option for grayscale encoding */
 546         memset( pv->pic_in.img.plane[1], 0x80, uvsize );
 547         memset( pv->pic_in.img.plane[2], 0x80, uvsize );
 548     }
 549     else
 550     {
 551         /* Point x264 at our buffers (Y)UV data */
 552         pv->pic_in.img.plane[1] = in->data + job->width * job->height;
 553         pv->pic_in.img.plane[2] = pv->pic_in.img.plane[1] + uvsize;
 554     }
 555     if( in->new_chap && job->chapter_markers )
 556     {
 557         /* chapters have to start with an IDR frame so request that this
 558            frame be coded as IDR. Since there may be up to 16 frames
 559            currently buffered in the encoder remember the timestamp so
 560            when this frame finally pops out of the encoder we'll mark
 561            its buffer as the start of a chapter. */
 562         pv->pic_in.i_type = X264_TYPE_IDR;
 563         if( pv->next_chap == 0 )
 564         {
 565             pv->next_chap = in->start;
 566             pv->chap_mark = in->new_chap;
 567         }
 568         /* don't let 'work_loop' put a chapter mark on the wrong buffer */
 569         in->new_chap = 0;
 570     }
 571     else
 572     {
 573         pv->pic_in.i_type = X264_TYPE_AUTO;
 574     }
 575     pv->pic_in.i_qpplus1 = 0;
 576
 577     /* XXX this is temporary debugging code to check that the upstream
 578      * modules (render & sync) have generated a continuous, self-consistent
 579      * frame stream with the current frame's start time equal to the
 580      * previous frame's stop time.
 581      */
 582     if( pv->last_stop != in->start )
 583     {
 584         hb_log("encx264 input continuity err: last stop %"PRId64"  start %"PRId64,
 585                 pv->last_stop, in->start);
 586     }
 587     pv->last_stop = in->stop;
 588
 589     // Remember info about this frame that we need to pass across
 590     // the x264_encoder_encode call (since it reorders frames).
 591     save_frame_info( pv, in );
 592
 593     /* Feed the input PTS to x264 so it can figure out proper output PTS */
 594     pv->pic_in.i_pts = in->start;
 595
 596     x264_picture_t pic_out;
 597     int i_nal;
 598     x264_nal_t *nal;
 599
 600     x264_encoder_encode( pv->x264, &nal, &i_nal, &pv->pic_in, &pic_out );
 601     if ( i_nal > 0 )
 602     {
 603         return nal_encode( w, &pic_out, i_nal, nal );
 604     }
 605     return NULL;
 606 }
 607
 608 int encx264Work( hb_work_object_t * w, hb_buffer_t ** buf_in,
 609                   hb_buffer_t ** buf_out )
 610 {
 611     hb_work_private_t *pv = w->private_data;
 612     hb_buffer_t *in = *buf_in;
 613
 614     *buf_out = NULL;
 615
 616     if( in->size <= 0 )
 617     {
 618         // EOF on input. Flush any frames still in the decoder then
 619         // send the eof downstream to tell the muxer we're done.
 620         x264_picture_t pic_out;
 621         int i_nal;
 622         x264_nal_t *nal;
 623         hb_buffer_t *last_buf = NULL;
 624
 625         while ( x264_encoder_delayed_frames( pv->x264 ) )
 626         {
 627             x264_encoder_encode( pv->x264, &nal, &i_nal, NULL, &pic_out );
 628             if ( i_nal == 0 )
 629                 continue;
 630             if ( i_nal < 0 )
 631                 break;
 632
 633             hb_buffer_t *buf = nal_encode( w, &pic_out, i_nal, nal );
 634             if ( buf )
 635             {
 636                 ++pv->frames_out;
 637                 if ( last_buf == NULL )
 638                     *buf_out = buf;
 639                 else
 640                     last_buf->next = buf;
 641                 last_buf = buf;
 642             }
 643         }
 644         // Flushed everything - add the eof to the end of the chain.
 645         if ( last_buf == NULL )
 646             *buf_out = in;
 647         else
 648             last_buf->next = in;
 649
 650         *buf_in = NULL;
 651         return HB_WORK_DONE;
 652     }
 653
 654     // Not EOF - encode the packet & wrap it in a NAL
 655     ++pv->frames_in;
 656     ++pv->frames_out;
 657     *buf_out = x264_encode( w, in );
 658     return HB_WORK_OK;
 659 }