libhb/encx264.c

   1 /* $Id: encx264.c,v 1.21 2005/11/04 13:09:41 titer Exp $
   2
   3    This file is part of the HandBrake source code.
   4    Homepage: <http://handbrake.fr/>.
   5    It may be used under the terms of the GNU General Public License. */
   6
   7 #include <stdarg.h>
   8
   9 #include "hb.h"
  10
  11 #include "x264.h"
  12
/* Entry points of the x264 encoder work object; all three are defined
 * later in this file and referenced by the hb_encx264 descriptor below. */
int  encx264Init( hb_work_object_t *, hb_job_t * );
int  encx264Work( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
void encx264Close( hb_work_object_t * );
  16
/*
 * Work-object descriptor for the libx264 encoder.
 * The initializer is positional: an identifier, a human readable name and
 * the init/work/close callbacks, in that order.
 * NOTE(review): the field order is dictated by hb_work_object_t, which is
 * declared outside this file - confirm against its definition before
 * reordering anything here.
 */
hb_work_object_t hb_encx264 =
{
    WORK_ENCX264,
    "H.264/AVC encoder (libx264)",
    encx264Init,
    encx264Work,
    encx264Close
};
  25
/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define DTS_BUFFER_SIZE 32

/*
 * The frame info struct remembers information about each frame across calls
 * to x264_encoder_encode. Since frames are uniquely identified by their
 * timestamp, we use some bits of the timestamp as an index. The LSB is
 * chosen so that two successive frames will have different values in the
 * bits over any plausible range of frame rates. (Starting with bit 8 allows
 * any frame rate slower than 352fps.) The MSB determines the size of the array.
 * It is chosen so that two frames can't use the same slot during the
 * encoder's max frame delay (set by the standard as 16 frames) and so
 * that, up to some minimum frame rate, frames are guaranteed to map to
 * different slots. (An MSB of 17 which is 2^(17-8+1) = 1024 slots guarantees
 * no collisions down to a rate of .7 fps).
 *
 * Naming note: FRAME_INFO_MAX2 is the *lowest* timestamp bit used (it bounds
 * the maximum frame rate) and FRAME_INFO_MIN2 is the *highest* bit used (it
 * bounds the minimum frame rate). Timestamps are in 90000 Hz ticks.
 */
#define FRAME_INFO_MAX2 (8)     // 2^8 = 256; 90000/256 = 352 frames/sec
#define FRAME_INFO_MIN2 (17)    // 2^17 = 128K; 90000/131072 = 0.69 frames/sec
#define FRAME_INFO_SIZE (1 << (FRAME_INFO_MIN2 - FRAME_INFO_MAX2 + 1))
#define FRAME_INFO_MASK (FRAME_INFO_SIZE - 1)
  45
/*
 * Per-instance state of the x264 encoder work object. Allocated (zeroed)
 * in encx264Init and released in encx264Close.
 */
struct hb_work_private_s
{
    hb_job_t       * job;       // job this encoder instance belongs to
    x264_t         * x264;      // encoder handle from x264_encoder_open
    x264_picture_t   pic_in;    // input picture descriptor, reused for every frame
    uint8_t        * grey_data; // constant 0x80 chroma plane, only allocated for grayscale jobs

    uint32_t       frames_in;   // input frames handed to the encoder
    uint32_t       frames_out;  // output frame counter (reported by encx264Close)
    uint32_t       frames_split; // number of frames we had to split
    int            chap_mark;   // saved chap mark when we're propagating it
    int64_t        last_stop;   // Debugging - stop time of previous input frame
    int64_t        next_chap;   // start time of the frame carrying a pending chapter mark, 0 if none

    // Per-frame data that must survive the encoder's frame reordering,
    // indexed by bits of the frame's start timestamp (see FRAME_INFO_MASK).
    struct {
        int64_t duration;
    } frame_info[FRAME_INFO_SIZE];

    // Temporary file name used for the x264 multi-pass statistics.
    char             filename[1024];
};
  66
  67 /***********************************************************************
  68  * hb_work_encx264_init
  69  ***********************************************************************
  70  *
  71  **********************************************************************/
  72 int encx264Init( hb_work_object_t * w, hb_job_t * job )
  73 {
  74     x264_param_t       param;
  75     x264_nal_t       * nal;
  76     int                nal_count;
  77
  78     hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
  79     w->private_data = pv;
  80
  81     pv->job = job;
  82
  83     memset( pv->filename, 0, 1024 );
  84     hb_get_tempory_filename( job->h, pv->filename, "x264.log" );
  85
  86     x264_param_default( &param );
  87
  88     /* Enable metrics */
  89     param.analyse.b_psnr = 1;
  90     param.analyse.b_ssim = 1;
  91
  92     /* QuickTime has trouble with very low QPs (resulting in visual artifacts).
  93      * Known to affect QuickTime 7, QuickTime X and iTunes.
  94      * Testing shows that a qpmin of 3 works.
  95      */
  96     param.rc.i_qp_min = 3;
  97
  98     param.i_threads    = ( hb_get_cpu_count() * 3 / 2 );
  99     param.i_width      = job->width;
 100     param.i_height     = job->height;
 101     param.i_fps_num    = job->vrate;
 102     param.i_fps_den    = job->vrate_base;
 103     if ( job->cfr == 1 )
 104     {
 105         param.i_timebase_num   = 0;
 106         param.i_timebase_den   = 0;
 107         param.b_vfr_input = 0;
 108     }
 109     else
 110     {
 111         param.i_timebase_num   = 1;
 112         param.i_timebase_den   = 90000;
 113     }
 114
 115     /* Disable annexb. Inserts size into nal header instead of start code */
 116     param.b_annexb     = 0;
 117
 118     /* Set min:max key intervals ratio to 1:10 of fps.
 119      * This section is skipped if fps=25 (default).
 120      */
 121     if (job->vrate_base != 1080000)
 122     {
 123         if (job->pass == 2 && !job->cfr )
 124         {
 125             /* Even though the framerate might be different due to VFR,
 126                we still want the same keyframe intervals as the 1st pass,
 127                so the 1st pass stats won't conflict on frame decisions.    */
 128             hb_interjob_t * interjob = hb_interjob_get( job->h );
 129             param.i_keyint_max = 10 * (int)( (double)interjob->vrate / (double)interjob->vrate_base + 0.5 );
 130         }
 131         else
 132         {
 133             /* adjust +0.5 for when fps has remainder to bump
 134                { 23.976, 29.976, 59.94 } to { 24, 30, 60 } */
 135             param.i_keyint_max = 10 * (int)( (double)job->vrate / (double)job->vrate_base + 0.5 );
 136         }
 137     }
 138
 139     param.i_log_level  = X264_LOG_INFO;
 140
 141     /*
 142         This section passes the string x264opts to libx264 for parsing into
 143         parameter names and values.
 144
 145         The string is set up like this:
 146         option1=value1:option2=value 2
 147
 148         So, you have to iterate through based on the colons, and then put
 149         the left side of the equals sign in "name" and the right side into
 150         "value." Then you hand those strings off to x264 for interpretation.
 151
 152         This is all based on the universal x264 option handling Loren
 153         Merritt implemented in the Mplayer/Mencoder project.
 154      */
 155
 156     if( job->x264opts != NULL && *job->x264opts != '\0' )
 157     {
 158         char *x264opts, *x264opts_start;
 159
 160         x264opts = x264opts_start = strdup(job->x264opts);
 161
 162         while( x264opts_start && *x264opts )
 163         {
 164             char *name = x264opts;
 165             char *value;
 166             int ret;
 167
 168             x264opts += strcspn( x264opts, ":" );
 169             if( *x264opts )
 170             {
 171                 *x264opts = 0;
 172                 x264opts++;
 173             }
 174
 175             value = strchr( name, '=' );
 176             if( value )
 177             {
 178                 *value = 0;
 179                 value++;
 180             }
 181
 182             /* Here's where the strings are passed to libx264 for parsing. */
 183             ret = x264_param_parse( &param, name, value );
 184
 185             /*  Let x264 sanity check the options for us*/
 186             if( ret == X264_PARAM_BAD_NAME )
 187                 hb_log( "x264 options: Unknown suboption %s", name );
 188             if( ret == X264_PARAM_BAD_VALUE )
 189                 hb_log( "x264 options: Bad argument %s=%s", name, value ? value : "(null)" );
 190         }
 191         free(x264opts_start);
 192     }
 193
 194     /* B-frames are on by default.*/
 195     job->areBframes = 1;
 196
 197     if( param.i_bframe && param.i_bframe_pyramid )
 198     {
 199         /* Note b-pyramid here, so the initial delay can be doubled */
 200         job->areBframes = 2;
 201     }
 202     else if( !param.i_bframe )
 203     {
 204         /*
 205          When B-frames are enabled, the max frame count increments
 206          by 1 (regardless of the number of B-frames). If you don't
 207          change the duration of the video track when you mux, libmp4
 208          barfs.  So, check if the x264opts aren't using B-frames, and
 209          when they aren't, set the boolean job->areBframes as false.
 210          */
 211         job->areBframes = 0;
 212     }
 213
 214     if( param.i_keyint_min != X264_KEYINT_MIN_AUTO || param.i_keyint_max != 250 )
 215     {
 216         int min_auto;
 217
 218         if ( param.i_fps_num / param.i_fps_den < param.i_keyint_max / 10 )
 219             min_auto = param.i_fps_num / param.i_fps_den;
 220         else
 221             min_auto = param.i_keyint_max / 10;
 222
 223         char min[40], max[40];
 224         param.i_keyint_min == X264_KEYINT_MIN_AUTO ?
 225             snprintf( min, 40, "auto (%d)", min_auto ) :
 226             snprintf( min, 40, "%d", param.i_keyint_min );
 227
 228         param.i_keyint_max == X264_KEYINT_MAX_INFINITE ?
 229             snprintf( max, 40, "infinite" ) :
 230             snprintf( max, 40, "%d", param.i_keyint_max );
 231
 232         hb_log( "encx264: min-keyint: %s, keyint: %s", min, max );
 233     }
 234
 235     /* set up the VUI color model & gamma to match what the COLR atom
 236      * set in muxmp4.c says. See libhb/muxmp4.c for notes. */
 237     if( job->color_matrix == 1 )
 238     {
 239         // ITU BT.601 DVD or SD TV content
 240         param.vui.i_colorprim = 6;
 241         param.vui.i_transfer = 1;
 242         param.vui.i_colmatrix = 6;
 243     }
 244     else if( job->color_matrix == 2 )
 245     {
 246         // ITU BT.709 HD content
 247         param.vui.i_colorprim = 1;
 248         param.vui.i_transfer = 1;
 249         param.vui.i_colmatrix = 1;
 250     }
 251     else if ( job->title->width >= 1280 || job->title->height >= 720 )
 252     {
 253         // we guess that 720p or above is ITU BT.709 HD content
 254         param.vui.i_colorprim = 1;
 255         param.vui.i_transfer = 1;
 256         param.vui.i_colmatrix = 1;
 257     }
 258     else
 259     {
 260         // ITU BT.601 DVD or SD TV content
 261         param.vui.i_colorprim = 6;
 262         param.vui.i_transfer = 1;
 263         param.vui.i_colmatrix = 6;
 264     }
 265
 266     if( job->anamorphic.mode )
 267     {
 268         param.vui.i_sar_width  = job->anamorphic.par_width;
 269         param.vui.i_sar_height = job->anamorphic.par_height;
 270
 271         hb_log( "encx264: encoding with stored aspect %d/%d",
 272                 param.vui.i_sar_width, param.vui.i_sar_height );
 273     }
 274
 275
 276     if( job->vquality > 0.0 && job->vquality < 1.0 )
 277     {
 278         /*Constant RF*/
 279         param.rc.i_rc_method = X264_RC_CRF;
 280         param.rc.f_rf_constant = 51 - job->vquality * 51;
 281         hb_log( "encx264: Encoding at constant RF %f", param.rc.f_rf_constant );
 282     }
 283     else if( job->vquality == 0 || job->vquality >= 1.0 )
 284     {
 285         /* Use the vquality as a raw RF or QP
 286           instead of treating it like a percentage. */
 287         /*Constant RF*/
 288         param.rc.i_rc_method = X264_RC_CRF;
 289         param.rc.f_rf_constant = job->vquality;
 290         hb_log( "encx264: Encoding at constant RF %f", param.rc.f_rf_constant );
 291     }
 292     else
 293     {
 294         /* Rate control */
 295         param.rc.i_rc_method = X264_RC_ABR;
 296         param.rc.i_bitrate = job->vbitrate;
 297         switch( job->pass )
 298         {
 299             case 1:
 300                 param.rc.b_stat_write  = 1;
 301                 param.rc.psz_stat_out = pv->filename;
 302                 break;
 303             case 2:
 304                 param.rc.b_stat_read = 1;
 305                 param.rc.psz_stat_in = pv->filename;
 306                 break;
 307         }
 308     }
 309
 310     hb_deep_log( 2, "encx264: opening libx264 (pass %d)", job->pass );
 311     pv->x264 = x264_encoder_open( &param );
 312
 313     x264_encoder_headers( pv->x264, &nal, &nal_count );
 314
 315     /* Sequence Parameter Set */
 316     memcpy(w->config->h264.sps, nal[0].p_payload + 4, nal[0].i_payload - 4);
 317     w->config->h264.sps_length = nal[0].i_payload - 4;
 318
 319     /* Picture Parameter Set */
 320     memcpy(w->config->h264.pps, nal[1].p_payload + 4, nal[1].i_payload - 4);
 321     w->config->h264.pps_length = nal[1].i_payload - 4;
 322
 323     x264_picture_init( &pv->pic_in );
 324
 325     pv->pic_in.img.i_csp = X264_CSP_I420;
 326     pv->pic_in.img.i_plane = 3;
 327     pv->pic_in.img.i_stride[0] = job->width;
 328     pv->pic_in.img.i_stride[2] = pv->pic_in.img.i_stride[1] = ( ( job->width + 1 ) >> 1 );
 329
 330     if( job->grayscale )
 331     {
 332         int uvsize = ( (job->width + 1) >> 1 ) * ( (job->height + 1) >> 1 );
 333         pv->grey_data = malloc( uvsize );
 334         memset( pv->grey_data, 0x80, uvsize );
 335         pv->pic_in.img.plane[1] = pv->pic_in.img.plane[2] = pv->grey_data;
 336     }
 337
 338     return 0;
 339 }
 340
 341 void encx264Close( hb_work_object_t * w )
 342 {
 343     hb_work_private_t * pv = w->private_data;
 344
 345     if ( pv->frames_split )
 346     {
 347         hb_log( "encx264: %u frames had to be split (%u in, %u out)",
 348                 pv->frames_split, pv->frames_in, pv->frames_out );
 349     }
 350     free( pv->grey_data );
 351     x264_encoder_close( pv->x264 );
 352     free( pv );
 353     w->private_data = NULL;
 354
 355     /* TODO */
 356 }
 357
 358 /*
 359  * see comments in definition of 'frame_info' in pv struct for description
 360  * of what these routines are doing.
 361  */
 362 static void save_frame_info( hb_work_private_t * pv, hb_buffer_t * in )
 363 {
 364     int i = (in->start >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
 365     pv->frame_info[i].duration = in->stop - in->start;
 366 }
 367
 368 static int64_t get_frame_duration( hb_work_private_t * pv, int64_t pts )
 369 {
 370     int i = (pts >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
 371     return pv->frame_info[i].duration;
 372 }
 373
 374 static hb_buffer_t *nal_encode( hb_work_object_t *w, x264_picture_t *pic_out,
 375                                 int i_nal, x264_nal_t *nal )
 376 {
 377     hb_buffer_t *buf = NULL;
 378     hb_work_private_t *pv = w->private_data;
 379     hb_job_t *job = pv->job;
 380
 381     /* Should be way too large */
 382     buf = hb_video_buffer_init( job->width, job->height );
 383     buf->size = 0;
 384     buf->frametype = 0;
 385
 386     // use the pts to get the original frame's duration.
 387     int64_t duration  = get_frame_duration( pv, pic_out->i_pts );
 388     buf->start = pic_out->i_pts;
 389     buf->stop  = pic_out->i_pts + duration;
 390     buf->renderOffset = pic_out->i_dts;
 391     if ( !w->config->h264.init_delay && pic_out->i_dts < 0 )
 392     {
 393         w->config->h264.init_delay = -pic_out->i_dts;
 394     }
 395
 396     /* Encode all the NALs we were given into buf.
 397        NOTE: This code assumes one video frame per NAL (but there can
 398              be other stuff like SPS and/or PPS). If there are multiple
 399              frames we only get the duration of the first which will
 400              eventually screw up the muxer & decoder. */
 401     int i;
 402     for( i = 0; i < i_nal; i++ )
 403     {
 404         int size = nal[i].i_payload;
 405         memcpy(buf->data + buf->size, nal[i].p_payload, size);
 406         if( size < 1 )
 407         {
 408             continue;
 409         }
 410
 411         /* H.264 in .mp4 or .mkv */
 412         switch( nal[i].i_type )
 413         {
 414             /* Sequence Parameter Set & Program Parameter Set go in the
 415              * mp4 header so skip them here
 416              */
 417             case NAL_SPS:
 418             case NAL_PPS:
 419                 continue;
 420
 421             case NAL_SLICE:
 422             case NAL_SLICE_IDR:
 423             case NAL_SEI:
 424             default:
 425                 break;
 426         }
 427
 428         /* Decide what type of frame we have. */
 429         switch( pic_out->i_type )
 430         {
 431             case X264_TYPE_IDR:
 432                 buf->frametype = HB_FRAME_IDR;
 433                 /* if we have a chapter marker pending and this
 434                    frame's presentation time stamp is at or after
 435                    the marker's time stamp, use this as the
 436                    chapter start. */
 437                 if( pv->next_chap != 0 && pv->next_chap <= pic_out->i_pts )
 438                 {
 439                     pv->next_chap = 0;
 440                     buf->new_chap = pv->chap_mark;
 441                 }
 442                 break;
 443
 444             case X264_TYPE_I:
 445                 buf->frametype = HB_FRAME_I;
 446                 break;
 447
 448             case X264_TYPE_P:
 449                 buf->frametype = HB_FRAME_P;
 450                 break;
 451
 452             case X264_TYPE_B:
 453                 buf->frametype = HB_FRAME_B;
 454                 break;
 455
 456         /*  This is for b-pyramid, which has reference b-frames
 457             However, it doesn't seem to ever be used... */
 458             case X264_TYPE_BREF:
 459                 buf->frametype = HB_FRAME_BREF;
 460                 break;
 461
 462             // If it isn't the above, what type of frame is it??
 463             default:
 464                 buf->frametype = 0;
 465                 break;
 466         }
 467
 468         /* Since libx264 doesn't tell us when b-frames are
 469            themselves reference frames, figure it out on our own. */
 470         if( (buf->frametype == HB_FRAME_B) &&
 471             (nal[i].i_ref_idc != NAL_PRIORITY_DISPOSABLE) )
 472             buf->frametype = HB_FRAME_BREF;
 473
 474         /* Expose disposable bit to muxer. */
 475         if( nal[i].i_ref_idc == NAL_PRIORITY_DISPOSABLE )
 476             buf->flags &= ~HB_FRAME_REF;
 477         else
 478             buf->flags |= HB_FRAME_REF;
 479
 480         buf->size += size;
 481     }
 482     // make sure we found at least one video frame
 483     if ( buf->size <= 0 )
 484     {
 485         // no video - discard the buf
 486         hb_buffer_close( &buf );
 487     }
 488     return buf;
 489 }
 490
 491 static hb_buffer_t *x264_encode( hb_work_object_t *w, hb_buffer_t *in )
 492 {
 493     hb_work_private_t *pv = w->private_data;
 494     hb_job_t *job = pv->job;
 495
 496     /* Point x264 at our current buffers Y(UV) data.  */
 497     pv->pic_in.img.plane[0] = in->data;
 498
 499     int uvsize = ( (job->width + 1) >> 1 ) * ( (job->height + 1) >> 1 );
 500     if( !job->grayscale )
 501     {
 502         /* Point x264 at our buffers (Y)UV data */
 503         pv->pic_in.img.plane[1] = in->data + job->width * job->height;
 504         pv->pic_in.img.plane[2] = pv->pic_in.img.plane[1] + uvsize;
 505     }
 506     if( in->new_chap && job->chapter_markers )
 507     {
 508         /* chapters have to start with an IDR frame so request that this
 509            frame be coded as IDR. Since there may be up to 16 frames
 510            currently buffered in the encoder remember the timestamp so
 511            when this frame finally pops out of the encoder we'll mark
 512            its buffer as the start of a chapter. */
 513         pv->pic_in.i_type = X264_TYPE_IDR;
 514         if( pv->next_chap == 0 )
 515         {
 516             pv->next_chap = in->start;
 517             pv->chap_mark = in->new_chap;
 518         }
 519         /* don't let 'work_loop' put a chapter mark on the wrong buffer */
 520         in->new_chap = 0;
 521     }
 522     else
 523     {
 524         pv->pic_in.i_type = X264_TYPE_AUTO;
 525     }
 526
 527     /* XXX this is temporary debugging code to check that the upstream
 528      * modules (render & sync) have generated a continuous, self-consistent
 529      * frame stream with the current frame's start time equal to the
 530      * previous frame's stop time.
 531      */
 532     if( pv->last_stop != in->start )
 533     {
 534         hb_log("encx264 input continuity err: last stop %"PRId64"  start %"PRId64,
 535                 pv->last_stop, in->start);
 536     }
 537     pv->last_stop = in->stop;
 538
 539     // Remember info about this frame that we need to pass across
 540     // the x264_encoder_encode call (since it reorders frames).
 541     save_frame_info( pv, in );
 542
 543     /* Feed the input PTS to x264 so it can figure out proper output PTS */
 544     pv->pic_in.i_pts = in->start;
 545
 546     x264_picture_t pic_out;
 547     int i_nal;
 548     x264_nal_t *nal;
 549
 550     x264_encoder_encode( pv->x264, &nal, &i_nal, &pv->pic_in, &pic_out );
 551     if ( i_nal > 0 )
 552     {
 553         return nal_encode( w, &pic_out, i_nal, nal );
 554     }
 555     return NULL;
 556 }
 557
 558 int encx264Work( hb_work_object_t * w, hb_buffer_t ** buf_in,
 559                   hb_buffer_t ** buf_out )
 560 {
 561     hb_work_private_t *pv = w->private_data;
 562     hb_buffer_t *in = *buf_in;
 563
 564     *buf_out = NULL;
 565
 566     if( in->size <= 0 )
 567     {
 568         // EOF on input. Flush any frames still in the decoder then
 569         // send the eof downstream to tell the muxer we're done.
 570         x264_picture_t pic_out;
 571         int i_nal;
 572         x264_nal_t *nal;
 573         hb_buffer_t *last_buf = NULL;
 574
 575         while ( x264_encoder_delayed_frames( pv->x264 ) )
 576         {
 577             x264_encoder_encode( pv->x264, &nal, &i_nal, NULL, &pic_out );
 578             if ( i_nal == 0 )
 579                 continue;
 580             if ( i_nal < 0 )
 581                 break;
 582
 583             hb_buffer_t *buf = nal_encode( w, &pic_out, i_nal, nal );
 584             if ( buf )
 585             {
 586                 ++pv->frames_out;
 587                 if ( last_buf == NULL )
 588                     *buf_out = buf;
 589                 else
 590                     last_buf->next = buf;
 591                 last_buf = buf;
 592             }
 593         }
 594         // Flushed everything - add the eof to the end of the chain.
 595         if ( last_buf == NULL )
 596             *buf_out = in;
 597         else
 598             last_buf->next = in;
 599
 600         *buf_in = NULL;
 601         return HB_WORK_DONE;
 602     }
 603
 604     // Not EOF - encode the packet & wrap it in a NAL
 605     ++pv->frames_in;
 606     ++pv->frames_out;
 607     *buf_out = x264_encode( w, in );
 608     return HB_WORK_OK;
 609 }