1 /* $Id: encx264.c,v 1.21 2005/11/04 13:09:41 titer Exp $
3 This file is part of the HandBrake source code.
4 Homepage: <http://handbrake.fr/>.
5 It may be used under the terms of the GNU General Public License. */
/* Forward declarations of the three encoder entry points defined later in
 * this file: setup (encx264Init), per-buffer encoding (encx264Work) and
 * teardown (encx264Close). */
13 int encx264Init( hb_work_object_t *, hb_job_t * );
14 int encx264Work( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
15 void encx264Close( hb_work_object_t * );
/* Work-object descriptor for the libx264 encoder.
 * NOTE(review): only the descriptive string of the initializer is visible
 * here; the other initializer fields should be checked against the
 * hb_work_object_t definition. */
17 hb_work_object_t hb_encx264 =
20 "H.264/AVC encoder (libx264)",
// NOTE(review): DTS_BUFFER_SIZE is not referenced by any code visible in this
// part of the file - confirm it is still needed.
26 #define DTS_BUFFER_SIZE 32
29 * The frame info struct remembers information about each frame across calls
30 * to x264_encoder_encode. Since frames are uniquely identified by their
31 * timestamp, we use some bits of the timestamp as an index. The LSB is
32 * chosen so that two successive frames will have different values in the
33 * bits over any plausible range of frame rates. (Starting with bit 8 allows
34 * any frame rate slower than 352fps.) The MSB determines the size of the array.
35 * It is chosen so that two frames can't use the same slot during the
36 * encoder's max frame delay (set by the standard as 16 frames) and so
37 * that, up to some minimum frame rate, frames are guaranteed to map to
38 * different slots. (An MSB of 17 which is 2^(17-8+1) = 1024 slots guarantees
39 * no collisions down to a rate of .7 fps).
// Bit range of a frame's start timestamp that selects its frame_info[] slot:
// the timestamp is shifted right by FRAME_INFO_MAX2 and masked with
// FRAME_INFO_MASK (see save_frame_info / get_frame_duration).
41 #define FRAME_INFO_MAX2 (8) // 2^8 = 256; 90000/256 = 352 frames/sec
42 #define FRAME_INFO_MIN2 (17) // 2^17 = 128K; 90000/131072 = 0.69 frames/sec
43 #define FRAME_INFO_SIZE (1 << (FRAME_INFO_MIN2 - FRAME_INFO_MAX2 + 1)) // number of slots: 2^10 = 1024
44 #define FRAME_INFO_MASK (FRAME_INFO_SIZE - 1) // SIZE is a power of two, so SIZE-1 masks a slot index
// Private state of one x264 encoder instance.
// NOTE(review): presumably typedef'd elsewhere as hb_work_private_t, the type
// encx264Init allocates - confirm. Not every field is visible here.
46 struct hb_work_private_s
50 x264_picture_t pic_in; // input picture passed to x264_encoder_encode
51 uint8_t *x264_allocated_pic; // plane[0] as allocated by x264_picture_alloc; restored in encx264Close
53 int chap_mark; // saved chap mark when we're propagating it
54 int64_t last_stop; // Debugging - stop time of previous input frame
60 } frame_info[FRAME_INFO_SIZE]; // per-frame data (duration) indexed by bits of the frame's start time
65 /***********************************************************************
66 * hb_work_encx264_init
67 ***********************************************************************
69 **********************************************************************/
/*
 * encx264Init: set up libx264 for a job.
 *
 * w   - work object; w->config->h264 receives the SPS, the PPS and the
 *       initial delay computed here.
 * job - job settings read here: width/height, vrate/vrate_base, h264_level,
 *       x264opts, color_matrix, pixel aspect, vquality, vbitrate, pass.
 *
 * Steps, in order: allocate the private state, fill an x264_param_t
 * (x264 defaults, then frame size / frame rate / key-frame interval, then the
 * user's "name=value:name=value" x264opts string, then VUI colour, sample
 * aspect ratio and rate-control settings), open the encoder, store the
 * SPS/PPS headers, allocate the input picture and compute init_delay, the
 * time allowance for B-frame re-ordering.
 *
 * The x264_param_default / x264_param_parse / x264_encoder_open calls take
 * the address of the local parameter struct (&param).
 *
 * NOTE(review): the calloc result is used without a visible NULL check.
 * NOTE(review): `value` comes from strchr(name, '=') and the later log call
 * allows for it being NULL, yet atoi( value ) is called for "bframes" and
 * "b-pyramid" without a visible guard - confirm an option given without
 * "=value" cannot reach those calls.
 */
70 int encx264Init( hb_work_object_t * w, hb_job_t * job )
77 hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
82 memset( pv->filename, 0, 1024 );
83 hb_get_tempory_filename( job->h, pv->filename, "x264.log" );
85 x264_param_default( &param );
87 param.i_threads = ( hb_get_cpu_count() * 3 / 2 );
88 param.i_width = job->width;
89 param.i_height = job->height;
90 param.i_fps_num = job->vrate;
91 param.i_fps_den = job->vrate_base;
93 if (job->vrate_base != 1080000)
95 /* If the fps isn't 25, adjust the key intervals. Add 1 because
96 we want 24, not 23 with a truncated remainder. */
97 param.i_keyint_min = (job->vrate / job->vrate_base) + 1;
98 param.i_keyint_max = (10 * job->vrate / job->vrate_base) + 1;
99 hb_log("encx264: keyint-min: %i, keyint-max: %i", param.i_keyint_min, param.i_keyint_max);
102 param.i_log_level = X264_LOG_INFO;
103 if( job->h264_level )
106 param.i_level_idc = job->h264_level;
107 hb_log( "encx264: encoding at level %i",
112 This section passes the string x264opts to libx264 for parsing into
113 parameter names and values.
115 The string is set up like this:
116 option1=value1:option2=value 2
118 So, you have to iterate through based on the colons, and then put
119 the left side of the equals sign in "name" and the right side into
120 "value." Then you hand those strings off to x264 for interpretation.
122 This is all based on the universal x264 option handling Loren
123 Merritt implemented in the Mplayer/Mencoder project.
126 if( job->x264opts != NULL && *job->x264opts != '\0' )
128 char *x264opts, *x264opts_start;
130 x264opts = x264opts_start = strdup(job->x264opts);
132 while( x264opts_start && *x264opts )
134 char *name = x264opts;
138 x264opts += strcspn( x264opts, ":" );
145 value = strchr( name, '=' );
153 When B-frames are enabled, the max frame count increments
154 by 1 (regardless of the number of B-frames). If you don't
155 change the duration of the video track when you mux, libmp4
156 barfs. So, check if the x264opts are using B-frames, and
157 when they are, set the boolean job->areBframes as true.
160 if( !( strcmp( name, "bframes" ) ) )
162 if( atoi( value ) > 0 )
168 /* Note b-pyramid here, so the initial delay can be doubled */
169 if( !( strcmp( name, "b-pyramid" ) ) )
173 if( atoi( value ) > 0 )
184 /* Here's where the strings are passed to libx264 for parsing. */
185 ret = x264_param_parse( &param, name, value );
187 /* Let x264 sanity check the options for us*/
188 if( ret == X264_PARAM_BAD_NAME )
189 hb_log( "x264 options: Unknown suboption %s", name );
190 if( ret == X264_PARAM_BAD_VALUE )
191 hb_log( "x264 options: Bad argument %s=%s", name, value ? value : "(null)" );
193 free(x264opts_start);
196 /* set up the VUI color model & gamma to match what the COLR atom
197 * set in muxmp4.c says. See libhb/muxmp4.c for notes. */
198 if( job->color_matrix == 1 )
200 // ITU BT.601 DVD or SD TV content
201 param.vui.i_colorprim = 6;
202 param.vui.i_transfer = 1;
203 param.vui.i_colmatrix = 6;
205 else if( job->color_matrix == 2 )
207 // ITU BT.709 HD content
208 param.vui.i_colorprim = 1;
209 param.vui.i_transfer = 1;
210 param.vui.i_colmatrix = 1;
212 else if ( job->title->width >= 1280 || job->title->height >= 720 )
214 // we guess that 720p or above is ITU BT.709 HD content
215 param.vui.i_colorprim = 1;
216 param.vui.i_transfer = 1;
217 param.vui.i_colmatrix = 1;
221 // ITU BT.601 DVD or SD TV content
222 param.vui.i_colorprim = 6;
223 param.vui.i_transfer = 1;
224 param.vui.i_colmatrix = 6;
227 if( job->pixel_ratio )
229 param.vui.i_sar_width = job->pixel_aspect_width;
230 param.vui.i_sar_height = job->pixel_aspect_height;
232 hb_log( "encx264: encoding with stored aspect %d/%d",
233 param.vui.i_sar_width, param.vui.i_sar_height );
237 if( job->vquality > 0.0 && job->vquality < 1.0 )
243 param.rc.i_rc_method = X264_RC_CRF;
244 param.rc.f_rf_constant = 51 - job->vquality * 51;
245 hb_log( "encx264: Encoding at constant RF %f",
246 param.rc.f_rf_constant );
251 param.rc.i_rc_method = X264_RC_CQP;
252 param.rc.i_qp_constant = 51 - job->vquality * 51;
253 hb_log( "encx264: encoding at constant QP %d",
254 param.rc.i_qp_constant );
258 else if( job->vquality == 0 || job->vquality >= 1.0 )
260 /* Use the vquality as a raw RF or QP
261 instead of treating it like a percentage. */
266 param.rc.i_rc_method = X264_RC_CRF;
267 param.rc.f_rf_constant = job->vquality;
268 hb_log( "encx264: Encoding at constant RF %f",
269 param.rc.f_rf_constant );
274 param.rc.i_rc_method = X264_RC_CQP;
275 param.rc.i_qp_constant = job->vquality;
276 hb_log( "encx264: encoding at constant QP %d",
277 param.rc.i_qp_constant );
284 param.rc.i_rc_method = X264_RC_ABR;
285 param.rc.i_bitrate = job->vbitrate;
289 param.rc.b_stat_write = 1;
290 param.rc.psz_stat_out = pv->filename;
293 param.rc.b_stat_read = 1;
294 param.rc.psz_stat_in = pv->filename;
299 hb_deep_log( 2, "encx264: opening libx264 (pass %d)", job->pass );
300 pv->x264 = x264_encoder_open( &param );
302 x264_encoder_headers( pv->x264, &nal, &nal_count );
304 /* Sequence Parameter Set */
305 x264_nal_encode( w->config->h264.sps, &nal_size, 0, &nal[1] );
306 w->config->h264.sps_length = nal_size;
308 /* Picture Parameter Set */
309 x264_nal_encode( w->config->h264.pps, &nal_size, 0, &nal[2] );
310 w->config->h264.pps_length = nal_size;
312 x264_picture_alloc( &pv->pic_in, X264_CSP_I420,
313 job->width, job->height );
315 pv->x264_allocated_pic = pv->pic_in.img.plane[0];
319 /* Basic initDelay value is the clockrate divided by the FPS
320 -- the length of one frame in clockticks. */
321 pv->init_delay = 90000. / ((double)job->vrate / (double)job->vrate_base);
323 /* 23.976-length frames are 3753.75 ticks long on average but the DVD
324 creates that average rate by repeating 59.94 fields so the max
325 frame size is actually 4504.5 (3 field times). The field durations
326 are computed based on quantized times (see below) so we need an extra
327 two ticks to account for the rounding. */
328 if (pv->init_delay == 3753)
329 pv->init_delay = 4507;
331 /* frame rates are not exact in the DVD 90KHz PTS clock (they are
332 exact in the DVD 27MHz system clock but we never see that) so the
333 rates computed above are all +-1 due to quantization. Worst case
334 is when a clock-rounded-down frame is adjacent to a rounded-up frame
335 which makes one of the frames 2 ticks longer than the nominal
339 /* For VFR, libhb sees the FPS as 29.97, but the longest frames
340 will use the duration of frames running at 23.976fps instead.. */
343 pv->init_delay = 7506;
346 /* The delay is 1 frame for regular b-frames, 2 for b-pyramid. */
347 pv->init_delay *= job->areBframes;
349 w->config->h264.init_delay = pv->init_delay;
/*
 * encx264Close: tear down the encoder state held in w->private_data.
 *
 * x264_encode() repoints pic_in.img.plane[0] at the input buffers, so the
 * pointer x264 originally allocated (saved in x264_allocated_pic) is put back
 * before x264_picture_clean() is called. The encoder is then closed and
 * w->private_data is cleared.
 */
354 void encx264Close( hb_work_object_t * w )
356 hb_work_private_t * pv = w->private_data;
358 * Patch the x264 allocated data back in so that x264 can free it
359 * we have been using our own buffers during the encode to avoid copying.
361 pv->pic_in.img.plane[0] = pv->x264_allocated_pic;
362 x264_picture_clean( &pv->pic_in );
363 x264_encoder_close( pv->x264 );
365 w->private_data = NULL;
371 * see comments in definition of 'frame_info' in pv struct for description
372 * of what these routines are doing.
// Record the duration (stop - start) of input buffer `in` in the frame_info
// slot selected by its start time: (start >> FRAME_INFO_MAX2) masked with
// FRAME_INFO_MASK. get_frame_duration() reads it back using the same index.
374 static void save_frame_info( hb_work_private_t * pv, hb_buffer_t * in )
376 int i = (in->start >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
377 pv->frame_info[i].duration = in->stop - in->start;
// Return the duration stored by save_frame_info() in the slot that timestamp
// `pts` maps to (same shift-and-mask index computation).
380 static int64_t get_frame_duration( hb_work_private_t * pv, int64_t pts )
382 int i = (pts >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
383 return pv->frame_info[i].duration;
/*
 * nal_encode: pack the NAL units produced by one x264_encoder_encode() call
 * into a single hb_buffer_t.
 *
 * w       - work object (private data and job are read from it)
 * pic_out - output picture info from x264; i_pts and i_type are used
 * i_nal   - number of entries in nal
 * nal     - the NAL units to encode
 *
 * The buffer's start is pic_out->i_pts and its stop is start plus the
 * duration remembered for that timestamp by save_frame_info(). The frame type
 * is derived from pic_out->i_type, and a pending chapter mark is attached to
 * an IDR frame whose pts is at or after the mark. If nothing was written
 * (buf->size <= 0) the buffer is closed.
 * NOTE(review): the return statements are not visible here - presumably the
 * buffer, or NULL after it has been closed; confirm.
 */
386 static hb_buffer_t *nal_encode( hb_work_object_t *w, x264_picture_t *pic_out,
387 int i_nal, x264_nal_t *nal )
389 hb_buffer_t *buf = NULL;
390 hb_work_private_t *pv = w->private_data;
391 hb_job_t *job = pv->job;
393 /* Should be way too large */
394 buf = hb_buffer_init( 3 * job->width * job->height / 2 );
398 // use the pts to get the original frame's duration.
399 int64_t duration = get_frame_duration( pv, pic_out->i_pts );
400 buf->start = pic_out->i_pts;
401 buf->stop = pic_out->i_pts + duration;
403 /* Encode all the NALs we were given into buf.
404 NOTE: This code assumes one video frame per NAL (but there can
405 be other stuff like SPS and/or PPS). If there are multiple
406 frames we only get the duration of the first which will
407 eventually screw up the muxer & decoder. */
409 for( i = 0; i < i_nal; i++ )
411 int data = buf->alloc - buf->size;
412 int size = x264_nal_encode( buf->data + buf->size, &data, 1, &nal[i] );
418 if( job->mux & HB_MUX_AVI )
420 if( nal[i].i_ref_idc == NAL_PRIORITY_HIGHEST )
422 buf->frametype = HB_FRAME_KEY;
428 /* H.264 in .mp4 or .mkv */
429 int naltype = buf->data[buf->size+4] & 0x1f;
430 if ( naltype == 0x7 || naltype == 0x8 )
432 // Sequence Parameter Set & Picture Parameter Set go in the
433 // mp4 header so skip them here
437 /* H.264 in mp4 (stolen from mp4creator): store size - 4 big-endian in the first four bytes of this NAL's data */
438 buf->data[buf->size+0] = ( ( size - 4 ) >> 24 ) & 0xFF;
439 buf->data[buf->size+1] = ( ( size - 4 ) >> 16 ) & 0xFF;
440 buf->data[buf->size+2] = ( ( size - 4 ) >> 8 ) & 0xFF;
441 buf->data[buf->size+3] = ( ( size - 4 ) >> 0 ) & 0xFF;
443 /* Decide what type of frame we have. */
444 switch( pic_out->i_type )
447 buf->frametype = HB_FRAME_IDR;
448 /* if we have a chapter marker pending and this
449 frame's presentation time stamp is at or after
450 the marker's time stamp, use this as the
452 if( pv->next_chap != 0 && pv->next_chap <= pic_out->i_pts )
455 buf->new_chap = pv->chap_mark;
460 buf->frametype = HB_FRAME_I;
464 buf->frametype = HB_FRAME_P;
468 buf->frametype = HB_FRAME_B;
471 /* This is for b-pyramid, which has reference b-frames
472 However, it doesn't seem to ever be used... */
474 buf->frametype = HB_FRAME_BREF;
477 // If it isn't the above, what type of frame is it??
483 /* Since libx264 doesn't tell us when b-frames are
484 themselves reference frames, figure it out on our own. */
485 if( (buf->frametype == HB_FRAME_B) &&
486 (nal[i].i_ref_idc != NAL_PRIORITY_DISPOSABLE) )
487 buf->frametype = HB_FRAME_BREF;
491 // make sure we found at least one video frame
492 if ( buf->size <= 0 )
494 // no video - discard the buf
495 hb_buffer_close( &buf );
/*
 * x264_encode: feed one input frame to x264 and return whatever it emits.
 *
 * w  - work object (private data and job are read from it)
 * in - input frame; its data is used in place as the Y, U and V planes
 *      (U at width*height, V at 5*width*height/4), its start time becomes
 *      the picture pts and its duration is remembered via save_frame_info().
 *
 * When the buffer starts a chapter and chapter markers are enabled, the frame
 * is requested as IDR and the mark is saved so nal_encode() can attach it to
 * the matching output frame. Returns the result of nal_encode() on the NALs
 * produced by x264_encoder_encode().
 *
 * NOTE(review): the continuity log message prints last_stop (int64_t) and
 * in->start with %lld; PRId64 from <inttypes.h> would be the portable
 * specifier - confirm against the project's conventions.
 */
500 static hb_buffer_t *x264_encode( hb_work_object_t *w, hb_buffer_t *in )
502 hb_work_private_t *pv = w->private_data;
503 hb_job_t *job = pv->job;
505 /* Point x264 at our current buffers Y(UV) data. */
506 pv->pic_in.img.plane[0] = in->data;
510 /* XXX x264 has currently no option for grayscale encoding */
511 memset( pv->pic_in.img.plane[1], 0x80, job->width * job->height / 4 );
512 memset( pv->pic_in.img.plane[2], 0x80, job->width * job->height / 4 );
516 /* Point x264 at our buffers (Y)UV data */
517 pv->pic_in.img.plane[1] = in->data + job->width * job->height;
518 pv->pic_in.img.plane[2] = in->data + 5 * job->width * job->height / 4;
520 if( in->new_chap && job->chapter_markers )
522 /* chapters have to start with an IDR frame so request that this
523 frame be coded as IDR. Since there may be up to 16 frames
524 currently buffered in the encoder remember the timestamp so
525 when this frame finally pops out of the encoder we'll mark
526 its buffer as the start of a chapter. */
527 pv->pic_in.i_type = X264_TYPE_IDR;
528 if( pv->next_chap == 0 )
530 pv->next_chap = in->start;
531 pv->chap_mark = in->new_chap;
533 /* don't let 'work_loop' put a chapter mark on the wrong buffer */
538 pv->pic_in.i_type = X264_TYPE_AUTO;
540 pv->pic_in.i_qpplus1 = 0;
542 /* XXX this is temporary debugging code to check that the upstream
543 * modules (render & sync) have generated a continuous, self-consistent
544 * frame stream with the current frame's start time equal to the
545 * previous frame's stop time.
547 if( pv->last_stop != in->start )
549 hb_log("encx264 input continuity err: last stop %lld start %lld",
550 pv->last_stop, in->start);
552 pv->last_stop = in->stop;
554 // Remember info about this frame that we need to pass across
555 // the x264_encoder_encode call (since it reorders frames).
556 save_frame_info( pv, in );
558 /* Feed the input PTS to x264 so it can figure out proper output PTS */
559 pv->pic_in.i_pts = in->start;
561 x264_picture_t pic_out;
565 x264_encoder_encode( pv->x264, &nal, &i_nal, &pv->pic_in, &pic_out );
568 return nal_encode( w, &pic_out, i_nal, nal );
/*
 * encx264Work: per-buffer entry point of the encoder work object.
 *
 * w       - work object
 * buf_in  - *buf_in is the input buffer to encode
 * buf_out - receives the encoded output
 *
 * At end of input the encoder is drained by calling x264_encoder_encode()
 * with a NULL picture, and the resulting buffers are chained through ->next.
 * Otherwise the frame goes through x264_encode(). When frame re-ordering is
 * in use (init_delay != 0) and the frame lasts longer than init_delay, the
 * same image is fed to the encoder several times with adjusted start/stop
 * times so that each piece is no longer than init_delay.
 * NOTE(review): the return statements and the end-of-input test are not
 * visible here.
 */
573 int encx264Work( hb_work_object_t * w, hb_buffer_t ** buf_in,
574 hb_buffer_t ** buf_out )
576 hb_work_private_t *pv = w->private_data;
577 hb_buffer_t *in = *buf_in;
583 // EOF on input. Flush any frames still in the encoder then
584 // send the eof downstream to tell the muxer we're done.
585 x264_picture_t pic_out;
588 hb_buffer_t *last_buf = NULL;
592 x264_encoder_encode( pv->x264, &nal, &i_nal, NULL, &pic_out );
596 hb_buffer_t *buf = nal_encode( w, &pic_out, i_nal, nal );
599 if ( last_buf == NULL )
602 last_buf->next = buf;
606 // Flushed everything - add the eof to the end of the chain.
607 if ( last_buf == NULL )
616 // Not EOF - encode the packet & wrap it in a NAL
618 // if we're re-ordering frames, check if this frame is too large to reorder
619 if ( pv->init_delay && in->stop - in->start > pv->init_delay )
621 // This frame's duration is larger than the time allotted for b-frame
622 // reordering. That means that if it's used as a reference the decoder
623 // won't be able to move it early enough to render it in correct
624 // sequence & the playback will have odd jumps & twitches. To make
625 // sure this doesn't happen we pretend this frame is multiple
626 // frames, each with duration <= init_delay. Since each of these
627 // new frames contains the same image the visual effect is identical
628 // to the original but the resulting stream can now be coded without
629 // error. We take advantage of the fact that x264 buffers frame
630 // data internally to feed the same image into the encoder multiple
631 // times, just changing its start & stop times each time.
632 int64_t orig_stop = in->stop;
633 int64_t new_stop = in->start;
634 hb_buffer_t *last_buf = NULL;
636 // We want to spread the new frames uniformly over the total time
637 // so that we don't end up with a very short frame at the end.
638 // In the number of pieces calculation we add in init_delay-1 to
639 // round up but not add an extra piece if the frame duration is
640 // a multiple of init_delay. The final increment of frame_dur is
641 // to restore the bits that got truncated by the divide on the
642 // previous line. If we don't do this we end up with an extra tiny
643 // frame at the end whose duration is npieces-1.
644 int64_t frame_dur = orig_stop - new_stop;
645 int64_t npieces = ( frame_dur + pv->init_delay - 1 ) / pv->init_delay;
646 frame_dur /= npieces;
649 while ( in->start < orig_stop )
651 new_stop += frame_dur;
652 if ( new_stop > orig_stop )
653 new_stop = orig_stop;
655 hb_buffer_t *buf = x264_encode( w, in );
658 if ( last_buf == NULL )
661 last_buf->next = buf;
664 in->start = new_stop;
669 *buf_out = x264_encode( w, in );