X-Git-Url: http://git.osdn.jp/view?a=blobdiff_plain;f=libhb%2Fdeinterlace.c;h=78856dd13960c218f2bb5785561435e80982e771;hb=4f0019f03c2e85e8634150ff0c9a31bee6d35ce5;hp=4cf3a2fe18fb154ef36ae59bce63dff4ca819e5f;hpb=1a8ebaf33ee803773082e1978b132273f8fb0bb9;p=handbrake-jp%2Fhandbrake-jp-git.git diff --git a/libhb/deinterlace.c b/libhb/deinterlace.c index 4cf3a2fe..78856dd1 100644 --- a/libhb/deinterlace.c +++ b/libhb/deinterlace.c @@ -17,7 +17,7 @@ */ #include "hb.h" -#include "libavcodec/avcodec.h" +#include "hbffmpeg.h" #include "mpeg2dec/mpeg2.h" #define SUPPRESS_AV_LOG @@ -32,6 +32,13 @@ #define MIN3(a,b,c) MIN(MIN(a,b),c) #define MAX3(a,b,c) MAX(MAX(a,b),c) +typedef struct yadif_arguments_s { + uint8_t **dst; + int parity; + int tff; + int stop; +} yadif_arguments_t; + struct hb_filter_private_s { int pix_fmt; @@ -45,6 +52,13 @@ struct hb_filter_private_s uint8_t * yadif_ref[4][3]; int yadif_ref_stride[3]; + int cpu_count; + + hb_thread_t ** yadif_threads; // Threads for Yadif - one per CPU + hb_lock_t ** yadif_begin_lock; // Thread has work + hb_lock_t ** yadif_complete_lock; // Thread has completed work + yadif_arguments_t *yadif_arguments; // Arguments to thread for work + int mcdeint_mode; int mcdeint_qp; @@ -84,6 +98,7 @@ hb_filter_object_t hb_filter_deinterlace = hb_deinterlace_close, }; + static void yadif_store_ref( const uint8_t ** pic, hb_filter_private_t * pv ) { @@ -102,7 +117,6 @@ static void yadif_store_ref( const uint8_t ** pic, uint8_t * ref = pv->yadif_ref[2][i]; int w = pv->width[i]; - int h = pv->height[i]; int ref_stride = pv->yadif_ref_stride[i]; int y; @@ -185,38 +199,204 @@ static void yadif_filter_line( uint8_t *dst, } } -static void yadif_filter( uint8_t ** dst, - int parity, - int tff, - hb_filter_private_t * pv ) +typedef struct yadif_thread_arg_s { + hb_filter_private_t *pv; + int segment; +} yadif_thread_arg_t; + +/* + * deinterlace this segment of all three planes in a single thread. + */ +void yadif_filter_thread( void *thread_args_v ) { - int i; - for( i = 0; i < 3; i++ ) + yadif_arguments_t *yadif_work = NULL; + hb_filter_private_t * pv; + int run = 1; + int plane; + int segment, segment_start, segment_stop; + yadif_thread_arg_t *thread_args = thread_args_v; + uint8_t **dst; + int parity, tff, y, w, h, ref_stride, penultimate, ultimate; + + + pv = thread_args->pv; + segment = thread_args->segment; + + hb_log("Yadif Deinterlace thread started for segment %d", segment); + + while( run ) { - int w = pv->width[i]; - int h = pv->height[i]; - int ref_stride = pv->yadif_ref_stride[i]; + /* + * Wait here until there is work to do. hb_lock() blocks until + * render releases it to say that there is more work to do. + */ + hb_lock( pv->yadif_begin_lock[segment] ); - int y; - for( y = 0; y < h; y++ ) + yadif_work = &pv->yadif_arguments[segment]; + + if( yadif_work->stop ) + { + /* + * No more work to do, exit this thread. + */ + run = 0; + continue; + } + + if( yadif_work->dst == NULL ) + { + hb_error( "Thread started when no work available" ); + hb_snooze(500); + continue; + } + + /* + * Process all three planes, but only this segment of it. + */ + for( plane = 0; plane < 3; plane++) { - if( (y ^ parity) & 1 ) - { - uint8_t *prev = &pv->yadif_ref[0][i][y*ref_stride]; - uint8_t *cur = &pv->yadif_ref[1][i][y*ref_stride]; - uint8_t *next = &pv->yadif_ref[2][i][y*ref_stride]; - uint8_t *dst2 = &dst[i][y*w]; - yadif_filter_line( dst2, prev, cur, next, i, parity ^ tff, pv ); + dst = yadif_work->dst; + parity = yadif_work->parity; + tff = yadif_work->tff; + w = pv->width[plane]; + h = pv->height[plane]; + penultimate = h -2; + ultimate = h - 1; + ref_stride = pv->yadif_ref_stride[plane]; + segment_start = ( h / pv->cpu_count ) * segment; + if( segment == pv->cpu_count - 1 ) + { + /* + * Final segment + */ + segment_stop = h; + } else { + segment_stop = ( h / pv->cpu_count ) * ( segment + 1 ); } - else + + for( y = segment_start; y < segment_stop; y++ ) { - memcpy( &dst[i][y*w], - &pv->yadif_ref[1][i][y*ref_stride], - w * sizeof(uint8_t) ); + if( ( ( y ^ parity ) & 1 ) ) + { + /* This is the bottom field when TFF and vice-versa. + It's the field that gets filtered. Because yadif + needs 2 lines above and below the one being filtered, + we need to mirror the edges. When TFF, this means + replacing the 2nd line with a copy of the 1st, + and the last with the second-to-last. */ + if( y > 1 && y < ( h -2 ) ) + { + /* This isn't the top or bottom, proceed as normal to yadif. */ + uint8_t *prev = &pv->yadif_ref[0][plane][y*ref_stride]; + uint8_t *cur = &pv->yadif_ref[1][plane][y*ref_stride]; + uint8_t *next = &pv->yadif_ref[2][plane][y*ref_stride]; + uint8_t *dst2 = &dst[plane][y*w]; + + yadif_filter_line( dst2, + prev, + cur, + next, + plane, + parity ^ tff, + pv ); + } + else if( y == 0 ) + { + /* BFF, so y0 = y1 */ + memcpy( &dst[plane][y*w], + &pv->yadif_ref[1][plane][1*ref_stride], + w * sizeof(uint8_t) ); + } + else if( y == 1 ) + { + /* TFF, so y1 = y0 */ + memcpy( &dst[plane][y*w], + &pv->yadif_ref[1][plane][0], + w * sizeof(uint8_t) ); + } + else if( y == penultimate ) + { + /* BFF, so penultimate y = ultimate y */ + memcpy( &dst[plane][y*w], + &pv->yadif_ref[1][plane][ultimate*ref_stride], + w * sizeof(uint8_t) ); + } + else if( y == ultimate ) + { + /* TFF, so ultimate y = penultimate y */ + memcpy( &dst[plane][y*w], + &pv->yadif_ref[1][plane][penultimate*ref_stride], + w * sizeof(uint8_t) ); + } + } + else + { + /* Preserve this field unfiltered */ + memcpy( &dst[plane][y*w], + &pv->yadif_ref[1][plane][y*ref_stride], + w * sizeof(uint8_t) ); + } } } + /* + * Finished this segment, let everyone know. + */ + hb_unlock( pv->yadif_complete_lock[segment] ); } + free( thread_args_v ); +} + + +/* + * threaded yadif - each thread deinterlaces a single segment of all + * three planes. Where a segment is defined as the frame divided by + * the number of CPUs. + * + * This function blocks until the frame is deinterlaced. + */ +static void yadif_filter( uint8_t ** dst, + int parity, + int tff, + hb_filter_private_t * pv ) +{ + + int segment; + + for( segment = 0; segment < pv->cpu_count; segment++ ) + { + /* + * Setup the work for this plane. + */ + pv->yadif_arguments[segment].parity = parity; + pv->yadif_arguments[segment].tff = tff; + pv->yadif_arguments[segment].dst = dst; + + /* + * Let the thread for this plane know that we've setup work + * for it by releasing the begin lock (ensuring that the + * complete lock is already locked so that we block when + * we try to lock it again below). + */ + hb_lock( pv->yadif_complete_lock[segment] ); + hb_unlock( pv->yadif_begin_lock[segment] ); + } + + /* + * Wait until all three threads have completed by trying to get + * the complete lock that we locked earlier for each thread, which + * will block until that thread has completed the work on that + * plane. + */ + for( segment = 0; segment < pv->cpu_count; segment++ ) + { + hb_lock( pv->yadif_complete_lock[segment] ); + hb_unlock( pv->yadif_complete_lock[segment] ); + } + + /* + * Entire frame is now deinterlaced. + */ } static void mcdeint_filter( uint8_t ** dst, @@ -351,9 +531,8 @@ hb_filter_private_t * hb_deinterlace_init( int pix_fmt, pv->width[1] = pv->width[2] = width >> 1; pv->height[1] = pv->height[2] = height >> 1; - int buf_size = 3 * width * height / 2; - pv->buf_out[0] = hb_buffer_init( buf_size ); - pv->buf_out[1] = hb_buffer_init( buf_size ); + pv->buf_out[0] = hb_video_buffer_init( width, height ); + pv->buf_out[1] = hb_video_buffer_init( width, height ); pv->buf_settings = hb_buffer_init( 0 ); pv->yadif_ready = 0; @@ -372,6 +551,8 @@ hb_filter_private_t * hb_deinterlace_init( int pix_fmt, &pv->mcdeint_qp ); } + pv->cpu_count = hb_get_cpu_count(); + /* Allocate yadif specific buffers */ if( pv->yadif_mode >= 0 ) { @@ -389,6 +570,45 @@ hb_filter_private_t * hb_deinterlace_init( int pix_fmt, pv->yadif_ref[j][i] = malloc( w*h*sizeof(uint8_t) ) + 3*w; } } + + /* + * Create yadif threads and locks. + */ + pv->yadif_threads = malloc( sizeof( hb_thread_t* ) * pv->cpu_count ); + pv->yadif_begin_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count ); + pv->yadif_complete_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count ); + pv->yadif_arguments = malloc( sizeof( yadif_arguments_t ) * pv->cpu_count ); + + for( i = 0; i < pv->cpu_count; i++ ) + { + yadif_thread_arg_t *thread_args; + + thread_args = malloc( sizeof( yadif_thread_arg_t ) ); + + if( thread_args ) { + thread_args->pv = pv; + thread_args->segment = i; + + pv->yadif_begin_lock[i] = hb_lock_init(); + pv->yadif_complete_lock[i] = hb_lock_init(); + + /* + * Important to start off with the threads locked waiting + * on input. + */ + hb_lock( pv->yadif_begin_lock[i] ); + + pv->yadif_arguments[i].stop = 0; + pv->yadif_arguments[i].dst = NULL; + + pv->yadif_threads[i] = hb_thread_init( "yadif_filter_segment", + yadif_filter_thread, + thread_args, + HB_NORMAL_PRIORITY ); + } else { + hb_error( "Yadif could not create threads" ); + } + } } /* Allocate mcdeint specific buffers */ @@ -433,7 +653,7 @@ hb_filter_private_t * hb_deinterlace_init( int pix_fmt, avctx_enc->flags |= CODEC_FLAG_QPEL; } - avcodec_open(avctx_enc, enc); + hb_avcodec_open(avctx_enc, enc); } pv->mcdeint_frame = avcodec_alloc_frame(); @@ -478,6 +698,27 @@ void hb_deinterlace_close( hb_filter_private_t * pv ) *p = NULL; } } + + for( i = 0; i < pv->cpu_count; i++) + { + /* + * Tell each yadif thread to stop, and then cleanup. + */ + pv->yadif_arguments[i].stop = 1; + hb_unlock( pv->yadif_begin_lock[i] ); + + hb_thread_close( &pv->yadif_threads[i] ); + hb_lock_close( &pv->yadif_begin_lock[i] ); + hb_lock_close( &pv->yadif_complete_lock[i] ); + } + + /* + * free memory for yadif structs + */ + free( pv->yadif_threads ); + free( pv->yadif_begin_lock ); + free( pv->yadif_complete_lock ); + free( pv->yadif_arguments ); } /* Cleanup mcdeint specific buffers */ @@ -485,7 +726,7 @@ void hb_deinterlace_close( hb_filter_private_t * pv ) { if( pv->mcdeint_avctx_enc ) { - avcodec_close( pv->mcdeint_avctx_enc ); + hb_avcodec_close( pv->mcdeint_avctx_enc ); av_freep( &pv->mcdeint_avctx_enc ); } if( pv->mcdeint_outbuf )