X-Git-Url: http://git.osdn.jp/view?a=blobdiff_plain;f=libhb%2Fdecomb.c;h=df1838a7d1dd63d1730ccfb4a403e8084706334e;hb=533776bbad20db93fe964bc69975f108b2a30888;hp=1b18328d4f8a3cbc2ee28778d26ff58dd8130a20;hpb=be046c367de057ec624f155ff05cabb3bc8f99c2;p=handbrake-jp%2Fhandbrake-jp-git.git diff --git a/libhb/decomb.c b/libhb/decomb.c index 1b18328d..df1838a7 100644 --- a/libhb/decomb.c +++ b/libhb/decomb.c @@ -21,6 +21,18 @@ #define MIN3(a,b,c) MIN(MIN(a,b),c) #define MAX3(a,b,c) MAX(MAX(a,b),c) +typedef struct yadif_arguments_s { + uint8_t **dst; + int parity; + int tff; + int stop; + int is_combed; +} yadif_arguments_t; + +typedef struct decomb_arguments_s { + int stop; +} decomb_arguments_t; + struct hb_filter_private_s { int pix_fmt; @@ -62,6 +74,19 @@ struct hb_filter_private_s AVPicture pic_out; hb_buffer_t * buf_out[2]; hb_buffer_t * buf_settings; + + int cpu_count; + + hb_thread_t ** yadif_threads; // Threads for Yadif - one per CPU + hb_lock_t ** yadif_begin_lock; // Thread has work + hb_lock_t ** yadif_complete_lock; // Thread has completed work + yadif_arguments_t *yadif_arguments; // Arguments to thread for work + + hb_thread_t ** decomb_threads; // Threads for comb detection - one per CPU + hb_lock_t ** decomb_begin_lock; // Thread has work + hb_lock_t ** decomb_complete_lock; // Thread has completed work + decomb_arguments_t *decomb_arguments; // Arguments to thread for work + }; hb_filter_private_t * hb_decomb_init( int pix_fmt, @@ -81,7 +106,7 @@ void hb_decomb_close( hb_filter_private_t * pv ); hb_filter_object_t hb_filter_decomb = { FILTER_DECOMB, - "Deinterlaces selectively with yadif/mcdeint or lowpass5 blending", + "Deinterlaces selectively with yadif/mcdeint and lowpass5 blending", NULL, hb_decomb_init, hb_decomb_work, @@ -328,7 +353,7 @@ int check_combing_mask( hb_filter_private_t * pv ) } } -int tritical_detect_comb( hb_filter_private_t * pv ) +int detect_combed_segment( hb_filter_private_t * pv, int segment_start, int segment_stop ) { /* A mish-mash of various comb detection tricks picked up from neuron2's Decomb plugin for @@ -346,14 +371,22 @@ int tritical_detect_comb( hb_filter_private_t * pv ) int athresh_squared = athresh * athresh; int athresh6 = 6 *athresh; - /* One pas for Y, one pass for Cb, one pass for Cr */ + /* One pas for Y, one pass for U, one pass for V */ for( k = 0; k < 1; k++ ) { int ref_stride = pv->ref_stride[k]; width = pv->width[k]; height = pv->height[k]; - - for( y = 2; y < ( height - 2 ); y++ ) + + /* Comb detection has to start at y = 2 and end at + y = height - 2, because it needs to examine + 2 pixels above and 2 below the current pixel. */ + if( segment_start < 2 ) + segment_start = 2; + if( segment_stop > height - 2 ) + segment_stop = height - 2; + + for( y = segment_start; y < segment_stop; y++ ) { /* These are just to make the buffer locations easier to read. */ int back_2 = ( y - 2 )*ref_stride ; @@ -418,8 +451,8 @@ int tritical_detect_comb( hb_filter_private_t * pv ) if( motion || ( pv->yadif_deinterlaced_frames==0 && pv->blend_deinterlaced_frames==0 && pv->unfiltered_frames==0) ) { - /*That means it's time for the spatial check. - We've got several options here. */ + /* That means it's time for the spatial check. + We've got several options here. */ if( spatial_metric == 0 ) { /* Simple 32detect style comb detection */ @@ -475,6 +508,105 @@ int tritical_detect_comb( hb_filter_private_t * pv ) } } } +} + +typedef struct decomb_thread_arg_s { + hb_filter_private_t *pv; + int segment; +} decomb_thread_arg_t; + +/* + * comb detect this segment of all three planes in a single thread. + */ +void decomb_filter_thread( void *thread_args_v ) +{ + decomb_arguments_t *decomb_work = NULL; + hb_filter_private_t * pv; + int run = 1; + int segment, segment_start, segment_stop, plane; + decomb_thread_arg_t *thread_args = thread_args_v; + + pv = thread_args->pv; + segment = thread_args->segment; + + hb_log("decomb thread started for segment %d", segment); + + while( run ) + { + /* + * Wait here until there is work to do. hb_lock() blocks until + * render releases it to say that there is more work to do. + */ + hb_lock( pv->decomb_begin_lock[segment] ); + + decomb_work = &pv->decomb_arguments[segment]; + + if( decomb_work->stop ) + { + /* + * No more work to do, exit this thread. + */ + run = 0; + continue; + } + + /* + * Process segment (for now just from luma) + */ + for( plane = 0; plane < 1; plane++) + { + + int w = pv->width[plane]; + int h = pv->height[plane]; + int ref_stride = pv->ref_stride[plane]; + segment_start = ( h / pv->cpu_count ) * segment; + if( segment == pv->cpu_count - 1 ) + { + /* + * Final segment + */ + segment_stop = h; + } else { + segment_stop = ( h / pv->cpu_count ) * ( segment + 1 ); + } + + detect_combed_segment( pv, segment_start, segment_stop ); + } + /* + * Finished this segment, let everyone know. + */ + hb_unlock( pv->decomb_complete_lock[segment] ); + } + free( thread_args_v ); +} + +int comb_segmenter( hb_filter_private_t * pv ) +{ + int segment; + + for( segment = 0; segment < pv->cpu_count; segment++ ) + { + /* + * Let the thread for this plane know that we've setup work + * for it by releasing the begin lock (ensuring that the + * complete lock is already locked so that we block when + * we try to lock it again below). + */ + hb_lock( pv->decomb_complete_lock[segment] ); + hb_unlock( pv->decomb_begin_lock[segment] ); + } + + /* + * Wait until all three threads have completed by trying to get + * the complete lock that we locked earlier for each thread, which + * will block until that thread has completed the work on that + * plane. + */ + for( segment = 0; segment < pv->cpu_count; segment++ ) + { + hb_lock( pv->decomb_complete_lock[segment] ); + hb_unlock( pv->decomb_complete_lock[segment] ); + } return check_combing_mask( pv ); } @@ -488,9 +620,12 @@ static void yadif_filter_line( uint8_t *dst, int y, hb_filter_private_t * pv ) { + /* While prev and next point to the previous and next frames, + prev2 and next2 will shift depending on the parity, usually 1. + They are the previous and next fields, the fields temporally adjacent + to the other field in the current frame--the one not being filtered. */ uint8_t *prev2 = parity ? prev : cur ; uint8_t *next2 = parity ? cur : next; - int w = pv->width[plane]; int refs = pv->ref_stride[plane]; int x; @@ -499,16 +634,16 @@ static void yadif_filter_line( uint8_t *dst, { /* Pixel above*/ int c = cur[-refs]; - /* Temporal average -- the current pixel location in the previous and next fields */ + /* Temporal average: the current location in the adjacent fields */ int d = (prev2[0] + next2[0])>>1; /* Pixel below */ int e = cur[+refs]; - /* How the current pixel changes from the field before to the field after */ + /* How the current pixel changes between the adjacent fields */ int temporal_diff0 = ABS(prev2[0] - next2[0]); - /* The average of how much the pixels above and below change from the field before to now. */ + /* The average of how much the pixels above and below change from the frame before to now. */ int temporal_diff1 = ( ABS(prev[-refs] - cur[-refs]) + ABS(prev[+refs] - cur[+refs]) ) >> 1; - /* The average of how much the pixels above and below change from now to the next field. */ + /* The average of how much the pixels above and below change from now to the next frame. */ int temporal_diff2 = ( ABS(next[-refs] - cur[-refs]) + ABS(next[+refs] - cur[+refs]) ) >> 1; /* For the actual difference, use the largest of the previous average diffs. */ int diff = MAX3(temporal_diff0>>1, temporal_diff1, temporal_diff2); @@ -566,8 +701,8 @@ static void yadif_filter_line( uint8_t *dst, YADIF_CHECK(-1) YADIF_CHECK(-2) }} }} YADIF_CHECK( 1) YADIF_CHECK( 2) }} }} - /* Temporally adjust the spatial prediction by comparing - against fields in the previous and next frames. */ + /* Temporally adjust the spatial prediction by + comparing against lines in the adjacent fields. */ int b = (prev2[-2*refs] + next2[-2*refs])>>1; int f = (prev2[+2*refs] + next2[+2*refs])>>1; @@ -596,13 +731,124 @@ static void yadif_filter_line( uint8_t *dst, } } +typedef struct yadif_thread_arg_s { + hb_filter_private_t *pv; + int segment; +} yadif_thread_arg_t; + +/* + * deinterlace this segment of all three planes in a single thread. + */ +void yadif_decomb_filter_thread( void *thread_args_v ) +{ + yadif_arguments_t *yadif_work = NULL; + hb_filter_private_t * pv; + int run = 1; + int plane; + int segment, segment_start, segment_stop; + yadif_thread_arg_t *thread_args = thread_args_v; + uint8_t **dst; + int parity, tff, y, w, h, ref_stride, is_combed; + + pv = thread_args->pv; + segment = thread_args->segment; + + hb_log("yadif thread started for segment %d", segment); + + while( run ) + { + /* + * Wait here until there is work to do. hb_lock() blocks until + * render releases it to say that there is more work to do. + */ + hb_lock( pv->yadif_begin_lock[segment] ); + + yadif_work = &pv->yadif_arguments[segment]; + + if( yadif_work->stop ) + { + /* + * No more work to do, exit this thread. + */ + run = 0; + continue; + } + + if( yadif_work->dst == NULL ) + { + hb_error( "thread started when no work available" ); + hb_snooze(500); + continue; + } + + is_combed = pv->yadif_arguments[segment].is_combed; + + /* + * Process all three planes, but only this segment of it. + */ + for( plane = 0; plane < 3; plane++) + { + + dst = yadif_work->dst; + parity = yadif_work->parity; + tff = yadif_work->tff; + w = pv->width[plane]; + h = pv->height[plane]; + ref_stride = pv->ref_stride[plane]; + segment_start = ( h / pv->cpu_count ) * segment; + if( segment == pv->cpu_count - 1 ) + { + /* + * Final segment + */ + segment_stop = h; + } else { + segment_stop = ( h / pv->cpu_count ) * ( segment + 1 ); + } + + for( y = segment_start; y < segment_stop; y++ ) + { + if( ( pv->mode == 4 && is_combed ) || is_combed == 2 ) + { + uint8_t *prev = &pv->ref[0][plane][y*ref_stride]; + uint8_t *cur = &pv->ref[1][plane][y*ref_stride]; + uint8_t *next = &pv->ref[2][plane][y*ref_stride]; + uint8_t *dst2 = &dst[plane][y*w]; + + blend_filter_line( dst2, cur, plane, y, pv ); + } + else if( (y ^ parity) & 1 && is_combed == 1 ) + { + uint8_t *prev = &pv->ref[0][plane][y*ref_stride]; + uint8_t *cur = &pv->ref[1][plane][y*ref_stride]; + uint8_t *next = &pv->ref[2][plane][y*ref_stride]; + uint8_t *dst2 = &dst[plane][y*w]; + + yadif_filter_line( dst2, prev, cur, next, plane, parity ^ tff, y, pv ); + } + else + { + memcpy( &dst[plane][y*w], + &pv->ref[1][plane][y*ref_stride], + w * sizeof(uint8_t) ); + } + } + } + /* + * Finished this segment, let everyone know. + */ + hb_unlock( pv->yadif_complete_lock[segment] ); + } + free( thread_args_v ); +} + static void yadif_filter( uint8_t ** dst, int parity, int tff, hb_filter_private_t * pv ) { - int is_combed = tritical_detect_comb( pv ); + int is_combed = comb_segmenter( pv ); if( is_combed == 1 ) { @@ -616,40 +862,65 @@ static void yadif_filter( uint8_t ** dst, { pv->unfiltered_frames++; } - - int i; - for( i = 0; i < 3; i++ ) + + if( is_combed ) { - int w = pv->width[i]; - int h = pv->height[i]; - int ref_stride = pv->ref_stride[i]; - - int y; - for( y = 0; y < h; y++ ) - { - if( ( pv->mode == 4 && is_combed ) || is_combed == 2 ) - { - uint8_t *prev = &pv->ref[0][i][y*ref_stride]; - uint8_t *cur = &pv->ref[1][i][y*ref_stride]; - uint8_t *next = &pv->ref[2][i][y*ref_stride]; - uint8_t *dst2 = &dst[i][y*w]; + int segment; + + for( segment = 0; segment < pv->cpu_count; segment++ ) + { + /* + * Setup the work for this plane. + */ + pv->yadif_arguments[segment].parity = parity; + pv->yadif_arguments[segment].tff = tff; + pv->yadif_arguments[segment].dst = dst; + pv->yadif_arguments[segment].is_combed = is_combed; + + /* + * Let the thread for this plane know that we've setup work + * for it by releasing the begin lock (ensuring that the + * complete lock is already locked so that we block when + * we try to lock it again below). + */ + hb_lock( pv->yadif_complete_lock[segment] ); + hb_unlock( pv->yadif_begin_lock[segment] ); + } - blend_filter_line( dst2, cur, i, y, pv ); - } - else if( (y ^ parity) & 1 && is_combed == 1 ) - { - uint8_t *prev = &pv->ref[0][i][y*ref_stride]; - uint8_t *cur = &pv->ref[1][i][y*ref_stride]; - uint8_t *next = &pv->ref[2][i][y*ref_stride]; - uint8_t *dst2 = &dst[i][y*w]; + /* + * Wait until all three threads have completed by trying to get + * the complete lock that we locked earlier for each thread, which + * will block until that thread has completed the work on that + * plane. + */ + for( segment = 0; segment < pv->cpu_count; segment++ ) + { + hb_lock( pv->yadif_complete_lock[segment] ); + hb_unlock( pv->yadif_complete_lock[segment] ); + } - yadif_filter_line( dst2, prev, cur, next, i, parity ^ tff, y, pv ); - } - else + /* + * Entire frame is now deinterlaced. + */ + } + else + { + /* Just passing through... */ + int i; + for( i = 0; i < 3; i++ ) + { + uint8_t * ref = pv->ref[1][i]; + uint8_t * dest = dst[i]; + + int w = pv->width[i]; + int ref_stride = pv->ref_stride[i]; + + int y; + for( y = 0; y < pv->height[i]; y++ ) { - memcpy( &dst[i][y*w], - &pv->ref[1][i][y*ref_stride], - w * sizeof(uint8_t) ); + memcpy(dest, ref, w); + dest += w; + ref += ref_stride; } } } @@ -822,6 +1093,9 @@ hb_filter_private_t * hb_decomb_init( int pix_fmt, &pv->block_width, &pv->block_height ); } + + pv->cpu_count = hb_get_cpu_count(); + if( pv->mode == 2 || pv->mode == 3 ) { @@ -851,9 +1125,94 @@ hb_filter_private_t * hb_decomb_init( int pix_fmt, int w = ((pv->width[0] + 31) & (~31))>>is_chroma; int h = ((pv->height[0]+6+ 31) & (~31))>>is_chroma; - pv->mask[i] = malloc( w*h*sizeof(uint8_t) ) + 3*w; + pv->mask[i] = calloc( 1, w*h*sizeof(uint8_t) ) + 3*w; } + /* + * Create yadif threads and locks. + */ + pv->yadif_threads = malloc( sizeof( hb_thread_t* ) * pv->cpu_count ); + pv->yadif_begin_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count ); + pv->yadif_complete_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count ); + pv->yadif_arguments = malloc( sizeof( yadif_arguments_t ) * pv->cpu_count ); + + for( i = 0; i < pv->cpu_count; i++ ) + { + yadif_thread_arg_t *thread_args; + + thread_args = malloc( sizeof( yadif_thread_arg_t ) ); + + if( thread_args ) + { + thread_args->pv = pv; + thread_args->segment = i; + + pv->yadif_begin_lock[i] = hb_lock_init(); + pv->yadif_complete_lock[i] = hb_lock_init(); + + /* + * Important to start off with the threads locked waiting + * on input. + */ + hb_lock( pv->yadif_begin_lock[i] ); + + pv->yadif_arguments[i].stop = 0; + pv->yadif_arguments[i].dst = NULL; + + pv->yadif_threads[i] = hb_thread_init( "yadif_filter_segment", + yadif_decomb_filter_thread, + thread_args, + HB_NORMAL_PRIORITY ); + } + else + { + hb_error( "yadif could not create threads" ); + } + } + + /* + * Create decomb threads and locks. + */ + pv->decomb_threads = malloc( sizeof( hb_thread_t* ) * pv->cpu_count ); + pv->decomb_begin_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count ); + pv->decomb_complete_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count ); + pv->decomb_arguments = malloc( sizeof( decomb_arguments_t ) * pv->cpu_count ); + + for( i = 0; i < pv->cpu_count; i++ ) + { + decomb_thread_arg_t *decomb_thread_args; + + decomb_thread_args = malloc( sizeof( decomb_thread_arg_t ) ); + + if( decomb_thread_args ) + { + decomb_thread_args->pv = pv; + decomb_thread_args->segment = i; + + pv->decomb_begin_lock[i] = hb_lock_init(); + pv->decomb_complete_lock[i] = hb_lock_init(); + + /* + * Important to start off with the threads locked waiting + * on input. + */ + hb_lock( pv->decomb_begin_lock[i] ); + + pv->decomb_arguments[i].stop = 0; + + pv->decomb_threads[i] = hb_thread_init( "decomb_filter_segment", + decomb_filter_thread, + decomb_thread_args, + HB_NORMAL_PRIORITY ); + } + else + { + hb_error( "decomb could not create threads" ); + } + } + + + /* Allocate mcdeint specific buffers */ if( pv->mcdeint_mode >= 0 ) { @@ -953,6 +1312,48 @@ void hb_decomb_close( hb_filter_private_t * pv ) } } + for( i = 0; i < pv->cpu_count; i++) + { + /* + * Tell each yadif thread to stop, and then cleanup. + */ + pv->yadif_arguments[i].stop = 1; + hb_unlock( pv->yadif_begin_lock[i] ); + + hb_thread_close( &pv->yadif_threads[i] ); + hb_lock_close( &pv->yadif_begin_lock[i] ); + hb_lock_close( &pv->yadif_complete_lock[i] ); + } + + /* + * free memory for yadif structs + */ + free( pv->yadif_threads ); + free( pv->yadif_begin_lock ); + free( pv->yadif_complete_lock ); + free( pv->yadif_arguments ); + + for( i = 0; i < pv->cpu_count; i++) + { + /* + * Tell each decomb thread to stop, and then cleanup. + */ + pv->decomb_arguments[i].stop = 1; + hb_unlock( pv->decomb_begin_lock[i] ); + + hb_thread_close( &pv->decomb_threads[i] ); + hb_lock_close( &pv->decomb_begin_lock[i] ); + hb_lock_close( &pv->decomb_complete_lock[i] ); + } + + /* + * free memory for decomb structs + */ + free( pv->decomb_threads ); + free( pv->decomb_begin_lock ); + free( pv->decomb_complete_lock ); + free( pv->decomb_arguments ); + /* Cleanup mcdeint specific buffers */ if( pv->mcdeint_mode >= 0 ) {