2 Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "mpeg2dec/mpeg2.h"
/* Compile-time switch tested by the #ifdef SUPPRESS_AV_LOG sections of mcdeint_filter. */
23 #define SUPPRESS_AV_LOG
/* Initial values hb_deinterlace_init stores in pv->yadif_mode and pv->yadif_parity.
   A negative yadif_mode makes hb_deinterlace_work use avpicture_deinterlace instead of yadif;
   a negative yadif_parity makes it take the field order from the input buffer flags. */
25 #define YADIF_MODE_DEFAULT -1
26 #define YADIF_PARITY_DEFAULT -1
/* Initial values for pv->mcdeint_mode and pv->mcdeint_qp.
   The mcdeint encoder is only set up and run when mcdeint_mode >= 0. */
28 #define MCDEINT_MODE_DEFAULT -1
29 #define MCDEINT_QP_DEFAULT 1
/* Absolute value. The argument is expanded more than once, so it must not have side effects. */
31 #define ABS(a) ((a) > 0 ? (a) : (-(a)))
/* Three-way minimum / maximum built on MIN and MAX, which are not defined in this part of the file. */
32 #define MIN3(a,b,c) MIN(MIN(a,b),c)
33 #define MAX3(a,b,c) MAX(MAX(a,b),c)
/*
 * Per-segment work descriptor for a yadif worker thread.
 * Only the opening line is visible in this listing. Elsewhere in the file
 * the members dst, parity, tff and stop are accessed through
 * yadif_arguments_t (see yadif_filter and yadif_filter_thread).
 */
35 typedef struct yadif_arguments_s {
/*
 * Private state of the deinterlace filter.
 * NOTE(review): this listing omits some member lines; the code below also
 * uses pix_fmt, width[], height[], yadif_mode, yadif_parity, yadif_ready,
 * cpu_count, mcdeint_mode, mcdeint_qp, pic_in and pic_out.
 */
42 struct hb_filter_private_s
/* Plane pointers of the cached reference frames, indexed [slot][plane].
   yadif_filter_thread reads slot 0 as prev, slot 1 as cur and slot 2 as next;
   yadif_store_ref writes new pictures into slot 2 and touches slot 3 while rotating. */
52 uint8_t * yadif_ref[4][3];
/* Row stride, in bytes, of each reference plane. */
53 int yadif_ref_stride[3];
/* Worker-thread bookkeeping; each array is allocated with cpu_count entries in hb_deinterlace_init. */
57 hb_thread_t ** yadif_threads; // Threads for Yadif - one per CPU
58 hb_lock_t ** yadif_begin_lock; // Thread has work
59 hb_lock_t ** yadif_complete_lock; // Thread has completed work
60 yadif_arguments_t *yadif_arguments; // Arguments to thread for work
/* mcdeint state: encoder output buffer, encoder context, the input frame handed
   to the encoder, and the frame taken from the encoder's coded_frame after encoding. */
65 int mcdeint_outbuf_size;
66 uint8_t * mcdeint_outbuf;
67 AVCodecContext * mcdeint_avctx_enc;
68 AVFrame * mcdeint_frame;
69 AVFrame * mcdeint_frame_dec;
/* Two output video buffers, plus a zero-sized buffer that holds the settings
   of the previously received input buffer. */
73 hb_buffer_t * buf_out[2];
74 hb_buffer_t * buf_settings;
/* Forward declarations of the filter entry points defined further down in this file
   (some parameter lines are not visible in this listing). */
77 hb_filter_private_t * hb_deinterlace_init( int pix_fmt,
82 int hb_deinterlace_work( hb_buffer_t * buf_in,
83 hb_buffer_t ** buf_out,
87 hb_filter_private_t * pv );
89 void hb_deinterlace_close( hb_filter_private_t * pv );
/* Filter descriptor object for this filter. Only the opening line and one
   string initializer are visible here; the remaining initializers are not shown. */
91 hb_filter_object_t hb_filter_deinterlace =
94 "Deinterlace (ffmpeg or yadif/mcdeint)",
/*
 * Store a new picture in the yadif reference cache.
 *
 * pic - the three source planes; rows are stepped by pv->width[plane],
 *       i.e. the source is treated as tightly packed.
 * pv  - filter state that owns the reference slots.
 *
 * The slot pointers are shuffled first, then each plane of pic is walked
 * row by row into slot 2.
 * NOTE(review): the source operands of memcpy/memmove and the per-row copy
 * statement are not visible in this listing.
 */
102 static void yadif_store_ref( const uint8_t ** pic,
103 hb_filter_private_t * pv )
/* Save three plane pointers into scratch slot 3. */
105 memcpy( pv->yadif_ref[3],
107 sizeof(uint8_t *)*3 );
/* Move 3 slots x 3 plane pointers so they start at slot 0
   (presumably shifting slots 1..3 down by one; verify against the full file). */
109 memmove( pv->yadif_ref[0],
111 sizeof(uint8_t *)*3*3 );
/* One pass per plane. */
114 for( i = 0; i < 3; i++ )
116 const uint8_t * src = pic[i];
117 uint8_t * ref = pv->yadif_ref[2][i];
119 int w = pv->width[i];
120 int ref_stride = pv->yadif_ref_stride[i];
/* Walk the rows: the source advances by the plane width, the reference by its padded stride. */
123 for( y = 0; y < pv->height[i]; y++ )
126 src = (uint8_t*)src + w;
127 ref = (uint8_t*)ref + ref_stride;
/*
 * Interpolate one line of one plane from the previous, current and next
 * reference frames and write the result to dst.
 *
 * Each output pixel starts from a spatial prediction taken from the rows
 * above and below in cur, then is clamped to the temporal average d plus or
 * minus a difference bound.
 * NOTE(review): several parameter lines, the definitions of c and e, and the
 * per-pixel pointer advances are not visible in this listing. c and e are
 * presumably cur[-refs] and cur[+refs]; confirm against the full file.
 */
132 static void yadif_filter_line( uint8_t *dst,
138 hb_filter_private_t * pv )
/* parity selects which neighbouring frames supply the temporal pair. */
140 uint8_t *prev2 = parity ? prev : cur ;
141 uint8_t *next2 = parity ? cur : next;
143 int w = pv->width[plane];
144 int refs = pv->yadif_ref_stride[plane];
147 for( x = 0; x < w; x++)
/* d: temporal average of the co-located pixel in the two selected frames. */
150 int d = (prev2[0] + next2[0])>>1;
/* Temporal difference estimates; diff is the largest and bounds the final clamp. */
152 int temporal_diff0 = ABS(prev2[0] - next2[0]);
153 int temporal_diff1 = ( ABS(prev[-refs] - c) + ABS(prev[+refs] - e) ) >> 1;
154 int temporal_diff2 = ( ABS(next[-refs] - c) + ABS(next[+refs] - e) ) >> 1;
155 int diff = MAX3(temporal_diff0>>1, temporal_diff1, temporal_diff2);
/* Start with the straight vertical average and its score. */
156 int spatial_pred = (c+e)>>1;
157 int spatial_score = ABS(cur[-refs-1] - cur[+refs-1]) + ABS(c-e) +
158 ABS(cur[-refs+1] - cur[+refs+1]) - 1;
/* YADIF_CHECK(j) scores the direction shifted by j pixels and, when it beats the
   current best, adopts that direction's average as spatial_pred. The macro leaves
   its blocks open so that the second check of each pair only runs when the first
   one improved the score; the trailing braces at the call sites close them. */
160 #define YADIF_CHECK(j)\
161 { int score = ABS(cur[-refs-1+j] - cur[+refs-1-j])\
162 + ABS(cur[-refs +j] - cur[+refs -j])\
163 + ABS(cur[-refs+1+j] - cur[+refs+1-j]);\
164 if( score < spatial_score ){\
165 spatial_score = score;\
166 spatial_pred = (cur[-refs +j] + cur[+refs -j])>>1;\
168 YADIF_CHECK(-1) YADIF_CHECK(-2) }} }}
169 YADIF_CHECK( 1) YADIF_CHECK( 2) }} }}
/* For modes below 2, also look two rows up and down (b, f) and use the result to widen diff. */
171 if( pv->yadif_mode < 2 )
173 int b = (prev2[-2*refs] + next2[-2*refs])>>1;
174 int f = (prev2[+2*refs] + next2[+2*refs])>>1;
176 int max = MAX3(d-e, d-c, MIN(b-c, f-e));
177 int min = MIN3(d-e, d-c, MAX(b-c, f-e));
179 diff = MAX3( diff, min, -max );
/* Clamp the spatial prediction to the range [d - diff, d + diff]. */
182 if( spatial_pred > d + diff )
184 spatial_pred = d + diff;
186 else if( spatial_pred < d - diff )
188 spatial_pred = d - diff;
191 dst[0] = spatial_pred;
/*
 * Start-up argument for one yadif worker thread. It is allocated in
 * hb_deinterlace_init and freed by the thread itself in yadif_filter_thread.
 * The code also reads and writes a member named segment, whose declaration
 * line is not visible in this listing.
 */
202 typedef struct yadif_thread_arg_s {
203 hb_filter_private_t *pv;
205 } yadif_thread_arg_t;
/*
 * Worker thread body. Each thread handles one horizontal segment of all
 * three planes. It blocks on its begin lock, processes the work described
 * by pv->yadif_arguments[segment], then releases its complete lock.
 * thread_args_v is a heap-allocated yadif_thread_arg_t that this function frees.
 * NOTE(review): brace-only lines, the surrounding loop, the declarations of
 * plane and dst, and the assignments to penultimate and ultimate are not
 * visible in this listing.
 */
208 * deinterlace this segment of all three planes in a single thread.
210 void yadif_filter_thread( void *thread_args_v )
212 yadif_arguments_t *yadif_work = NULL;
213 hb_filter_private_t * pv;
216 int segment, segment_start, segment_stop;
217 yadif_thread_arg_t *thread_args = thread_args_v;
219 int parity, tff, y, w, h, ref_stride, penultimate, ultimate;
222 pv = thread_args->pv;
223 segment = thread_args->segment;
225 hb_log("Yadif Deinterlace thread started for segment %d", segment);
230 * Wait here until there is work to do. hb_lock() blocks until
231 * render releases it to say that there is more work to do.
233 hb_lock( pv->yadif_begin_lock[segment] );
235 yadif_work = &pv->yadif_arguments[segment];
/* The stop flag is set by hb_deinterlace_close before it releases the begin lock. */
237 if( yadif_work->stop )
240 * No more work to do, exit this thread.
/* A NULL dst means the thread was released without any work having been set up. */
246 if( yadif_work->dst == NULL )
248 hb_error( "Thread started when no work available" );
254 * Process all three planes, but only this segment of it.
256 for( plane = 0; plane < 3; plane++)
259 dst = yadif_work->dst;
260 parity = yadif_work->parity;
261 tff = yadif_work->tff;
262 w = pv->width[plane];
263 h = pv->height[plane];
/* Split the plane height evenly across cpu_count segments. The last segment is
   special-cased; its stop value is set on a line not shown here. */
266 ref_stride = pv->yadif_ref_stride[plane];
267 segment_start = ( h / pv->cpu_count ) * segment;
268 if( segment == pv->cpu_count - 1 )
275 segment_stop = ( h / pv->cpu_count ) * ( segment + 1 );
278 for( y = segment_start; y < segment_stop; y++ )
/* Rows whose parity differs from the requested parity are the ones rebuilt. */
280 if( ( ( y ^ parity ) & 1 ) )
282 /* This is the bottom field when TFF and vice-versa.
283 It's the field that gets filtered. Because yadif
284 needs 2 lines above and below the one being filtered,
285 we need to mirror the edges. When TFF, this means
286 replacing the 2nd line with a copy of the 1st,
287 and the last with the second-to-last. */
288 if( y > 1 && y < ( h -2 ) )
290 /* This isn't the top or bottom, proceed as normal to yadif. */
291 uint8_t *prev = &pv->yadif_ref[0][plane][y*ref_stride];
292 uint8_t *cur = &pv->yadif_ref[1][plane][y*ref_stride];
293 uint8_t *next = &pv->yadif_ref[2][plane][y*ref_stride];
294 uint8_t *dst2 = &dst[plane][y*w];
296 yadif_filter_line( dst2,
/* Edge rows are copied from a neighbouring row of the current reference frame.
   The conditions guarding the first two copies are not visible in this listing. */
306 /* BFF, so y0 = y1 */
307 memcpy( &dst[plane][y*w],
308 &pv->yadif_ref[1][plane][1*ref_stride],
309 w * sizeof(uint8_t) );
313 /* TFF, so y1 = y0 */
314 memcpy( &dst[plane][y*w],
315 &pv->yadif_ref[1][plane][0],
316 w * sizeof(uint8_t) );
318 else if( y == penultimate )
320 /* BFF, so penultimate y = ultimate y */
321 memcpy( &dst[plane][y*w],
322 &pv->yadif_ref[1][plane][ultimate*ref_stride],
323 w * sizeof(uint8_t) );
325 else if( y == ultimate )
327 /* TFF, so ultimate y = penultimate y */
328 memcpy( &dst[plane][y*w],
329 &pv->yadif_ref[1][plane][penultimate*ref_stride],
330 w * sizeof(uint8_t) );
335 /* Preserve this field unfiltered */
336 memcpy( &dst[plane][y*w],
337 &pv->yadif_ref[1][plane][y*ref_stride],
338 w * sizeof(uint8_t) );
343 * Finished this segment, let everyone know.
345 hb_unlock( pv->yadif_complete_lock[segment] );
/* The thread owns its start-up argument block and releases it here. */
347 free( thread_args_v );
/*
 * Dispatch one frame of yadif work to the worker threads and wait for all
 * of them to finish.
 *
 * dst    - destination plane pointers, shared by every segment.
 * parity - passed through to the workers via yadif_arguments.
 * tff    - passed through to the workers via yadif_arguments.
 * pv     - filter state holding the locks and argument slots.
 *
 * NOTE(review): each complete lock is locked here and unlocked in
 * yadif_filter_thread, i.e. on a different thread. Whether that is permitted
 * depends on how hb_lock is implemented - confirm.
 */
352 * threaded yadif - each thread deinterlaces a single segment of all
353 * three planes. Where a segment is defined as the frame divided by
354 * the number of CPUs.
356 * This function blocks until the frame is deinterlaced.
358 static void yadif_filter( uint8_t ** dst,
361 hb_filter_private_t * pv )
/* First pass: publish the arguments for every segment and release its worker. */
366 for( segment = 0; segment < pv->cpu_count; segment++ )
369 * Setup the work for this plane.
371 pv->yadif_arguments[segment].parity = parity;
372 pv->yadif_arguments[segment].tff = tff;
373 pv->yadif_arguments[segment].dst = dst;
376 * Let the thread for this plane know that we've setup work
377 * for it by releasing the begin lock (ensuring that the
378 * complete lock is already locked so that we block when
379 * we try to lock it again below).
381 hb_lock( pv->yadif_complete_lock[segment] );
382 hb_unlock( pv->yadif_begin_lock[segment] );
/* Second pass: wait on each worker by taking and immediately releasing its complete lock. */
386 * Wait until all three threads have completed by trying to get
387 * the complete lock that we locked earlier for each thread, which
388 * will block until that thread has completed the work on that
391 for( segment = 0; segment < pv->cpu_count; segment++ )
393 hb_lock( pv->yadif_complete_lock[segment] );
394 hb_unlock( pv->yadif_complete_lock[segment] );
398 * Entire frame is now deinterlaced.
/*
 * Motion-compensated refinement pass.
 *
 * src is encoded with pv->mcdeint_avctx_enc, and the encoder's coded_frame is
 * used as a prediction. On rows selected by parity, each pixel is corrected
 * using the difference between that prediction and src on the neighbouring
 * rows. All other rows are copied from src into both dst and the coded frame.
 *
 * NOTE(review): some parameter lines, the declarations of i, x, y, out_size,
 * temp, filp and spatial_score, and all brace-only lines are not visible in
 * this listing.
 */
402 static void mcdeint_filter( uint8_t ** dst,
405 hb_filter_private_t * pv )
/* Silence libavcodec logging for the duration of this call, remembering the old level. */
410 #ifdef SUPPRESS_AV_LOG
411 /* TODO: temporarily change log level to suppress obnoxious debug output */
412 int loglevel = av_log_get_level();
413 av_log_set_level( AV_LOG_QUIET );
/* Point the encoder input frame at the source planes; linesize is the plane width. */
418 pv->mcdeint_frame->data[i] = src[i];
419 pv->mcdeint_frame->linesize[i] = pv->width[i];
421 pv->mcdeint_avctx_enc->me_cmp = FF_CMP_SAD;
422 pv->mcdeint_avctx_enc->me_sub_cmp = FF_CMP_SAD;
423 pv->mcdeint_frame->quality = pv->mcdeint_qp * FF_QP2LAMBDA;
425 out_size = avcodec_encode_video( pv->mcdeint_avctx_enc,
427 pv->mcdeint_outbuf_size,
/* The encoder's coded_frame supplies the predicted picture. */
430 pv->mcdeint_frame_dec = pv->mcdeint_avctx_enc->coded_frame;
432 for( i = 0; i < 3; i++ )
434 int w = pv->width[i];
435 int h = pv->height[i];
436 int fils = pv->mcdeint_frame_dec->linesize[i];
437 int srcs = pv->width[i];
/* First pass over the rows: rows whose parity differs from the requested one are corrected. */
439 for( y = 0; y < h; y++ )
441 if( (y ^ parity) & 1 )
443 for( x = 0; x < w; x++ )
/* Only pixels whose +/-1 row, +/-2 column neighbourhood stays inside the plane take this path. */
445 if( (x-2)+(y-1)*w >= 0 && (x+2)+(y+1)*w < w*h )
448 &pv->mcdeint_frame_dec->data[i][x + y*fils];
449 uint8_t * srcp = &src[i][x + y*srcs];
/* Prediction error on the row above (diff0) and the row below (diff1). */
451 int diff0 = filp[-fils] - srcp[-srcs];
452 int diff1 = filp[+fils] - srcp[+srcs];
455 ABS(srcp[-srcs-1] - srcp[+srcs-1])
456 + ABS(srcp[-srcs ] - srcp[+srcs ])
457 + ABS(srcp[-srcs+1] - srcp[+srcs+1]) - 1;
/* MCDEINT_CHECK(j) scores the direction shifted by j pixels and, when it beats the
   current best, recomputes diff0/diff1 along that direction. The macro leaves its
   blocks open; the trailing braces at the call sites close them. */
461 #define MCDEINT_CHECK(j)\
462 { int score = ABS(srcp[-srcs-1+j] - srcp[+srcs-1-j])\
463 + ABS(srcp[-srcs +j] - srcp[+srcs -j])\
464 + ABS(srcp[-srcs+1+j] - srcp[+srcs+1-j]);\
465 if( score < spatial_score ) {\
466 spatial_score = score;\
467 diff0 = filp[-fils+j] - srcp[-srcs+j];\
468 diff1 = filp[+fils-j] - srcp[+srcs-j];
470 MCDEINT_CHECK(-1) MCDEINT_CHECK(-2) }} }}
471 MCDEINT_CHECK( 1) MCDEINT_CHECK( 2) }} }}
/* Subtract a correction from temp; its form depends on the sign of diff0 + diff1. */
473 if(diff0 + diff1 > 0)
475 temp -= (diff0 + diff1 -
476 ABS( ABS(diff0) - ABS(diff1) ) / 2) / 2;
480 temp -= (diff0 + diff1 +
481 ABS( ABS(diff0) - ABS(diff1) ) / 2) / 2;
/* Store to both the output and the coded frame. Values outside 0..255 are replaced
   by ~(temp>>31), which saturates to 0 or 255 if >> on a negative int is arithmetic. */
484 filp[0] = dst[i][x + y*w] =
485 temp > 255U ? ~(temp>>31) : temp;
490 pv->mcdeint_frame_dec->data[i][x + y*fils];
/* Second pass over the rows: the remaining rows are copied straight from src. */
496 for( y = 0; y < h; y++ )
498 if( !((y ^ parity) & 1) )
500 for( x = 0; x < w; x++ )
502 pv->mcdeint_frame_dec->data[i][x + y*fils] =
503 dst[i][x + y*w]= src[i][x + y*srcs];
509 #ifdef SUPPRESS_AV_LOG
510 /* TODO: restore previous log level */
511 av_log_set_level(loglevel);
/*
 * Allocate and initialise the filter's private state: plane geometry, output
 * buffers, defaults, optional settings parsing, and the yadif and mcdeint
 * resources when their modes are >= 0.
 *
 * NOTE(review): parameter lines after pix_fmt, the return statements, the
 * sscanf arguments and all brace-only lines are not visible in this listing.
 * No NULL check of calloc/malloc results is visible either.
 */
515 hb_filter_private_t * hb_deinterlace_init( int pix_fmt,
/* Only planar YUV 4:2:0 input is handled; other formats take this branch. */
520 if( pix_fmt != PIX_FMT_YUV420P )
525 hb_filter_private_t * pv = calloc( 1, sizeof(struct hb_filter_private_s) );
527 pv->pix_fmt = pix_fmt;
/* Plane 0 is full size; planes 1 and 2 are half width and half height. */
529 pv->width[0] = width;
530 pv->height[0] = height;
531 pv->width[1] = pv->width[2] = width >> 1;
532 pv->height[1] = pv->height[2] = height >> 1;
534 pv->buf_out[0] = hb_video_buffer_init( width, height );
535 pv->buf_out[1] = hb_video_buffer_init( width, height );
536 pv->buf_settings = hb_buffer_init( 0 );
539 pv->yadif_mode = YADIF_MODE_DEFAULT;
540 pv->yadif_parity = YADIF_PARITY_DEFAULT;
542 pv->mcdeint_mode = MCDEINT_MODE_DEFAULT;
543 pv->mcdeint_qp = MCDEINT_QP_DEFAULT;
/* The settings string holds up to four colon-separated integers
   (presumably the four fields defaulted above - confirm against the full file). */
547 sscanf( settings, "%d:%d:%d:%d",
554 pv->cpu_count = hb_get_cpu_count();
556 /* Allocate yadif specific buffers */
557 if( pv->yadif_mode >= 0 )
560 for( i = 0; i < 3; i++ )
/* Reference planes are padded: width rounded up to a multiple of 32, height + 6
   rounded up likewise, both shifted by is_chroma (set on a line not shown). */
563 int w = ((width + 31) & (~31))>>is_chroma;
564 int h = ((height+6+ 31) & (~31))>>is_chroma;
566 pv->yadif_ref_stride[i] = w;
/* The stored pointer is advanced 3 rows into the allocation; hb_deinterlace_close
   subtracts the same offset before calling free.
   NOTE(review): this is arithmetic on the void * returned by malloc (a compiler
   extension), and the result is not checked for NULL. */
568 for( j = 0; j < 3; j++ )
570 pv->yadif_ref[j][i] = malloc( w*h*sizeof(uint8_t) ) + 3*w;
575 * Create yadif threads and locks.
577 pv->yadif_threads = malloc( sizeof( hb_thread_t* ) * pv->cpu_count );
578 pv->yadif_begin_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
579 pv->yadif_complete_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
580 pv->yadif_arguments = malloc( sizeof( yadif_arguments_t ) * pv->cpu_count );
/* One worker per CPU, each with its own argument block and pair of locks. */
582 for( i = 0; i < pv->cpu_count; i++ )
584 yadif_thread_arg_t *thread_args;
586 thread_args = malloc( sizeof( yadif_thread_arg_t ) );
589 thread_args->pv = pv;
590 thread_args->segment = i;
592 pv->yadif_begin_lock[i] = hb_lock_init();
593 pv->yadif_complete_lock[i] = hb_lock_init();
596 * Important to start off with the threads locked waiting
599 hb_lock( pv->yadif_begin_lock[i] );
601 pv->yadif_arguments[i].stop = 0;
602 pv->yadif_arguments[i].dst = NULL;
604 pv->yadif_threads[i] = hb_thread_init( "yadif_filter_segment",
607 HB_NORMAL_PRIORITY );
609 hb_error( "Yadif could not create threads" );
614 /* Allocate mcdeint specific buffers */
615 if( pv->mcdeint_mode >= 0 )
618 avcodec_register_all();
620 AVCodec * enc = avcodec_find_encoder( CODEC_ID_SNOW );
/* NOTE(review): the context allocation below appears to sit inside this
   3-iteration loop while being stored in the single pv->mcdeint_avctx_enc.
   If so, the first two contexts are overwritten without being released. Confirm
   the loop extent against the full file. */
623 for (i = 0; i < 3; i++ )
625 AVCodecContext * avctx_enc;
627 avctx_enc = pv->mcdeint_avctx_enc = avcodec_alloc_context();
629 avctx_enc->width = width;
630 avctx_enc->height = height;
631 avctx_enc->time_base = (AVRational){1,25}; // meaningless
632 avctx_enc->gop_size = 300;
633 avctx_enc->max_b_frames = 0;
634 avctx_enc->pix_fmt = PIX_FMT_YUV420P;
635 avctx_enc->flags = CODEC_FLAG_QSCALE | CODEC_FLAG_LOW_DELAY;
636 avctx_enc->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
637 avctx_enc->global_quality = 1;
638 avctx_enc->flags2 = CODEC_FLAG2_MEMC_ONLY;
639 avctx_enc->me_cmp = FF_CMP_SAD; //SSE;
640 avctx_enc->me_sub_cmp = FF_CMP_SAD; //SSE;
641 avctx_enc->mb_cmp = FF_CMP_SSE;
/* mcdeint_mode selects extra motion-estimation options; the case labels are not visible here. */
643 switch( pv->mcdeint_mode )
648 avctx_enc->me_method = ME_UMH;
650 avctx_enc->flags |= CODEC_FLAG_4MV;
651 avctx_enc->dia_size =2;
653 avctx_enc->flags |= CODEC_FLAG_QPEL;
656 hb_avcodec_open(avctx_enc, enc);
/* Encoder input frame and output buffer; the buffer is sized at 10 bytes per luma pixel. */
659 pv->mcdeint_frame = avcodec_alloc_frame();
660 pv->mcdeint_outbuf_size = width * height * 10;
661 pv->mcdeint_outbuf = malloc( pv->mcdeint_outbuf_size );
/*
 * Release everything hb_deinterlace_init created: output and settings
 * buffers, yadif reference planes, worker threads and their locks, and the
 * mcdeint encoder context and output buffer.
 *
 * NOTE(review): brace-only lines and some guard conditions are not visible
 * in this listing, and no release of pv->mcdeint_frame is visible either.
 */
667 void hb_deinterlace_close( hb_filter_private_t * pv )
674 /* Cleanup frame buffers */
677 hb_buffer_close( &pv->buf_out[0] );
681 hb_buffer_close( &pv->buf_out[1] );
683 if (pv->buf_settings )
685 hb_buffer_close( &pv->buf_settings );
688 /* Cleanup yadif specific buffers */
689 if( pv->yadif_mode >= 0 )
/* Visit the 3 slots x 3 planes allocated in init: i%3 is the slot, i/3 the plane. */
692 for( i = 0; i<3*3; i++ )
694 uint8_t **p = &pv->yadif_ref[i%3][i/3];
/* Undo the 3-row offset applied at allocation time before freeing. */
697 free( *p - 3*pv->yadif_ref_stride[i/3] );
702 for( i = 0; i < pv->cpu_count; i++)
705 * Tell each yadif thread to stop, and then cleanup.
/* Set the stop flag first, then release the begin lock so the worker wakes up and sees it. */
707 pv->yadif_arguments[i].stop = 1;
708 hb_unlock( pv->yadif_begin_lock[i] );
710 hb_thread_close( &pv->yadif_threads[i] );
711 hb_lock_close( &pv->yadif_begin_lock[i] );
712 hb_lock_close( &pv->yadif_complete_lock[i] );
716 * free memory for yadif structs
718 free( pv->yadif_threads );
719 free( pv->yadif_begin_lock );
720 free( pv->yadif_complete_lock );
721 free( pv->yadif_arguments );
724 /* Cleanup mcdeint specific buffers */
725 if( pv->mcdeint_mode >= 0 )
/* Only the context currently stored in pv is closed and freed here. */
727 if( pv->mcdeint_avctx_enc )
729 hb_avcodec_close( pv->mcdeint_avctx_enc );
730 av_freep( &pv->mcdeint_avctx_enc );
732 if( pv->mcdeint_outbuf )
734 free( pv->mcdeint_outbuf );
/*
 * Filter one input buffer.
 *
 * If yadif_mode < 0, the frame is deinterlaced with avpicture_deinterlace
 * into buf_out[0]. Otherwise the frame is pushed into the yadif reference
 * cache and yadif runs, followed by mcdeint when mcdeint_mode >= 0. The
 * output carries the settings buffered from the previous input, and the
 * current input's settings are buffered for the next call.
 *
 * FILTER_FAILED is returned when the parameter check at the top fails.
 * NOTE(review): the other return statements, some parameter lines, the start
 * of the first condition and all brace-only lines are not visible in this
 * listing.
 */
741 int hb_deinterlace_work( hb_buffer_t * buf_in,
742 hb_buffer_t ** buf_out,
746 hb_filter_private_t * pv )
/* Reject input whose format or geometry differs from what init was given. */
749 pix_fmt != pv->pix_fmt ||
750 width != pv->width[0] ||
751 height != pv->height[0] )
753 return FILTER_FAILED;
756 avpicture_fill( &pv->pic_in, buf_in->data,
757 pix_fmt, width, height );
759 /* Use libavcodec deinterlace if yadif_mode < 0 */
760 if( pv->yadif_mode < 0 )
762 avpicture_fill( &pv->pic_out, pv->buf_out[0]->data,
763 pix_fmt, width, height );
765 avpicture_deinterlace( &pv->pic_out, &pv->pic_in,
766 pix_fmt, width, height );
768 hb_buffer_copy_settings( pv->buf_out[0], buf_in );
770 *buf_out = pv->buf_out[0];
775 /* Determine if top-field first layout */
/* A negative yadif_parity takes the field order from the buffer flag; otherwise
   the low bit of yadif_parity decides (0 means top field first). */
777 if( pv->yadif_parity < 0 )
779 tff = !!(buf_in->flags & PIC_FLAG_TOP_FIELD_FIRST);
783 tff = (pv->yadif_parity & 1) ^ 1;
786 /* Store current frame in yadif cache */
787 yadif_store_ref( (const uint8_t**)pv->pic_in.data, pv );
789 /* If yadif is not ready, store another ref and return FILTER_DELAY */
790 if( pv->yadif_ready == 0 )
792 yadif_store_ref( (const uint8_t**)pv->pic_in.data, pv );
794 hb_buffer_copy_settings( pv->buf_settings, buf_in );
796 /* don't let 'work_loop' send a chapter mark upstream */
797 buf_in->new_chap = 0;
804 /* Perform yadif and mcdeint filtering */
/* One pass when bit 0 of yadif_mode is clear, two passes (frame = 0, 1) when it is set. */
806 for( frame = 0; frame <= (pv->yadif_mode & 1); frame++ )
808 int parity = frame ^ tff ^ 1;
/* yadif writes into one of the two output buffers; mcdeint, when enabled, reads
   that result and writes into the other one. */
810 avpicture_fill( &pv->pic_out, pv->buf_out[!(frame^1)]->data,
811 pix_fmt, width, height );
813 yadif_filter( pv->pic_out.data, parity, tff, pv );
815 if( pv->mcdeint_mode >= 0 )
817 avpicture_fill( &pv->pic_in, pv->buf_out[(frame^1)]->data,
818 pix_fmt, width, height );
820 mcdeint_filter( pv->pic_in.data, pv->pic_out.data, parity, pv );
822 *buf_out = pv->buf_out[ (frame^1)];
826 *buf_out = pv->buf_out[!(frame^1)];
830 /* Copy buffered settings to output buffer settings */
831 hb_buffer_copy_settings( *buf_out, pv->buf_settings );
833 /* Replace buffered settings with input buffer settings */
834 hb_buffer_copy_settings( pv->buf_settings, buf_in );
836 /* don't let 'work_loop' send a chapter mark upstream */
837 buf_in->new_chap = 0;