OSDN Git Service

This should fix the flickering line bugs with deinterlace and decomb. Yadif needs...
[handbrake-jp/handbrake-jp-git.git] / libhb / decomb.c
1 /* $Id: decomb.c,v 1.14 2008/04/25 5:00:00 jbrjake Exp $
2
3    This file is part of the HandBrake source code.
4    Homepage: <http://handbrake.fr/>.
5    It may be used under the terms of the GNU General Public License. 
6    
7    The yadif algorithm was created by Michael Niedermayer. */
8 #include "hb.h"
9 #include "libavcodec/avcodec.h"
10 #include "mpeg2dec/mpeg2.h"
11
/* NOTE(review): SUPPRESS_AV_LOG is not referenced in the visible part of
   this file -- confirm where (or whether) it is consumed. */
12 #define SUPPRESS_AV_LOG
13
/* Fallback values for the filter's mode and parity settings.
   NOTE(review): presumably applied by hb_decomb_init(), which is not
   visible here -- confirm. */
14 #define MODE_DEFAULT     1
15 #define PARITY_DEFAULT   -1
16
/* Fallback values for the mcdeint_mode and mcdeint_qp settings
   (same caveat as above). */
17 #define MCDEINT_MODE_DEFAULT   -1
18 #define MCDEINT_QP_DEFAULT      1
19
/* Absolute value. The argument is evaluated twice, so do not pass an
   expression with side effects. */
20 #define ABS(a) ((a) > 0 ? (a) : (-(a)))
/* Three-way minimum / maximum built on two-way MIN / MAX, which are not
   defined in this file (presumably supplied by an included header). */
21 #define MIN3(a,b,c) MIN(MIN(a,b),c)
22 #define MAX3(a,b,c) MAX(MAX(a,b),c)
23
/* Work description read by one yadif worker thread for its segment. */
24 typedef struct yadif_arguments_s {
25     uint8_t **dst;      /* Output planes, indexed dst[plane][y*width]; NULL makes the worker report "no work" */
26     int parity;         /* XORed with the line number to pick the lines to filter, and with tff for yadif */
27     int tff;            /* Passed on as parity ^ tff; presumably "top field first" -- confirm */
28     int stop;           /* Non-zero tells the worker thread to leave its loop */
29     int is_combed;      /* 1 = yadif-filter the selected lines, 2 = blend every line */
30 } yadif_arguments_t;
31
/* Work description read by one comb-detection worker thread. */
32 typedef struct decomb_arguments_s {
33     int stop;           /* Non-zero tells the worker thread to leave its loop */
34 } decomb_arguments_t;
35
/* Private state of the decomb filter: settings, reference frames, the comb
   mask and the worker-thread bookkeeping. */
36 struct hb_filter_private_s
37 {
38     int              pix_fmt;
    /* Per-plane dimensions, indexed by plane number (0..2). */
39     int              width[3];
40     int              height[3];
41
    /* mode > 0 enables cubic interpolation in yadif_filter_line();
       mode == 4 sends every combed frame to the blend filter. */
42     int              mode;
    /* Selects the comb scoring method in detect_combed_segment() (0, 1 or 2). */
43     int              spatial_metric;
    /* Thresholds used by detect_combed_segment(); a motion_threshold <= 0
       skips the motion test. */
44     int              motion_threshold;
45     int              spatial_threshold;
    /* Window size and per-window score limit used by check_combing_mask(). */
46     int              block_threshold;
47     int              block_width;
48     int              block_height;
49
50     int              parity;
51     
52     int              yadif_ready;
53
54     int              mcdeint_mode;
55     int              mcdeint_qp;
56
57     int              mcdeint_outbuf_size;
58     uint8_t        * mcdeint_outbuf;
59     AVCodecContext * mcdeint_avctx_enc;
60     AVFrame        * mcdeint_frame;
61     AVFrame        * mcdeint_frame_dec;
62
    /* Running frame counters. detect_combed_segment() treats "all three are
       zero" as the first frame and skips the motion requirement. */
63     int              yadif_deinterlaced_frames;
64     int              blend_deinterlaced_frames;
65     int              unfiltered_frames;
66
    /* Reference frame slots [slot][plane]. store_ref() rotates the slots and
       writes the newest picture into slot 2; slots 0, 1 and 2 are read as
       previous, current and next frame. ref_stride is the row pitch of
       these buffers (and of the mask) per plane. */
67     uint8_t        * ref[4][3];
68     int              ref_stride[3];
69
70     /* Make a buffer to store a comb mask. */
    /* Entries are 255 (combed) or 0; the visible code only fills plane 0. */
71     uint8_t        * mask[3];
72
73     AVPicture        pic_in;
74     AVPicture        pic_out;
75     hb_buffer_t *    buf_out[2];
    /* check_combing_mask() tests (buf_settings->flags & 16) to tell film
       from video content. */
76     hb_buffer_t *    buf_settings;
77     
    /* Number of segments each plane is split into; one worker per segment. */
78     int              cpu_count;
79
80     hb_thread_t    ** yadif_threads;         // Threads for Yadif - one per CPU
81     hb_lock_t      ** yadif_begin_lock;      // Thread has work
82     hb_lock_t      ** yadif_complete_lock;   // Thread has completed work
83     yadif_arguments_t *yadif_arguments;      // Arguments to thread for work
84     
85     hb_thread_t    ** decomb_threads;        // Threads for comb detection - one per CPU
86     hb_lock_t      ** decomb_begin_lock;     // Thread has work
87     hb_lock_t      ** decomb_complete_lock;  // Thread has completed work
88     decomb_arguments_t *decomb_arguments;    // Arguments to thread for work
89     
90 };
91
/* Forward declarations of the three filter entry points so they can be
   referenced by the hb_filter_decomb table below. Their definitions are
   not part of the visible portion of this file. */
92 hb_filter_private_t * hb_decomb_init( int pix_fmt,
93                                            int width,
94                                            int height,
95                                            char * settings );
96
97 int hb_decomb_work(      const hb_buffer_t * buf_in,
98                          hb_buffer_t ** buf_out,
99                          int pix_fmt,
100                          int width,
101                          int height,
102                          hb_filter_private_t * pv );
103
104 void hb_decomb_close( hb_filter_private_t * pv );
105
/* Descriptor that exposes the decomb filter. The initializer is positional,
   so the order must match hb_filter_object_t, which is declared elsewhere.
   NOTE(review): the fields look like id, description, settings string and
   the init / work / close callbacks -- confirm against the type. */
106 hb_filter_object_t hb_filter_decomb =
107 {
108     FILTER_DECOMB,
109     "Deinterlaces selectively with yadif/mcdeint and lowpass5 blending",
110     NULL,
111     hb_decomb_init,
112     hb_decomb_work,
113     hb_decomb_close,
114 };
115
/*
 * Four-tap vertical interpolation between y1 and y2, with y0 and y3 as the
 * outer samples. The taps are weighted -3, 23, 23, -3 and the sum is divided
 * by 40 (integer division), then limited to the 0..255 range.
 *
 * Weights from http://www.neuron2.net/library/cubicinterp.html
 */
int cubic_interpolate( int y0, int y1, int y2, int y3 )
{
    const int weighted_sum = ( -3 * y0 ) + ( 23 * y1 ) + ( 23 * y2 ) + ( -3 * y3 );
    const int value        = weighted_sum / 40;

    /* Clamp to what fits in an 8-bit sample. */
    if( value < 0 )
    {
        return 0;
    }
    return ( value > 255 ) ? 255 : value;
}
133
134 static void store_ref( const uint8_t ** pic,
135                              hb_filter_private_t * pv )
136 {
137     memcpy( pv->ref[3],
138             pv->ref[0],
139             sizeof(uint8_t *)*3 );
140
141     memmove( pv->ref[0],
142              pv->ref[1],
143              sizeof(uint8_t *)*3*3 );
144
145     int i;
146     for( i = 0; i < 3; i++ )
147     {
148         const uint8_t * src = pic[i];
149         uint8_t * ref = pv->ref[2][i];
150
151         int w = pv->width[i];
152         int h = pv->height[i];
153         int ref_stride = pv->ref_stride[i];
154
155         int y;
156         for( y = 0; y < pv->height[i]; y++ )
157         {
158             memcpy(ref, src, w);
159             src = (uint8_t*)src + w;
160             ref = (uint8_t*)ref + ref_stride;
161         }
162     }
163 }
164
165 static void get_ref( uint8_t ** pic, hb_filter_private_t * pv, int frm )
166 {
167     int i;
168     for( i = 0; i < 3; i++ )
169     {
170         uint8_t * dst = pic[i];
171         const uint8_t * ref = pv->ref[frm][i];
172         int w = pv->width[i];
173         int ref_stride = pv->ref_stride[i];
174         
175         int y;
176         for( y = 0; y < pv->height[i]; y++ )
177         {
178             memcpy(dst, ref, w);
179             dst += w;
180             ref += ref_stride;
181         }
182     }
183 }
184
/*
 * Five-tap vertical low-pass filter for a single sample.
 *
 * The taps are weighted -1, 2, 6, 2, -1 (from two rows above to two rows
 * below), the sum is divided by 8 with integer division and the result is
 * limited to 0..255.
 */
int blend_filter_pixel( int up2, int up1, int current, int down1, int down2 )
{
    int acc = -up2 + ( up1 * 2 ) + ( current * 6 ) + ( down1 * 2 ) - down2;

    acc /= 8;

    /* Keep the value inside the 8-bit sample range. */
    if( acc > 255 )
    {
        return 255;
    }
    return ( acc < 0 ) ? 0 : acc;
}
207
208 static void blend_filter_line( uint8_t *dst,
209                                uint8_t *cur,
210                                int plane,
211                                int y,
212                                hb_filter_private_t * pv )
213 {
214     int w = pv->width[plane];
215     int refs = pv->ref_stride[plane];
216     int x;
217
218     for( x = 0; x < w; x++)
219     {
220         int a, b, c, d, e;
221         
222         a = cur[-2*refs];
223         b = cur[-refs];
224         c = cur[0];
225         d = cur[+refs];
226         e = cur[2*refs];
227         
228         if( y == 0 )
229         {
230             /* First line, so A and B don't exist.*/
231             a = cur[0];
232             b = cur[0];
233         }
234         else if( y == 1 )
235         {
236             /* Second line, no A. */
237             a = cur[-refs];
238         }
239         else if( y == (pv->height[plane] - 2) )
240         {
241             /* Second to last line, no E. */
242             e = cur[+refs];
243         }
244         else if( y == (pv->height[plane] -1) )
245         {
246             /* Last line, no D or E. */
247             d = cur[0];
248             e = cur[0];
249         }
250                 
251         dst[0] = blend_filter_pixel( a, b, c, d, e );
252
253         dst++;
254         cur++;
255     }
256 }
257
258 int check_combing_mask( hb_filter_private_t * pv )
259 {
260     /* Go through the mask in X*Y blocks. If any of these windows
261        have threshold or more combed pixels, consider the whole
262        frame to be combed and send it on to be deinterlaced.     */
263
264     /* Block mask threshold -- The number of pixels
265        in a block_width * block_height window of
266        he mask that need to show combing for the
267        whole frame to be seen as such.            */
268     int threshold       = pv->block_threshold;
269     int block_width     = pv->block_width;
270     int block_height    = pv->block_height;
271     int block_x, block_y;
272     int block_score = 0; int send_to_blend = 0;
273     
274     int x, y, k;
275
276     for( k = 0; k < 1; k++ )
277     {
278         int ref_stride = pv->ref_stride[k];
279         for( y = 0; y < ( pv->height[k] - block_height ); y = y + block_height )
280         {
281             for( x = 0; x < ( pv->width[k] - block_width ); x = x + block_width )
282             {
283                 block_score = 0;
284                 for( block_y = 0; block_y < block_height; block_y++ )
285                 {
286                     for( block_x = 0; block_x < block_width; block_x++ )
287                     {
288                         int mask_y = y + block_y;
289                         int mask_x = x + block_x;
290                         
291                         /* We only want to mark a pixel in a block as combed
292                            if the pixels above and below are as well. Got to
293                            handle the top and bottom lines separately.       */
294                         if( y + block_y == 0 )
295                         {
296                             if( pv->mask[k][mask_y*ref_stride+mask_x    ] == 255 &&
297                                 pv->mask[k][mask_y*ref_stride+mask_x + 1] == 255 )
298                                     block_score++;
299                         }
300                         else if( y + block_y == pv->height[k] - 1 )
301                         {
302                             if( pv->mask[k][mask_y*ref_stride+mask_x - 1] == 255 &&
303                                 pv->mask[k][mask_y*ref_stride+mask_x    ] == 255 )
304                                     block_score++;
305                         }
306                         else
307                         {
308                             if( pv->mask[k][mask_y*ref_stride+mask_x - 1] == 255 &&
309                                 pv->mask[k][mask_y*ref_stride+mask_x    ] == 255 &&
310                                 pv->mask[k][mask_y*ref_stride+mask_x + 1] == 255 )
311                                     block_score++;
312                         } 
313                     }
314                 }
315
316                 if( block_score >= ( threshold / 2 ) )
317                 {
318 #if 0
319                     hb_log("decomb: frame %i | score %i | type %s", pv->yadif_deinterlaced_frames + pv->blend_deinterlaced_frames +  pv->unfiltered_frames + 1, block_score, pv->buf_settings->flags & 16 ? "Film" : "Video");
320 #endif
321                     if ( block_score <= threshold && !( pv->buf_settings->flags & 16) )
322                     {
323                         /* Blend video content that scores between
324                            ( threshold / 2 ) and threshold.        */
325                         send_to_blend = 1;
326                     }
327                     else if( block_score > threshold )
328                     {
329                         if( pv->buf_settings->flags & 16 )
330                         {
331                             /* Blend progressive content above the threshold.*/
332                             return 2;
333                         }
334                         else
335                         {
336                             /* Yadif deinterlace video content above the threshold. */
337                             return 1;
338                         }
339                     }
340                 }
341             }
342         } 
343     }
344     
345     if( send_to_blend )
346     {
347         return 2;
348     }
349     else
350     {
351         /* Consider this frame to be uncombed. */
352         return 0;
353     }
354 }
355
356 int detect_combed_segment( hb_filter_private_t * pv, int segment_start, int segment_stop )
357 {
358     /* A mish-mash of various comb detection tricks
359        picked up from neuron2's Decomb plugin for
360        AviSynth and tritical's IsCombedT and
361        IsCombedTIVTC plugins.                       */
362        
363     int x, y, k, width, height;
364     
365     /* Comb scoring algorithm */
366     int spatial_metric  = pv->spatial_metric;
367     /* Motion threshold */
368     int mthresh         = pv->motion_threshold;
369     /* Spatial threshold */
370     int athresh         = pv->spatial_threshold;
371     int athresh_squared = athresh * athresh;
372     int athresh6        = 6 *athresh;
373
374     /* One pas for Y, one pass for U, one pass for V */    
375     for( k = 0; k < 1; k++ )
376     {
377         int ref_stride  = pv->ref_stride[k];
378         width           = pv->width[k];
379         height          = pv->height[k];
380         
381         /* Comb detection has to start at y = 2 and end at
382            y = height - 2, because it needs to examine
383            2 pixels above and 2 below the current pixel.      */
384         if( segment_start < 2 )
385             segment_start = 2;
386         if( segment_stop > height - 2 )
387             segment_stop = height - 2;
388             
389         for( y =  segment_start; y < segment_stop; y++ )
390         {
391             /* These are just to make the buffer locations easier to read. */
392             int back_2    = ( y - 2 )*ref_stride ;
393             int back_1    = ( y - 1 )*ref_stride;
394             int current   =         y*ref_stride;
395             int forward_1 = ( y + 1 )*ref_stride;
396             int forward_2 = ( y + 2 )*ref_stride;
397             
398             /* We need to examine a column of 5 pixels
399                in the prev, cur, and next frames.      */
400             uint8_t previous_frame[5];
401             uint8_t current_frame[5];
402             uint8_t next_frame[5];
403             
404             for( x = 0; x < width; x++ )
405             {
406                 /* Fill up the current frame array with the current pixel values.*/
407                 current_frame[0] = pv->ref[1][k][back_2    + x];
408                 current_frame[1] = pv->ref[1][k][back_1    + x];
409                 current_frame[2] = pv->ref[1][k][current   + x];
410                 current_frame[3] = pv->ref[1][k][forward_1 + x];
411                 current_frame[4] = pv->ref[1][k][forward_2 + x];
412
413                 int up_diff   = current_frame[2] - current_frame[1];
414                 int down_diff = current_frame[2] - current_frame[3];
415
416                 if( ( up_diff >  athresh && down_diff >  athresh ) ||
417                     ( up_diff < -athresh && down_diff < -athresh ) )
418                 {
419                     /* The pixel above and below are different,
420                        and they change in the same "direction" too.*/
421                     int motion = 0;
422                     if( mthresh > 0 )
423                     {
424                         /* Make sure there's sufficient motion between frame t-1 to frame t+1. */
425                         previous_frame[0] = pv->ref[0][k][back_2    + x];
426                         previous_frame[1] = pv->ref[0][k][back_1    + x];
427                         previous_frame[2] = pv->ref[0][k][current   + x];
428                         previous_frame[3] = pv->ref[0][k][forward_1 + x];
429                         previous_frame[4] = pv->ref[0][k][forward_2 + x];
430                         next_frame[0]     = pv->ref[2][k][back_2    + x];
431                         next_frame[1]     = pv->ref[2][k][back_1    + x];
432                         next_frame[2]     = pv->ref[2][k][current   + x];
433                         next_frame[3]     = pv->ref[2][k][forward_1 + x];
434                         next_frame[4]     = pv->ref[2][k][forward_2 + x];
435                         
436                         if( abs( previous_frame[2] - current_frame[2] ) > mthresh &&
437                             abs(  current_frame[1] - next_frame[1]    ) > mthresh &&
438                             abs(  current_frame[3] - next_frame[3]    ) > mthresh )
439                                 motion++;
440                         if( abs(     next_frame[2] - current_frame[2] ) > mthresh &&
441                             abs( previous_frame[1] - current_frame[1] ) > mthresh &&
442                             abs( previous_frame[3] - current_frame[3] ) > mthresh )
443                                 motion++;
444                     }
445                     else
446                     {
447                         /* User doesn't want to check for motion,
448                            so move on to the spatial check.       */
449                         motion = 1;
450                     }
451                            
452                     if( motion || ( pv->yadif_deinterlaced_frames==0 && pv->blend_deinterlaced_frames==0 && pv->unfiltered_frames==0) )
453                     {
454                            /* That means it's time for the spatial check.
455                               We've got several options here.             */
456                         if( spatial_metric == 0 )
457                         {
458                             /* Simple 32detect style comb detection */
459                             if( ( abs( current_frame[2] - current_frame[4] ) < 10  ) &&
460                                 ( abs( current_frame[2] - current_frame[3] ) > 15 ) )
461                             {
462                                 pv->mask[k][y*ref_stride + x] = 255;
463                             }
464                             else
465                             {
466                                 pv->mask[k][y*ref_stride + x] = 0;
467                             }
468                         }
469                         else if( spatial_metric == 1 )
470                         {
471                             /* This, for comparison, is what IsCombed uses.
472                                It's better, but still noise senstive.      */
473                                int combing = ( current_frame[1] - current_frame[2] ) *
474                                              ( current_frame[3] - current_frame[2] );
475                                
476                                if( combing > athresh_squared )
477                                    pv->mask[k][y*ref_stride + x] = 255; 
478                                else
479                                    pv->mask[k][y*ref_stride + x] = 0;
480                         }
481                         else if( spatial_metric == 2 )
482                         {
483                             /* Tritical's noise-resistant combing scorer.
484                                The check is done on a bob+blur convolution. */
485                             int combing = abs( current_frame[0]
486                                              + ( 4 * current_frame[2] )
487                                              + current_frame[4]
488                                              - ( 3 * ( current_frame[1]
489                                                      + current_frame[3] ) ) );
490
491                             /* If the frame is sufficiently combed,
492                                then mark it down on the mask as 255. */
493                             if( combing > athresh6 )
494                                 pv->mask[k][y*ref_stride + x] = 255; 
495                             else
496                                 pv->mask[k][y*ref_stride + x] = 0;
497                         }
498                     }
499                     else
500                     {
501                         pv->mask[k][y*ref_stride + x] = 0;
502                     }
503                 }
504                 else
505                 {
506                     pv->mask[k][y*ref_stride + x] = 0;
507                 }
508             }
509         }
510     }
511 }
512
/* Start-up argument for a comb-detection worker thread. The thread frees
   this structure itself when it exits. */
513 typedef struct decomb_thread_arg_s {
514     hb_filter_private_t *pv;    /* Shared filter state */
515     int segment;                /* Index of the segment this thread owns */
516 } decomb_thread_arg_t;
517
518 /*
519  * comb detect this segment of all three planes in a single thread.
520  */
521 void decomb_filter_thread( void *thread_args_v )
522 {
523     decomb_arguments_t *decomb_work = NULL;
524     hb_filter_private_t * pv;
525     int run = 1;
526     int segment, segment_start, segment_stop, plane;
527     decomb_thread_arg_t *thread_args = thread_args_v;
528
529     pv = thread_args->pv;
530     segment = thread_args->segment;
531
532     hb_log("decomb thread started for segment %d", segment);
533
534     while( run )
535     {
536         /*
537          * Wait here until there is work to do. hb_lock() blocks until
538          * render releases it to say that there is more work to do.
539          */
540         hb_lock( pv->decomb_begin_lock[segment] );
541
542         decomb_work = &pv->decomb_arguments[segment];
543
544         if( decomb_work->stop )
545         {
546             /*
547              * No more work to do, exit this thread.
548              */
549             run = 0;
550             continue;
551         } 
552
553         /*
554          * Process segment (for now just from luma)
555          */
556         for( plane = 0; plane < 1; plane++)
557         {
558
559             int w = pv->width[plane];
560             int h = pv->height[plane];
561             int ref_stride = pv->ref_stride[plane];
562             segment_start = ( h / pv->cpu_count ) * segment;
563             if( segment == pv->cpu_count - 1 )
564             {
565                 /*
566                  * Final segment
567                  */
568                 segment_stop = h;
569             } else {
570                 segment_stop = ( h / pv->cpu_count ) * ( segment + 1 );
571             }
572             
573             detect_combed_segment( pv, segment_start, segment_stop );
574         }
575         /*
576          * Finished this segment, let everyone know.
577          */
578         hb_unlock( pv->decomb_complete_lock[segment] );
579     }
580     free( thread_args_v );
581 }
582
583 int comb_segmenter( hb_filter_private_t * pv )
584 {
585     int segment;
586
587     for( segment = 0; segment < pv->cpu_count; segment++ )
588     {  
589         /*
590          * Let the thread for this plane know that we've setup work 
591          * for it by releasing the begin lock (ensuring that the
592          * complete lock is already locked so that we block when
593          * we try to lock it again below).
594          */
595         hb_lock( pv->decomb_complete_lock[segment] );
596         hb_unlock( pv->decomb_begin_lock[segment] );
597     }
598
599     /*
600      * Wait until all three threads have completed by trying to get
601      * the complete lock that we locked earlier for each thread, which
602      * will block until that thread has completed the work on that
603      * plane.
604      */
605     for( segment = 0; segment < pv->cpu_count; segment++ )
606     {
607         hb_lock( pv->decomb_complete_lock[segment] );
608         hb_unlock( pv->decomb_complete_lock[segment] );
609     }
610     
611     return check_combing_mask( pv );
612 }
613
/*
 * Reconstruct one missing line of a field with the yadif algorithm.
 *
 * dst     output row; width[plane] samples are written
 * prev    the same row in the previous frame
 * cur     the same row in the current frame
 * next    the same row in the next frame
 * plane   plane index used to look up width, height and stride
 * parity  non-zero: prev/cur serve as the adjacent fields;
 *         zero: cur/next serve as the adjacent fields
 * y       row number, only used to detect the top and bottom edge
 * pv      filter state (width, height, ref_stride and mode are read)
 *
 * For every sample a spatial prediction is made from the rows above and
 * below (optionally along a diagonal), then limited to the range
 * d +/- diff around the temporal average d.
 *
 * The function reads rows up to 2 away in prev2/next2 and, in cubic mode
 * away from the edges, rows up to 3 away in cur. It also reads up to 4
 * samples left and right of the row.
 * NOTE(review): this assumes the reference buffers are padded accordingly
 * and that callers skip the outermost rows -- confirm against the
 * allocation code and the caller.
 */
614 static void yadif_filter_line( uint8_t *dst,
615                                uint8_t *prev,
616                                uint8_t *cur,
617                                uint8_t *next,
618                                int plane,
619                                int parity,
620                                int y,
621                                hb_filter_private_t * pv )
622 {
623     /* While prev and next point to the previous and next frames,
624        prev2 and next2 will shift depending on the parity, usually 1.
625        They are the previous and next fields, the fields temporally adjacent
626        to the other field in the current frame--the one not being filtered.  */
627     uint8_t *prev2 = parity ? prev : cur ;
628     uint8_t *next2 = parity ? cur  : next;
629     int w = pv->width[plane];
630     int refs = pv->ref_stride[plane];
631     int x;
632     
633     /* Decomb's cubic interpolation can only function when there are
634        three samples above and below, so regress to yadif's traditional
635        two-tap interpolation when filtering at the top and bottom edges. */
636     int edge = 0;
637     if( ( y < 3 ) || ( y > ( pv->height[plane] - 4 ) )  )
638         edge = 1;
639
640     for( x = 0; x < w; x++)
641     {
642         /* Pixel above*/
643         int c              = cur[-refs];
644         /* Temporal average: the current location in the adjacent fields */
645         int d              = (prev2[0] + next2[0])>>1;
646         /* Pixel below */
647         int e              = cur[+refs];
648         
649         /* How the current pixel changes between the adjacent fields */
650         int temporal_diff0 = ABS(prev2[0] - next2[0]);
651         /* The average of how much the pixels above and below change from the frame before to now. */
652         int temporal_diff1 = ( ABS(prev[-refs] - cur[-refs]) + ABS(prev[+refs] - cur[+refs]) ) >> 1;
653         /* The average of how much the pixels above and below change from now to the next frame. */
654         int temporal_diff2 = ( ABS(next[-refs] - cur[-refs]) + ABS(next[+refs] - cur[+refs]) ) >> 1;
655         /* For the actual difference, use the largest of the previous average diffs. */
656         int diff           = MAX3(temporal_diff0>>1, temporal_diff1, temporal_diff2);
657         
658         /* SAD of how the pixel-1, the pixel, and the pixel+1 change from the line above to below. */ 
        /* The trailing "- 1" means a diagonal only replaces the vertical
           prediction when its score is lower by at least 2. */
659         int spatial_score  = ABS(cur[-refs-1] - cur[+refs-1]) + ABS(cur[-refs]-cur[+refs]) +
660                                      ABS(cur[-refs+1] - cur[+refs+1]) - 1;         
661         int spatial_pred;
662          
663         /* Spatial pred is either a bilinear or cubic vertical interpolation. */
664         if( pv->mode > 0 && !edge)
665         {
666             spatial_pred = cubic_interpolate( cur[-3*refs], cur[-refs], cur[+refs], cur[3*refs] );
667         }
668         else
669         {
670             spatial_pred = (c+e)>>1;
671         }
672
673 /* EDDI: Edge Directed Deinterlacing Interpolation
674    Uses the Martinez-Lim Line Shift Parametric Modeling algorithm...I think.
675    Checks 4 different slopes to see if there is more similarity along a diagonal
676    than there was vertically. If a diagonal is more similar, then it indicates
677    an edge, so interpolate along that instead of a vertical line, using either
678    linear or cubic interpolation depending on mode. */
/* YADIF_CHECK(j) deliberately leaves two braces open: its own block and the
   body of "if( score < spatial_score )". The "}} }}" after each pair of
   invocations closes them. As a result the second check of each pair
   (j = -2 or j = 2) is nested inside the first and only runs when the first
   (j = -1 or j = 1) lowered the score. Do not add statements between the
   macro definition and its two uses without keeping the braces balanced. */
679 #define YADIF_CHECK(j)\
680         {   int score = ABS(cur[-refs-1+j] - cur[+refs-1-j])\
681                       + ABS(cur[-refs  +j] - cur[+refs  -j])\
682                       + ABS(cur[-refs+1+j] - cur[+refs+1-j]);\
683             if( score < spatial_score ){\
684                 spatial_score = score;\
685                 if( pv->mode > 0 && !edge )\
686                 {\
687                     switch(j)\
688                     {\
689                         case -1:\
690                             spatial_pred = cubic_interpolate(cur[-3 * refs - 3], cur[-refs -1], cur[+refs + 1], cur[3* refs + 3] );\
691                         break;\
692                         case -2:\
693                             spatial_pred = cubic_interpolate( ( ( cur[-3*refs - 4] + cur[-refs - 4] ) / 2 ) , cur[-refs -2], cur[+refs + 2], ( ( cur[3*refs + 4] + cur[refs + 4] ) / 2 ) );\
694                         break;\
695                         case 1:\
696                             spatial_pred = cubic_interpolate(cur[-3 * refs +3], cur[-refs +1], cur[+refs - 1], cur[3* refs -3] );\
697                         break;\
698                         case 2:\
699                             spatial_pred = cubic_interpolate(( ( cur[-3*refs + 4] + cur[-refs + 4] ) / 2 ), cur[-refs +2], cur[+refs - 2], ( ( cur[3*refs - 4] + cur[refs - 4] ) / 2 ) );\
700                         break;\
701                     }\
702                 }\
703                 else\
704                 {\
705                     spatial_pred = ( cur[-refs +j] + cur[+refs -j] ) >>1;\
706                 }\
707                 
708                 YADIF_CHECK(-1) YADIF_CHECK(-2) }} }}
709                 YADIF_CHECK( 1) YADIF_CHECK( 2) }} }}
710                                 
711         /* Temporally adjust the spatial prediction by
712            comparing against lines in the adjacent fields. */
713         int b = (prev2[-2*refs] + next2[-2*refs])>>1;
714         int f = (prev2[+2*refs] + next2[+2*refs])>>1;
715         
716         /* Find the median value */
717         int max = MAX3(d-e, d-c, MIN(b-c, f-e));
718         int min = MIN3(d-e, d-c, MAX(b-c, f-e));
719         diff = MAX3( diff, min, -max );
720         
        /* Limit the spatial prediction to d - diff .. d + diff. */
721         if( spatial_pred > d + diff )
722         {
723             spatial_pred = d + diff;
724         }
725         else if( spatial_pred < d - diff )
726         {
727             spatial_pred = d - diff;
728         }
729         
730         dst[0] = spatial_pred;
731                         
        /* Advance every row pointer to the next sample. */
732         dst++;
733         cur++;
734         prev++;
735         next++;
736         prev2++;
737         next2++;
738     }
739 }
740
/* Start-up argument for a yadif worker thread. */
741 typedef struct yadif_thread_arg_s {
742     hb_filter_private_t *pv;    /* Shared filter state */
743     int segment;                /* Index of the segment this thread owns */
744 } yadif_thread_arg_t;
745
746 /*
747  * deinterlace this segment of all three planes in a single thread.
748  */
749 void yadif_decomb_filter_thread( void *thread_args_v )
750 {
751     yadif_arguments_t *yadif_work = NULL;
752     hb_filter_private_t * pv;
753     int run = 1;
754     int plane;
755     int segment, segment_start, segment_stop;
756     yadif_thread_arg_t *thread_args = thread_args_v;
757     uint8_t **dst;
758     int parity, tff, y, w, h, penultimate, ultimate, ref_stride, is_combed;
759
760     pv = thread_args->pv;
761     segment = thread_args->segment;
762
763     hb_log("yadif thread started for segment %d", segment);
764
765     while( run )
766     {
767         /*
768          * Wait here until there is work to do. hb_lock() blocks until
769          * render releases it to say that there is more work to do.
770          */
771         hb_lock( pv->yadif_begin_lock[segment] );
772
773         yadif_work = &pv->yadif_arguments[segment];
774
775         if( yadif_work->stop )
776         {
777             /*
778              * No more work to do, exit this thread.
779              */
780             run = 0;
781             continue;
782         } 
783
784         if( yadif_work->dst == NULL )
785         {
786             hb_error( "thread started when no work available" );
787             hb_snooze(500);
788             continue;
789         }
790         
791         is_combed = pv->yadif_arguments[segment].is_combed;
792
793         /*
794          * Process all three planes, but only this segment of it.
795          */
796         for( plane = 0; plane < 3; plane++)
797         {
798
799             dst = yadif_work->dst;
800             parity = yadif_work->parity;
801             tff = yadif_work->tff;
802             w = pv->width[plane];
803             h = pv->height[plane];
804             penultimate = h - 2;
805             ultimate = h - 1;
806             ref_stride = pv->ref_stride[plane];
807             segment_start = ( h / pv->cpu_count ) * segment;
808             if( segment == pv->cpu_count - 1 )
809             {
810                 /*
811                  * Final segment
812                  */
813                 segment_stop = h;
814             } else {
815                 segment_stop = ( h / pv->cpu_count ) * ( segment + 1 );
816             }
817
818             for( y = segment_start; y < segment_stop; y++ )
819             {
820                 if( ( pv->mode == 4 && is_combed ) || is_combed == 2 )
821                 {
822                     /* This line gets blend filtered, not yadif filtered. */
823                     uint8_t *prev = &pv->ref[0][plane][y*ref_stride];
824                     uint8_t *cur  = &pv->ref[1][plane][y*ref_stride];
825                     uint8_t *next = &pv->ref[2][plane][y*ref_stride];
826                     uint8_t *dst2 = &dst[plane][y*w];
827
828                     blend_filter_line( dst2, cur, plane, y, pv );
829                 }
830                 else if( ( ( y ^ parity ) &  1 )  && ( is_combed == 1 ) )
831                 {
832                     /* This line gets yadif filtered. It is the bottom field
833                        when TFF and vice-versa. It's the field that gets
834                        filtered. Because yadif needs 2 lines above and below
835                        the one being filtered, we need to mirror the edges.
836                        When TFF, this means replacing the 2nd line with a
837                        copy of the 1st, and the last with the second-to-last. */
838                     if( y > 1 && y < ( h -2 ) )
839                     {
840                         /* This isn't the top or bottom, proceed as normal to yadif. */
841                         uint8_t *prev = &pv->ref[0][plane][y*ref_stride];
842                         uint8_t *cur  = &pv->ref[1][plane][y*ref_stride];
843                         uint8_t *next = &pv->ref[2][plane][y*ref_stride];
844                         uint8_t *dst2 = &dst[plane][y*w];
845
846                         yadif_filter_line( dst2, 
847                                            prev, 
848                                            cur, 
849                                            next, 
850                                            plane, 
851                                            parity ^ tff,
852                                            y, 
853                                            pv );
854                     }
855                     else if( y == 0 )
856                     {
857                         /* BFF, so y0 = y1 */
858                         memcpy( &dst[plane][y*w],
859                                 &pv->ref[1][plane][1*ref_stride],
860                                 w * sizeof(uint8_t) );
861                     }
862                     else if( y == 1 )
863                     {
864                         /* TFF, so y1 = y0 */
865                         memcpy( &dst[plane][y*w],
866                                 &pv->ref[1][plane][0],
867                                 w * sizeof(uint8_t) );
868                     }
869                     else if( y == penultimate )
870                     {
871                         /* BFF, so penultimate y = ultimate y */
872                         memcpy( &dst[plane][y*w],
873                                 &pv->ref[1][plane][ultimate*ref_stride],
874                                 w * sizeof(uint8_t) );
875                     }
876                     else if( y == ultimate )
877                     {
878                         /* TFF, so ultimate y = penultimate y */
879                         memcpy( &dst[plane][y*w],
880                                 &pv->ref[1][plane][penultimate*ref_stride],
881                                 w * sizeof(uint8_t) );
882                     }
883                 }
884                 else
885                 {
886                     memcpy( &dst[plane][y*w],
887                             &pv->ref[1][plane][y*ref_stride],
888                             w * sizeof(uint8_t) );              
889                 }
890             }
891         }
892         /*
893          * Finished this segment, let everyone know.
894          */
895         hb_unlock( pv->yadif_complete_lock[segment] );
896     }
897     free( thread_args_v );
898 }
899
900 static void yadif_filter( uint8_t ** dst,
901                           int parity,
902                           int tff,
903                           hb_filter_private_t * pv )
904 {
905     
906     int is_combed = comb_segmenter( pv );
907     
908     if( is_combed == 1 )
909     {
910         pv->yadif_deinterlaced_frames++;
911     }
912     else if( is_combed == 2 )
913     {
914         pv->blend_deinterlaced_frames++;
915     }
916     else
917     {
918         pv->unfiltered_frames++;
919     }
920     
921     if( is_combed )
922     {
923         int segment;
924
925         for( segment = 0; segment < pv->cpu_count; segment++ )
926         {  
927             /*
928              * Setup the work for this plane.
929              */
930             pv->yadif_arguments[segment].parity = parity;
931             pv->yadif_arguments[segment].tff = tff;
932             pv->yadif_arguments[segment].dst = dst;
933             pv->yadif_arguments[segment].is_combed = is_combed;
934
935             /*
936              * Let the thread for this plane know that we've setup work 
937              * for it by releasing the begin lock (ensuring that the
938              * complete lock is already locked so that we block when
939              * we try to lock it again below).
940              */
941             hb_lock( pv->yadif_complete_lock[segment] );
942             hb_unlock( pv->yadif_begin_lock[segment] );
943         }
944
945         /*
946          * Wait until all three threads have completed by trying to get
947          * the complete lock that we locked earlier for each thread, which
948          * will block until that thread has completed the work on that
949          * plane.
950          */
951         for( segment = 0; segment < pv->cpu_count; segment++ )
952         {
953             hb_lock( pv->yadif_complete_lock[segment] );
954             hb_unlock( pv->yadif_complete_lock[segment] );
955         }
956
957         /*
958          * Entire frame is now deinterlaced.
959          */
960     }
961     else
962     {
963         /*  Just passing through... */
964         int i;
965         for( i = 0; i < 3; i++ )
966         {
967             uint8_t * ref = pv->ref[1][i];
968             uint8_t * dest = dst[i];
969             
970             int w = pv->width[i];
971             int ref_stride = pv->ref_stride[i];
972             
973             int y;
974             for( y = 0; y < pv->height[i]; y++ )
975             {
976                 memcpy(dest, ref, w);
977                 dest += w;
978                 ref += ref_stride;
979             }
980         }
981     }
982 }
983
984 static void mcdeint_filter( uint8_t ** dst,
985                             uint8_t ** src,
986                             int parity,
987                             hb_filter_private_t * pv )
988 {
989     int x, y, i;
990     int out_size;
991
992 #ifdef SUPPRESS_AV_LOG
993     /* TODO: temporarily change log level to suppress obnoxious debug output */
994     int loglevel = av_log_get_level();
995     av_log_set_level( AV_LOG_QUIET );
996 #endif
997
998     for( i=0; i<3; i++ )
999     {
1000         pv->mcdeint_frame->data[i] = src[i];
1001         pv->mcdeint_frame->linesize[i] = pv->width[i];
1002     }
1003     pv->mcdeint_avctx_enc->me_cmp     = FF_CMP_SAD;
1004     pv->mcdeint_avctx_enc->me_sub_cmp = FF_CMP_SAD;
1005     pv->mcdeint_frame->quality        = pv->mcdeint_qp * FF_QP2LAMBDA;
1006
1007     out_size = avcodec_encode_video( pv->mcdeint_avctx_enc,
1008                                      pv->mcdeint_outbuf,
1009                                      pv->mcdeint_outbuf_size,
1010                                      pv->mcdeint_frame );
1011
1012     pv->mcdeint_frame_dec = pv->mcdeint_avctx_enc->coded_frame;
1013
1014     for( i = 0; i < 3; i++ )
1015     {
1016         int w    = pv->width[i];
1017         int h    = pv->height[i];
1018         int fils = pv->mcdeint_frame_dec->linesize[i];
1019         int srcs = pv->width[i];
1020
1021         for( y = 0; y < h; y++ )
1022         {
1023             if( (y ^ parity) & 1 )
1024             {
1025                 for( x = 0; x < w; x++ )
1026                 {
1027                     if( (x-2)+(y-1)*w >= 0 && (x+2)+(y+1)*w < w*h )
1028                     {
1029                         uint8_t * filp =
1030                             &pv->mcdeint_frame_dec->data[i][x + y*fils];
1031                         uint8_t * srcp = &src[i][x + y*srcs];
1032
1033                         int diff0 = filp[-fils] - srcp[-srcs];
1034                         int diff1 = filp[+fils] - srcp[+srcs];
1035
1036                         int spatial_score =
1037                               ABS(srcp[-srcs-1] - srcp[+srcs-1])
1038                             + ABS(srcp[-srcs  ] - srcp[+srcs  ])
1039                             + ABS(srcp[-srcs+1] - srcp[+srcs+1]) - 1;
1040
1041                         int temp = filp[0];
1042
1043 #define MCDEINT_CHECK(j)\
1044                         {   int score = ABS(srcp[-srcs-1+j] - srcp[+srcs-1-j])\
1045                                       + ABS(srcp[-srcs  +j] - srcp[+srcs  -j])\
1046                                       + ABS(srcp[-srcs+1+j] - srcp[+srcs+1-j]);\
1047                             if( score < spatial_score ) {\
1048                                 spatial_score = score;\
1049                                 diff0 = filp[-fils+j] - srcp[-srcs+j];\
1050                                 diff1 = filp[+fils-j] - srcp[+srcs-j];
1051
1052                         MCDEINT_CHECK(-1) MCDEINT_CHECK(-2) }} }}
1053                         MCDEINT_CHECK( 1) MCDEINT_CHECK( 2) }} }}
1054
1055                         if(diff0 + diff1 > 0)
1056                         {
1057                             temp -= (diff0 + diff1 -
1058                                      ABS( ABS(diff0) - ABS(diff1) ) / 2) / 2;
1059                         }
1060                         else
1061                         {
1062                             temp -= (diff0 + diff1 +
1063                                      ABS( ABS(diff0) - ABS(diff1) ) / 2) / 2;
1064                         }
1065
1066                         filp[0] = dst[i][x + y*w] =
1067                             temp > 255U ? ~(temp>>31) : temp;
1068                     }
1069                     else
1070                     {
1071                         dst[i][x + y*w] =
1072                             pv->mcdeint_frame_dec->data[i][x + y*fils];
1073                     }
1074                 }
1075             }
1076         }
1077
1078         for( y = 0; y < h; y++ )
1079         {
1080             if( !((y ^ parity) & 1) )
1081             {
1082                 for( x = 0; x < w; x++ )
1083                 {
1084                     pv->mcdeint_frame_dec->data[i][x + y*fils] =
1085                         dst[i][x + y*w]= src[i][x + y*srcs];
1086                 }
1087             }
1088         }
1089     }
1090
1091 #ifdef SUPPRESS_AV_LOG
1092     /* TODO: restore previous log level */
1093     av_log_set_level(loglevel);
1094 #endif
1095 }
1096
1097 hb_filter_private_t * hb_decomb_init( int pix_fmt,
1098                                            int width,
1099                                            int height,
1100                                            char * settings )
1101 {
1102     if( pix_fmt != PIX_FMT_YUV420P )
1103     {
1104         return 0;
1105     }
1106
1107     hb_filter_private_t * pv = calloc( 1, sizeof(struct hb_filter_private_s) );
1108
1109     pv->pix_fmt = pix_fmt;
1110
1111     pv->width[0]  = width;
1112     pv->height[0] = height;
1113     pv->width[1]  = pv->width[2]  = width >> 1;
1114     pv->height[1] = pv->height[2] = height >> 1;
1115
1116     int buf_size = 3 * width * height / 2;
1117     pv->buf_out[0] = hb_buffer_init( buf_size );
1118     pv->buf_out[1] = hb_buffer_init( buf_size );
1119     pv->buf_settings = hb_buffer_init( 0 );
1120
1121     pv->yadif_deinterlaced_frames = 0;
1122     pv->blend_deinterlaced_frames = 0;
1123     pv->unfiltered_frames = 0;
1124
1125     pv->yadif_ready    = 0;
1126
1127     pv->mode     = MODE_DEFAULT;
1128     pv->spatial_metric = 2;
1129     pv->motion_threshold = 6;
1130     pv->spatial_threshold = 9;
1131     pv->block_threshold = 80;
1132     pv->block_width = 16;
1133     pv->block_height = 16;
1134     
1135     pv->parity   = PARITY_DEFAULT;
1136
1137     pv->mcdeint_mode   = MCDEINT_MODE_DEFAULT;
1138     pv->mcdeint_qp     = MCDEINT_QP_DEFAULT;
1139
1140     if( settings )
1141     {
1142         sscanf( settings, "%d:%d:%d:%d:%d:%d:%d",
1143                 &pv->mode,
1144                 &pv->spatial_metric,
1145                 &pv->motion_threshold,
1146                 &pv->spatial_threshold,
1147                 &pv->block_threshold,
1148                 &pv->block_width,
1149                 &pv->block_height );
1150     }
1151     
1152     pv->cpu_count = hb_get_cpu_count();
1153     
1154
1155     if( pv->mode == 2 || pv->mode == 3 )
1156     {
1157         pv->mcdeint_mode = 0;
1158     }
1159     
1160     /* Allocate yadif specific buffers */
1161     int i, j;
1162     for( i = 0; i < 3; i++ )
1163     {
1164         int is_chroma = !!i;
1165         int w = ((width   + 31) & (~31))>>is_chroma;
1166         int h = ((height+6+ 31) & (~31))>>is_chroma;
1167
1168         pv->ref_stride[i] = w;
1169
1170         for( j = 0; j < 3; j++ )
1171         {
1172             pv->ref[j][i] = malloc( w*h*sizeof(uint8_t) ) + 3*w;
1173         }
1174     }
1175
1176     /* Allocate a buffer to store a comb mask. */
1177     for( i = 0; i < 3; i++ )
1178     {
1179         int is_chroma = !!i;
1180         int w = ((pv->width[0]   + 31) & (~31))>>is_chroma;
1181         int h = ((pv->height[0]+6+ 31) & (~31))>>is_chroma;
1182
1183         pv->mask[i] = calloc( 1, w*h*sizeof(uint8_t) ) + 3*w;
1184     }
1185
1186      /*
1187       * Create yadif threads and locks.
1188       */
1189      pv->yadif_threads = malloc( sizeof( hb_thread_t* ) * pv->cpu_count );
1190      pv->yadif_begin_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
1191      pv->yadif_complete_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
1192      pv->yadif_arguments = malloc( sizeof( yadif_arguments_t ) * pv->cpu_count );
1193
1194      for( i = 0; i < pv->cpu_count; i++ )
1195      {
1196          yadif_thread_arg_t *thread_args;
1197
1198          thread_args = malloc( sizeof( yadif_thread_arg_t ) );
1199
1200          if( thread_args )
1201          {
1202              thread_args->pv = pv;
1203              thread_args->segment = i;
1204
1205              pv->yadif_begin_lock[i] = hb_lock_init();
1206              pv->yadif_complete_lock[i] = hb_lock_init();
1207
1208              /*
1209               * Important to start off with the threads locked waiting
1210               * on input.
1211               */
1212              hb_lock( pv->yadif_begin_lock[i] );
1213
1214              pv->yadif_arguments[i].stop = 0;
1215              pv->yadif_arguments[i].dst = NULL;
1216              
1217              pv->yadif_threads[i] = hb_thread_init( "yadif_filter_segment",
1218                                                     yadif_decomb_filter_thread,
1219                                                     thread_args,
1220                                                     HB_NORMAL_PRIORITY );
1221          }
1222          else
1223          {
1224              hb_error( "yadif could not create threads" );
1225          }
1226     }
1227     
1228     /*
1229      * Create decomb threads and locks.
1230      */
1231     pv->decomb_threads = malloc( sizeof( hb_thread_t* ) * pv->cpu_count );
1232     pv->decomb_begin_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
1233     pv->decomb_complete_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
1234     pv->decomb_arguments = malloc( sizeof( decomb_arguments_t ) * pv->cpu_count );
1235     
1236     for( i = 0; i < pv->cpu_count; i++ )
1237     {
1238         decomb_thread_arg_t *decomb_thread_args;
1239     
1240         decomb_thread_args = malloc( sizeof( decomb_thread_arg_t ) );
1241     
1242         if( decomb_thread_args )
1243         {
1244             decomb_thread_args->pv = pv;
1245             decomb_thread_args->segment = i;
1246     
1247             pv->decomb_begin_lock[i] = hb_lock_init();
1248             pv->decomb_complete_lock[i] = hb_lock_init();
1249     
1250             /*
1251              * Important to start off with the threads locked waiting
1252              * on input.
1253              */
1254             hb_lock( pv->decomb_begin_lock[i] );
1255     
1256             pv->decomb_arguments[i].stop = 0;
1257     
1258             pv->decomb_threads[i] = hb_thread_init( "decomb_filter_segment",
1259                                                    decomb_filter_thread,
1260                                                    decomb_thread_args,
1261                                                    HB_NORMAL_PRIORITY );
1262         }
1263         else
1264         {
1265             hb_error( "decomb could not create threads" );
1266         }
1267     }
1268
1269     
1270     
1271     /* Allocate mcdeint specific buffers */
1272     if( pv->mcdeint_mode >= 0 )
1273     {
1274         avcodec_init();
1275         avcodec_register_all();
1276
1277         AVCodec * enc = avcodec_find_encoder( CODEC_ID_SNOW );
1278
1279         int i;
1280         for (i = 0; i < 3; i++ )
1281         {
1282             AVCodecContext * avctx_enc;
1283
1284             avctx_enc = pv->mcdeint_avctx_enc = avcodec_alloc_context();
1285
1286             avctx_enc->width                    = width;
1287             avctx_enc->height                   = height;
1288             avctx_enc->time_base                = (AVRational){1,25};  // meaningless
1289             avctx_enc->gop_size                 = 300;
1290             avctx_enc->max_b_frames             = 0;
1291             avctx_enc->pix_fmt                  = PIX_FMT_YUV420P;
1292             avctx_enc->flags                    = CODEC_FLAG_QSCALE | CODEC_FLAG_LOW_DELAY;
1293             avctx_enc->strict_std_compliance    = FF_COMPLIANCE_EXPERIMENTAL;
1294             avctx_enc->global_quality           = 1;
1295             avctx_enc->flags2                   = CODEC_FLAG2_MEMC_ONLY;
1296             avctx_enc->me_cmp                   = FF_CMP_SAD; //SSE;
1297             avctx_enc->me_sub_cmp               = FF_CMP_SAD; //SSE;
1298             avctx_enc->mb_cmp                   = FF_CMP_SSE;
1299
1300             switch( pv->mcdeint_mode )
1301             {
1302                 case 3:
1303                     avctx_enc->refs = 3;
1304                 case 2:
1305                     avctx_enc->me_method = ME_UMH;
1306                 case 1:
1307                     avctx_enc->flags |= CODEC_FLAG_4MV;
1308                     avctx_enc->dia_size =2;
1309                 case 0:
1310                     avctx_enc->flags |= CODEC_FLAG_QPEL;
1311             }
1312
1313             avcodec_open(avctx_enc, enc);
1314         }
1315
1316         pv->mcdeint_frame       = avcodec_alloc_frame();
1317         pv->mcdeint_outbuf_size = width * height * 10;
1318         pv->mcdeint_outbuf      = malloc( pv->mcdeint_outbuf_size );
1319     }
1320
1321     return pv;
1322 }
1323
1324 void hb_decomb_close( hb_filter_private_t * pv )
1325 {
1326     if( !pv )
1327     {
1328         return;
1329     }
1330     
1331     hb_log("decomb: yadif deinterlaced %i | blend deinterlaced %i | unfiltered %i | total %i", pv->yadif_deinterlaced_frames, pv->blend_deinterlaced_frames, pv->unfiltered_frames, pv->yadif_deinterlaced_frames + pv->blend_deinterlaced_frames + pv->unfiltered_frames);
1332
1333     /* Cleanup frame buffers */
1334     if( pv->buf_out[0] )
1335     {
1336         hb_buffer_close( &pv->buf_out[0] );
1337     }
1338     if( pv->buf_out[1] )
1339     {
1340         hb_buffer_close( &pv->buf_out[1] );
1341     }
1342     if (pv->buf_settings )
1343     {
1344         hb_buffer_close( &pv->buf_settings );
1345     }
1346
1347     /* Cleanup yadif specific buffers */
1348     int i;
1349     for( i = 0; i<3*3; i++ )
1350     {
1351         uint8_t **p = &pv->ref[i%3][i/3];
1352         if (*p)
1353         {
1354             free( *p - 3*pv->ref_stride[i/3] );
1355             *p = NULL;
1356         }
1357     }
1358     
1359     /* Cleanup combing mask. */
1360     for( i = 0; i<3*3; i++ )
1361     {
1362         uint8_t **p = &pv->mask[i/3];
1363         if (*p)
1364         {
1365             free( *p - 3*pv->ref_stride[i/3] );
1366             *p = NULL;
1367         }
1368     }
1369     
1370     for( i = 0; i < pv->cpu_count; i++)
1371     {
1372         /*
1373          * Tell each yadif thread to stop, and then cleanup.
1374          */
1375         pv->yadif_arguments[i].stop = 1;
1376         hb_unlock(  pv->yadif_begin_lock[i] );
1377
1378         hb_thread_close( &pv->yadif_threads[i] );
1379         hb_lock_close( &pv->yadif_begin_lock[i] );
1380         hb_lock_close( &pv->yadif_complete_lock[i] );
1381     }
1382     
1383     /*
1384      * free memory for yadif structs
1385      */
1386     free( pv->yadif_threads );
1387     free( pv->yadif_begin_lock );
1388     free( pv->yadif_complete_lock );
1389     free( pv->yadif_arguments );
1390     
1391     for( i = 0; i < pv->cpu_count; i++)
1392     {
1393         /*
1394          * Tell each decomb thread to stop, and then cleanup.
1395          */
1396         pv->decomb_arguments[i].stop = 1;
1397         hb_unlock(  pv->decomb_begin_lock[i] );
1398
1399         hb_thread_close( &pv->decomb_threads[i] );
1400         hb_lock_close( &pv->decomb_begin_lock[i] );
1401         hb_lock_close( &pv->decomb_complete_lock[i] );
1402     }
1403     
1404     /*
1405      * free memory for decomb structs
1406      */
1407     free( pv->decomb_threads );
1408     free( pv->decomb_begin_lock );
1409     free( pv->decomb_complete_lock );
1410     free( pv->decomb_arguments );
1411     
1412     /* Cleanup mcdeint specific buffers */
1413     if( pv->mcdeint_mode >= 0 )
1414     {
1415         if( pv->mcdeint_avctx_enc )
1416         {
1417             avcodec_close( pv->mcdeint_avctx_enc );
1418             av_freep( &pv->mcdeint_avctx_enc );
1419         }
1420         if( pv->mcdeint_outbuf )
1421         {
1422             free( pv->mcdeint_outbuf );
1423         }
1424     }
1425
1426     free( pv );
1427 }
1428
1429 int hb_decomb_work( const hb_buffer_t * cbuf_in,
1430                     hb_buffer_t ** buf_out,
1431                     int pix_fmt,
1432                     int width,
1433                     int height,
1434                     hb_filter_private_t * pv )
1435 {
1436     hb_buffer_t * buf_in = (hb_buffer_t *)cbuf_in;
1437
1438     if( !pv ||
1439         pix_fmt != pv->pix_fmt ||
1440         width   != pv->width[0] ||
1441         height  != pv->height[0] )
1442     {
1443         return FILTER_FAILED;
1444     }
1445
1446     avpicture_fill( &pv->pic_in, buf_in->data,
1447                     pix_fmt, width, height );
1448
1449     /* Determine if top-field first layout */
1450     int tff;
1451     if( pv->parity < 0 )
1452     {
1453         tff = !!(buf_in->flags & PIC_FLAG_TOP_FIELD_FIRST);
1454     }
1455     else
1456     {
1457         tff = (pv->parity & 1) ^ 1;
1458     }
1459
1460     /* Store current frame in yadif cache */
1461     store_ref( (const uint8_t**)pv->pic_in.data, pv );
1462
1463     /* If yadif is not ready, store another ref and return FILTER_DELAY */
1464     if( pv->yadif_ready == 0 )
1465     {
1466         store_ref( (const uint8_t**)pv->pic_in.data, pv );
1467
1468         hb_buffer_copy_settings( pv->buf_settings, buf_in );
1469
1470         /* don't let 'work_loop' send a chapter mark upstream */
1471         buf_in->new_chap  = 0;
1472
1473         pv->yadif_ready = 1;
1474
1475         return FILTER_DELAY;
1476     }
1477
1478     /* Perform yadif filtering */        
1479     int frame;
1480     for( frame = 0; frame <= ( ( pv->mode == 2 || pv->mode == 3 )? 1 : 0 ) ; frame++ )
1481     {
1482         int parity = frame ^ tff ^ 1;
1483
1484         avpicture_fill( &pv->pic_out, pv->buf_out[!(frame^1)]->data,
1485                         pix_fmt, width, height );
1486
1487         yadif_filter( pv->pic_out.data, parity, tff, pv );
1488
1489         if( pv->mcdeint_mode >= 0 )
1490         {
1491             /* Perform mcdeint filtering */
1492             avpicture_fill( &pv->pic_in,  pv->buf_out[(frame^1)]->data,
1493                             pix_fmt, width, height );
1494
1495             mcdeint_filter( pv->pic_in.data, pv->pic_out.data, parity, pv );
1496         }
1497
1498         *buf_out = pv->buf_out[!(frame^1)];
1499     }
1500
1501     /* Copy buffered settings to output buffer settings */
1502     hb_buffer_copy_settings( *buf_out, pv->buf_settings );
1503
1504     /* Replace buffered settings with input buffer settings */
1505     hb_buffer_copy_settings( pv->buf_settings, buf_in );
1506
1507     /* don't let 'work_loop' send a chapter mark upstream */
1508     buf_in->new_chap  = 0;
1509
1510     return FILTER_OK;
1511 }