OSDN Git Service

Fixes a missing and misplaced option in decomb's comments and in-line CLI help. Thanks...
[handbrake-jp/handbrake-jp-git.git] / libhb / decomb.c
1 /* $Id: decomb.c,v 1.14 2008/04/25 5:00:00 jbrjake Exp $
2
3    This file is part of the HandBrake source code.
4    Homepage: <http://handbrake.fr/>.
5    It may be used under the terms of the GNU General Public License. 
6    
7    The yadif algorithm was created by Michael Niedermayer.
8    Tritical's work inspired much of the comb detection code:
9    http://web.missouri.edu/~kes25c/
10 */
11
12 /*****
13 Parameters:
14     Mode : Spatial metric : Motion thresh : Spatial thresh : Block thresh :
15     Block width : Block height
16
17 Appended for EEDI2:
18     Magnitude thresh : Variance thresh : Laplacian thresh : Dilation thresh :
19     Erosion thresh : Noise thresh : Max search distance : Post-processing
20
21 Plus:
22     Parity
23     
24 Defaults:
25     7:2:6:9:80:16:16:10:20:20:4:2:50:24:1:-1
26 *****/
27
/* Bit flags for the decomb "mode" setting. Each flag is a distinct power of
   two, so several can be summed into one mode value.                        */
#define MODE_YADIF       1 // Use yadif
#define MODE_BLEND       2 // Use blending interpolation
#define MODE_CUBIC       4 // Use cubic interpolation
#define MODE_EEDI2       8 // Use EEDI2 interpolation
#define MODE_MCDEINT    16 // Post-process with mcdeint
#define MODE_MASK       32 // Output combing masks instead of pictures
34
35 /***** 
36 These modes can be layered. For example, Yadif (1) + EEDI2 (8) = 9,
37 which will feed EEDI2 interpolations to yadif.
38
39 ** Working combos:
40  1: Just yadif
41  2: Just blend
42  3: Switch between yadif and blend
43  4: Just cubic interpolate
44  5: Cubic->yadif
45  6: Switch between cubic and blend
46  7: Switch between cubic->yadif and blend
47  8: Just EEDI2 interpolate
48  9: EEDI2->yadif
49 10: Switch between EEDI2 and blend
50 11: Switch between EEDI2->yadif and blend
51 17: Yadif->mcdeint
52 18: Blend->mcdeint
53 19: Switch between blending and yadif -> mcdeint
54 20: Cubic->mcdeint
55 21: Cubic->yadif->mcdeint
56 22: Cubic or blend -> mcdeint
57 23: Cubic->yadif or blend -> mcdeint
58 24: EEDI2->mcdeint
59 25: EEDI2->yadif->mcdeint
60 ...okay I'm getting bored now listing all these different modes
61 32: Passes through the combing mask for every combed frame (white for combed pixels, otherwise black)
62 33+: Overlay the combing mask for every combed frame on top of the filtered output (white for combed pixels)
63
64 12-15: EEDI2 will override cubic interpolation
65 16: DOES NOT WORK BY ITSELF-- mcdeint needs to be fed by another deinterlacer
66 *****/
67
68 #include "hb.h"
69 #include "hbffmpeg.h"
70 #include "mpeg2dec/mpeg2.h"
71 #include "eedi2.h"
72
// NOTE(review): not referenced in the visible part of this file; presumably
// tested with #ifdef further down -- confirm before removing.
#define SUPPRESS_AV_LOG

// Presumably the value used for "parity" when the settings string omits it.
#define PARITY_DEFAULT   -1

// Presumably the initial values for the mcdeint_mode / mcdeint_qp fields.
#define MCDEINT_MODE_DEFAULT   -1
#define MCDEINT_QP_DEFAULT      1

// Function-like macros: each argument may be evaluated more than once, so
// never pass expressions with side effects.
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
// MIN and MAX are not defined in this file; they must come from an included header.
#define MIN3(a,b,c) MIN(MIN(a,b),c)
#define MAX3(a,b,c) MAX(MAX(a,b),c)
83
// First-dimension indices into pv->eedi_half[4][3]
// (used as pv->eedi_half[NAME][plane]).
#define SRCPF 0
#define MSKPF 1
#define TMPPF 2
#define DSTPF 3
// First-dimension indices into pv->eedi_full[5][3]
// (used as pv->eedi_full[NAME][plane]).
#define DST2PF 0
#define TMP2PF2 1
#define MSK2PF 2
#define TMP2PF 3
#define DST2MPF 4
95
/* Per-thread work description for the yadif worker threads.
   NOTE(review): the consumers of these fields are outside the visible part
   of the file; field meanings below are inferred from their names.        */
struct yadif_arguments_s {
    uint8_t **dst;
    int parity;
    int tff;
    int stop;       // presumably the "exit the thread" flag, as in eedi2_arguments_s
    int is_combed;
};

/* Per-thread work description for the comb detection worker threads. */
struct decomb_arguments_s {
    int stop;       // presumably the "exit the thread" flag, as in eedi2_arguments_s
};

/* Per-thread work description for the eedi2 worker threads. */
struct eedi2_arguments_s {
    int stop;       // when non-zero, eedi2_filter_thread() leaves its work loop
};

typedef struct yadif_arguments_s yadif_arguments_t;
typedef struct decomb_arguments_s decomb_arguments_t;
typedef struct eedi2_arguments_s eedi2_arguments_t;
115
/* Start-up argument handed to eedi2_filter_thread(); the thread frees it
   when it exits.                                                         */
typedef struct eedi2_thread_arg_s {
    hb_filter_private_t *pv;    // shared filter state
    int plane;                  // plane this thread works on
} eedi2_thread_arg_t;

/* Start-up argument for a comb detection thread (thread body not visible here). */
typedef struct decomb_thread_arg_s {
    hb_filter_private_t *pv;
    int segment;
} decomb_thread_arg_t;

/* Start-up argument for a yadif thread (thread body not visible here). */
typedef struct yadif_thread_arg_s {
    hb_filter_private_t *pv;
    int segment;
} yadif_thread_arg_t;
130
/* Private state of the decomb filter, shared by the filter entry points and
   all of its worker threads. Arrays of size 3 are indexed by plane.        */
struct hb_filter_private_s
{
    int              pix_fmt;
    int              width[3];
    int              height[3];

    // Decomb parameters
    int              mode;                  // sum of MODE_* flags
    int              spatial_metric;        // 0, 1 or 2: selects the test in detect_combed_segment()
    int              motion_threshold;
    int              spatial_threshold;
    int              block_threshold;
    int              block_width;
    int              block_height;
    
    // EEDI2 parameters
    int              magnitude_threshold;
    int              variance_threshold;
    int              laplacian_threshold;
    int              dilation_threshold;
    int              erosion_threshold;
    int              noise_threshold;
    int              maximum_search_distance;
    int              post_processing;       // 1 or 3: direction consistency pass; 2 or 3: corner pass

    int              parity;
    int              tff;
    
    int              yadif_ready;

    int              mcdeint_mode;
    int              mcdeint_qp;

    int              mcdeint_outbuf_size;
    uint8_t        * mcdeint_outbuf;
    AVCodecContext * mcdeint_avctx_enc;
    AVFrame        * mcdeint_frame;
    AVFrame        * mcdeint_frame_dec;

    // Running frame counters. detect_combed_segment() treats "all three
    // are zero" as the first frame and skips the motion requirement then.
    int              deinterlaced_frames;
    int              blended_frames;
    int              unfiltered_frames;

    // Reference frames: [0] previous, [1] current, [2] next (as used by
    // detect_combed_segment()); [3] is a spare slot written by store_ref().
    uint8_t        * ref[4][3];
    int              ref_stride[3];         // bytes per row of ref[] and mask[]

    /* Make a buffer to store a comb mask. */
    uint8_t        * mask[3];               // 255 = combed pixel, 0 = not combed

    uint8_t        * eedi_half[4][3];       // indexed by SRCPF, MSKPF, TMPPF, DSTPF
    uint8_t        * eedi_full[5][3];       // indexed by DST2PF, TMP2PF2, MSK2PF, TMP2PF, DST2MPF
    // Scratch arrays for the EEDI2 corner post-processing pass.
    int            * cx2;
    int            * cy2;
    int            * cxy;
    int            * tmpc;
    
    AVPicture        pic_in;
    AVPicture        pic_out;
    hb_buffer_t *    buf_out[2];
    hb_buffer_t *    buf_settings;          // check_combing_mask() tests flags & 16 (film vs. video)
    
    int              cpu_count;

    hb_thread_t    ** yadif_threads;         // Threads for Yadif - one per CPU
    hb_lock_t      ** yadif_begin_lock;      // Thread has work
    hb_lock_t      ** yadif_complete_lock;   // Thread has completed work
    yadif_arguments_t *yadif_arguments;      // Arguments to thread for work
    
    hb_thread_t    ** decomb_threads;        // Threads for comb detection - one per CPU
    hb_lock_t      ** decomb_begin_lock;     // Thread has work
    hb_lock_t      ** decomb_complete_lock;  // Thread has completed work
    decomb_arguments_t *decomb_arguments;    // Arguments to thread for work

    hb_thread_t    ** eedi2_threads;        // Threads for eedi2 - one per plane
    hb_lock_t      ** eedi2_begin_lock;     // Thread has work
    hb_lock_t      ** eedi2_complete_lock;  // Thread has completed work
    eedi2_arguments_t *eedi2_arguments;    // Arguments to thread for work

//    int              alternator;           // for bobbing parity when framedoubling
};
211
/* Filter entry points. They are registered in the hb_filter_decomb object;
   their definitions are not in the visible part of this file.             */

/* Creates the filter's private state from the picture geometry and a
   settings string (presumably the colon separated list described at the
   top of this file -- confirm against the definition).                    */
hb_filter_private_t * hb_decomb_init( int pix_fmt,
                                           int width,
                                           int height,
                                           char * settings );

/* Processes one input buffer; the result is returned through *buf_out. */
int hb_decomb_work(      const hb_buffer_t * buf_in,
                         hb_buffer_t ** buf_out,
                         int pix_fmt,
                         int width,
                         int height,
                         hb_filter_private_t * pv );

/* Tears down the state created by hb_decomb_init(). */
void hb_decomb_close( hb_filter_private_t * pv );
225
/* Filter descriptor for decomb. The initializer is positional, so the
   order must match the hb_filter_object_t declaration (not in this file). */
hb_filter_object_t hb_filter_decomb =
{
    FILTER_DECOMB,
    "Decomb",
    NULL,               // NOTE(review): meaning depends on hb_filter_object_t's third member
    hb_decomb_init,
    hb_decomb_work,
    hb_decomb_close,
};
235
/*
 * Interpolates one sample from four neighbouring samples with the 4-tap
 * cubic kernel (-3, 23, 23, -3) / 40.
 *
 * y0, y3: the outer samples; y1, y2: the inner samples.
 * Returns the interpolated value clamped to the 8-bit range 0..255.
 */
int cubic_interpolate_pixel( int y0, int y1, int y2, int y3 )
{
    /* From http://www.neuron2.net/library/cubicinterp.html */
    const int weighted_sum = ( -3 * y0 ) + ( 23 * y1 ) + ( 23 * y2 ) + ( -3 * y3 );
    const int scaled       = weighted_sum / 40;

    /* The negative taps can overshoot in both directions, so clamp. */
    if( scaled > 255 )
        return 255;
    if( scaled < 0 )
        return 0;
    return scaled;
}
253
254 static void cubic_interpolate_line( uint8_t *dst,
255                                uint8_t *cur,
256                                int plane,
257                                int y,
258                                hb_filter_private_t * pv )
259 {
260     int w = pv->width[plane];
261     int refs = pv->ref_stride[plane];
262     int x;
263
264     for( x = 0; x < w; x++)
265     {
266         int a, b, c, d;
267         a = b = c = d = 0;
268         
269         if( y >= 3 )
270         {
271             /* Normal top*/
272             a = cur[-3*refs];
273             b = cur[-refs];
274         }
275         else if( y == 2 || y == 1 )
276         {
277             /* There's only one sample above this pixel, use it twice. */
278             a = cur[-refs];
279             b = cur[-refs];
280         }
281         else if( y == 0 )
282         {
283             /* No samples above, triple up on the one below. */
284             a = cur[+refs];
285             b = cur[+refs];
286         }
287         
288         if( y <= ( pv->height[plane] - 4 ) )
289         {
290             /* Normal bottom*/
291             c = cur[+refs];
292             d = cur[3*refs];            
293         }
294         else if( y == ( pv->height[plane] - 3 ) || y == ( pv->height[plane] - 2 ) )
295         {
296             /* There's only one sample below, use it twice. */
297             c = cur[+refs];
298             d = cur[+refs];
299         }
300         else if( y == pv->height[plane] - 1)
301         {
302             /* No samples below, triple up on the one above. */
303             c = cur[-refs];
304             d = cur[-refs];
305         }
306         
307         dst[0] = cubic_interpolate_pixel( a, b, c, d );
308         
309         dst++;
310         cur++;
311     }
312 }
313
/*
 * Overlays one row of a comb mask on one row of picture data: every pixel
 * whose mask byte is exactly 255 is set to 255; all other pixels are left
 * untouched.
 *
 * srcp:  picture row, modified in place
 * mskp:  mask row, read only
 * width: number of pixels in both rows
 */
void apply_mask_line( uint8_t * srcp,
                      uint8_t * mskp,
                      int width )
{
    int remaining;

    for( remaining = width; remaining > 0; remaining-- )
    {
        if( *mskp == 255 )
        {
            *srcp = 255;
        }
        srcp++;
        mskp++;
    }
}
328
329 void apply_mask( hb_filter_private_t * pv )
330 {
331     int plane, height;
332     
333     for( plane = 0; plane < 3; plane++ )
334     {
335         uint8_t * srcp = ( pv->mode & MODE_MCDEINT ) ? pv->pic_in.data[plane] : pv->pic_out.data[plane];
336         uint8_t * mskp = pv->mask[plane];
337         
338         for( height = 0; height < pv->height[plane]; height++ )
339         {
340             if( pv->mode == MODE_MASK && plane == 0 )
341             {
342                 memcpy( srcp, mskp, pv->width[plane] );
343             }
344             else if( pv->mode == MODE_MASK )
345             {
346                 memset( srcp, 128, pv->width[plane] );
347             }
348             else if( plane == 0 )
349             {
350                 apply_mask_line( srcp, mskp, pv->width[plane] );
351             }
352
353             srcp += pv->pic_out.linesize[plane];
354             mskp += pv->ref_stride[plane];
355         }
356     }
357 }
358
359 static void store_ref( const uint8_t ** pic,
360                              hb_filter_private_t * pv )
361 {
362     memcpy( pv->ref[3],
363             pv->ref[0],
364             sizeof(uint8_t *)*3 );
365
366     memmove( pv->ref[0],
367              pv->ref[1],
368              sizeof(uint8_t *)*3*3 );
369
370     int i;
371     for( i = 0; i < 3; i++ )
372     {
373         const uint8_t * src = pic[i];
374         uint8_t * ref = pv->ref[2][i];
375
376         int w = pv->width[i];
377         int h = pv->height[i];
378         int ref_stride = pv->ref_stride[i];
379
380         int y;
381         for( y = 0; y < h; y++ )
382         {
383             memcpy(ref, src, w);
384             src = (uint8_t*)src + w;
385             ref = (uint8_t*)ref + ref_stride;
386         }
387     }
388 }
389
390 /* This function may be useful in the future, if we want to output
391    a reference to an AVPicture, since they have different strides.
392 static void get_ref( uint8_t ** pic, hb_filter_private_t * pv, int frm )
393 {
394     int i;
395     for( i = 0; i < 3; i++ )
396     {
397         uint8_t * dst = pic[i];
398         const uint8_t * ref = pv->ref[frm][i];
399         int w = pv->width[i];
400         int ref_stride = pv->ref_stride[i];
401         
402         int y;
403         for( y = 0; y < pv->height[i]; y++ )
404         {
405             memcpy(dst, ref, w);
406             dst += w;
407             ref += ref_stride;
408         }
409     }
410 }
411 */
412
/*
 * Low-pass 5-tap vertical filter for one pixel, with the kernel
 * (-1, 2, 6, 2, -1) / 8.
 *
 * up2, up1:     samples two and one rows above
 * current:      the sample itself
 * down1, down2: samples one and two rows below
 *
 * Returns the filtered value clamped to 0..255.
 */
int blend_filter_pixel( int up2, int up1, int current, int down1, int down2 )
{
    const int weighted_sum = -up2 + ( up1 * 2 ) + ( current * 6 ) + ( down1 * 2 ) - down2;
    const int filtered     = weighted_sum / 8;

    if( filtered > 255 )
        return 255;
    if( filtered < 0 )
        return 0;
    return filtered;
}
435
436 static void blend_filter_line( uint8_t *dst,
437                                uint8_t *cur,
438                                int plane,
439                                int y,
440                                hb_filter_private_t * pv )
441 {
442     int w = pv->width[plane];
443     int refs = pv->ref_stride[plane];
444     int x;
445
446     for( x = 0; x < w; x++)
447     {
448         int a, b, c, d, e;
449         
450         a = cur[-2*refs];
451         b = cur[-refs];
452         c = cur[0];
453         d = cur[+refs];
454         e = cur[2*refs];
455         
456         if( y == 0 )
457         {
458             /* First line, so A and B don't exist.*/
459             a = cur[0];
460             b = cur[0];
461         }
462         else if( y == 1 )
463         {
464             /* Second line, no A. */
465             a = cur[-refs];
466         }
467         else if( y == (pv->height[plane] - 2) )
468         {
469             /* Second to last line, no E. */
470             e = cur[+refs];
471         }
472         else if( y == (pv->height[plane] -1) )
473         {
474             /* Last line, no D or E. */
475             d = cur[0];
476             e = cur[0];
477         }
478                 
479         dst[0] = blend_filter_pixel( a, b, c, d, e );
480
481         dst++;
482         cur++;
483     }
484 }
485
486 int check_combing_mask( hb_filter_private_t * pv )
487 {
488     /* Go through the mask in X*Y blocks. If any of these windows
489        have threshold or more combed pixels, consider the whole
490        frame to be combed and send it on to be deinterlaced.     */
491
492     /* Block mask threshold -- The number of pixels
493        in a block_width * block_height window of
494        he mask that need to show combing for the
495        whole frame to be seen as such.            */
496     int threshold       = pv->block_threshold;
497     int block_width     = pv->block_width;
498     int block_height    = pv->block_height;
499     int block_x, block_y;
500     int block_score = 0; int send_to_blend = 0;
501     
502     int x, y, k;
503
504     for( k = 0; k < 1; k++ )
505     {
506         int ref_stride = pv->ref_stride[k];
507         for( y = 0; y < ( pv->height[k] - block_height ); y = y + block_height )
508         {
509             for( x = 0; x < ( pv->width[k] - block_width ); x = x + block_width )
510             {
511                 block_score = 0;
512                 for( block_y = 0; block_y < block_height; block_y++ )
513                 {
514                     for( block_x = 0; block_x < block_width; block_x++ )
515                     {
516                         int mask_y = y + block_y;
517                         int mask_x = x + block_x;
518                         
519                         /* We only want to mark a pixel in a block as combed
520                            if the pixels above and below are as well. Got to
521                            handle the top and bottom lines separately.       */
522                         if( y + block_y == 0 )
523                         {
524                             if( pv->mask[k][mask_y*ref_stride+mask_x    ] == 255 &&
525                                 pv->mask[k][mask_y*ref_stride+mask_x + 1] == 255 )
526                                     block_score++;
527                         }
528                         else if( y + block_y == pv->height[k] - 1 )
529                         {
530                             if( pv->mask[k][mask_y*ref_stride+mask_x - 1] == 255 &&
531                                 pv->mask[k][mask_y*ref_stride+mask_x    ] == 255 )
532                                     block_score++;
533                         }
534                         else
535                         {
536                             if( pv->mask[k][mask_y*ref_stride+mask_x - 1] == 255 &&
537                                 pv->mask[k][mask_y*ref_stride+mask_x    ] == 255 &&
538                                 pv->mask[k][mask_y*ref_stride+mask_x + 1] == 255 )
539                                     block_score++;
540                         } 
541                     }
542                 }
543
544                 if( block_score >= ( threshold / 2 ) )
545                 {
546 #if 0
547                     hb_log("decomb: frame %i | score %i | type %s", pv->deinterlaced_frames + pv->blended_frames +  pv->unfiltered_frames + 1, block_score, pv->buf_settings->flags & 16 ? "Film" : "Video");
548 #endif
549                     if ( block_score <= threshold && !( pv->buf_settings->flags & 16) )
550                     {
551                         /* Blend video content that scores between
552                            ( threshold / 2 ) and threshold.        */
553                         send_to_blend = 1;
554                     }
555                     else if( block_score > threshold )
556                     {
557                         if( pv->buf_settings->flags & 16 )
558                         {
559                             /* Blend progressive content above the threshold.*/
560                             return 2;
561                         }
562                         else
563                         {
564                             /* Yadif deinterlace video content above the threshold. */
565                             return 1;
566                         }
567                     }
568                 }
569             }
570         } 
571     }
572     
573     if( send_to_blend )
574     {
575         return 2;
576     }
577     else
578     {
579         /* Consider this frame to be uncombed. */
580         return 0;
581     }
582 }
583
/*
 * Builds the comb mask (pv->mask) for rows segment_start..segment_stop-1
 * of the current frame, pv->ref[1].
 *
 * Each mask byte in the processed rows is set to 255 (combed) or 0 (not
 * combed). The range is clamped to 2..height-2 because every pixel needs
 * two rows above and two rows below it; rows outside the clamped range are
 * not written.
 *
 * NOTE(review): when pv->spatial_metric is not 0, 1 or 2, pixels that pass
 * the first two tests are left unmodified in the mask.
 */
void detect_combed_segment( hb_filter_private_t * pv, int segment_start, int segment_stop )
{
    /* A mish-mash of various comb detection tricks
       picked up from neuron2's Decomb plugin for
       AviSynth and tritical's IsCombedT and
       IsCombedTIVTC plugins.                       */
       
    int x, y, k, width, height;
    
    /* Comb scoring algorithm */
    int spatial_metric  = pv->spatial_metric;
    /* Motion threshold */
    int mthresh         = pv->motion_threshold;
    /* Spatial threshold */
    int athresh         = pv->spatial_threshold;
    int athresh_squared = athresh * athresh;
    int athresh6        = 6 *athresh;

    /* The loop is written per plane, but only plane 0 is processed. */
    for( k = 0; k < 1; k++ )
    {
        int ref_stride  = pv->ref_stride[k];
        width           = pv->width[k];
        height          = pv->height[k];
        
        /* Comb detection has to start at y = 2 and stop before
           y = height - 2, because it needs to examine
           2 pixels above and 2 below the current pixel.      */
        if( segment_start < 2 )
            segment_start = 2;
        if( segment_stop > height - 2 )
            segment_stop = height - 2;
            
        for( y =  segment_start; y < segment_stop; y++ )
        {
            /* These are just to make the buffer locations easier to read. */
            int back_2    = ( y - 2 )*ref_stride ;
            int back_1    = ( y - 1 )*ref_stride;
            int current   =         y*ref_stride;
            int forward_1 = ( y + 1 )*ref_stride;
            int forward_2 = ( y + 2 )*ref_stride;
            
            /* We need to examine a column of 5 pixels
               in the prev, cur, and next frames.      */
            uint8_t previous_frame[5];
            uint8_t current_frame[5];
            uint8_t next_frame[5];
            
            for( x = 0; x < width; x++ )
            {
                /* Fill up the current frame array with the current pixel values.*/
                current_frame[0] = pv->ref[1][k][back_2    + x];
                current_frame[1] = pv->ref[1][k][back_1    + x];
                current_frame[2] = pv->ref[1][k][current   + x];
                current_frame[3] = pv->ref[1][k][forward_1 + x];
                current_frame[4] = pv->ref[1][k][forward_2 + x];

                int up_diff   = current_frame[2] - current_frame[1];
                int down_diff = current_frame[2] - current_frame[3];

                if( ( up_diff >  athresh && down_diff >  athresh ) ||
                    ( up_diff < -athresh && down_diff < -athresh ) )
                {
                    /* The pixel above and below are different,
                       and they change in the same "direction" too.*/
                    int motion = 0;
                    if( mthresh > 0 )
                    {
                        /* Make sure there's sufficient motion between frame t-1 to frame t+1. */
                        previous_frame[0] = pv->ref[0][k][back_2    + x];
                        previous_frame[1] = pv->ref[0][k][back_1    + x];
                        previous_frame[2] = pv->ref[0][k][current   + x];
                        previous_frame[3] = pv->ref[0][k][forward_1 + x];
                        previous_frame[4] = pv->ref[0][k][forward_2 + x];
                        next_frame[0]     = pv->ref[2][k][back_2    + x];
                        next_frame[1]     = pv->ref[2][k][back_1    + x];
                        next_frame[2]     = pv->ref[2][k][current   + x];
                        next_frame[3]     = pv->ref[2][k][forward_1 + x];
                        next_frame[4]     = pv->ref[2][k][forward_2 + x];
                        
                        if( abs( previous_frame[2] - current_frame[2] ) > mthresh &&
                            abs(  current_frame[1] - next_frame[1]    ) > mthresh &&
                            abs(  current_frame[3] - next_frame[3]    ) > mthresh )
                                motion++;
                        if( abs(     next_frame[2] - current_frame[2] ) > mthresh &&
                            abs( previous_frame[1] - current_frame[1] ) > mthresh &&
                            abs( previous_frame[3] - current_frame[3] ) > mthresh )
                                motion++;
                    }
                    else
                    {
                        /* User doesn't want to check for motion,
                           so move on to the spatial check.       */
                        motion = 1;
                    }
                           
                    /* While all three frame counters are still zero the
                       spatial check runs even without detected motion.  */
                    if( motion || ( pv->deinterlaced_frames==0 && pv->blended_frames==0 && pv->unfiltered_frames==0) )
                    {
                        /* That means it's time for the spatial check.
                           We've got several options here.             */
                        if( spatial_metric == 0 )
                        {
                            /* Simple 32detect style comb detection */
                            if( ( abs( current_frame[2] - current_frame[4] ) < 10  ) &&
                                ( abs( current_frame[2] - current_frame[3] ) > 15 ) )
                            {
                                pv->mask[k][y*ref_stride + x] = 255;
                            }
                            else
                            {
                                pv->mask[k][y*ref_stride + x] = 0;
                            }
                        }
                        else if( spatial_metric == 1 )
                        {
                            /* This, for comparison, is what IsCombed uses.
                               It's better, but still noise sensitive.     */
                               int combing = ( current_frame[1] - current_frame[2] ) *
                                             ( current_frame[3] - current_frame[2] );
                               
                               if( combing > athresh_squared )
                                   pv->mask[k][y*ref_stride + x] = 255; 
                               else
                                   pv->mask[k][y*ref_stride + x] = 0;
                        }
                        else if( spatial_metric == 2 )
                        {
                            /* Tritical's noise-resistant combing scorer.
                               The check is done on a bob+blur convolution. */
                            int combing = abs( current_frame[0]
                                             + ( 4 * current_frame[2] )
                                             + current_frame[4]
                                             - ( 3 * ( current_frame[1]
                                                     + current_frame[3] ) ) );

                            /* If the pixel is sufficiently combed,
                               then mark it down on the mask as 255. */
                            if( combing > athresh6 )
                                pv->mask[k][y*ref_stride + x] = 255; 
                            else
                                pv->mask[k][y*ref_stride + x] = 0;
                        }
                    }
                    else
                    {
                        pv->mask[k][y*ref_stride + x] = 0;
                    }
                }
                else
                {
                    pv->mask[k][y*ref_stride + x] = 0;
                }
            }
        }
    }
}
740
// This function calls all the eedi2 filters in sequence for a given plane.
// It outputs the final interpolated image to pv->eedi_full[DST2PF].
//
// pv: filter state; the half-height source field must already be in
//     pv->eedi_half[SRCPF][k]
// k:  plane index
//
// All buffers of the plane share the row pitch pv->ref_stride[k]. The
// half-height stages run on height / 2 rows, the "2x" stages on the full
// height. The order of the calls matters: each stage consumes buffers that
// the previous one produced.
void eedi2_interpolate_plane( hb_filter_private_t * pv, int k )
{
    /* We need all these pointers. No, seriously.
       I swear. It's not a joke. They're used.
       All nine of them.                         */
    uint8_t * mskp = pv->eedi_half[MSKPF][k];
    uint8_t * srcp = pv->eedi_half[SRCPF][k];
    uint8_t * tmpp = pv->eedi_half[TMPPF][k];
    uint8_t * dstp = pv->eedi_half[DSTPF][k];
    uint8_t * dst2p = pv->eedi_full[DST2PF][k];
    uint8_t * tmp2p2 = pv->eedi_full[TMP2PF2][k];
    uint8_t * msk2p = pv->eedi_full[MSK2PF][k];
    uint8_t * tmp2p = pv->eedi_full[TMP2PF][k];
    uint8_t * dst2mp = pv->eedi_full[DST2MPF][k];
    int * cx2 = pv->cx2;
    int * cy2 = pv->cy2;
    int * cxy = pv->cxy;
    int * tmpc = pv->tmpc;

    int pitch = pv->ref_stride[k];
    int height = pv->height[k]; int width = pv->width[k];
    int half_height = height / 2;

    // edge mask
    eedi2_build_edge_mask( mskp, pitch, srcp, pitch,
                     pv->magnitude_threshold, pv->variance_threshold, pv->laplacian_threshold, 
                     half_height, width );
    eedi2_erode_edge_mask( mskp, pitch, tmpp, pitch, pv->erosion_threshold, half_height, width );
    eedi2_dilate_edge_mask( tmpp, pitch, mskp, pitch, pv->dilation_threshold, half_height, width );
    eedi2_erode_edge_mask( mskp, pitch, tmpp, pitch, pv->erosion_threshold, half_height, width );
    eedi2_remove_small_gaps( tmpp, pitch, mskp, pitch, half_height, width );

    // direction mask
    eedi2_calc_directions( k, mskp, pitch, srcp, pitch, tmpp, pitch,
                     pv->maximum_search_distance, pv->noise_threshold,
                     half_height, width );
    eedi2_filter_dir_map( mskp, pitch, tmpp, pitch, dstp, pitch, half_height, width );
    eedi2_expand_dir_map( mskp, pitch, dstp, pitch, tmpp, pitch, half_height, width );
    eedi2_filter_map( mskp, pitch, tmpp, pitch, dstp, pitch, half_height, width );

    // upscale 2x vertically
    eedi2_upscale_by_2( srcp, dst2p, half_height, pitch );
    eedi2_upscale_by_2( dstp, tmp2p2, half_height, pitch );
    eedi2_upscale_by_2( mskp, msk2p, half_height, pitch );

    // upscale the direction mask
    eedi2_mark_directions_2x( msk2p, pitch, tmp2p2, pitch, tmp2p, pitch, pv->tff, height, width );
    eedi2_filter_dir_map_2x( msk2p, pitch, tmp2p, pitch,  dst2mp, pitch, pv->tff, height, width );
    eedi2_expand_dir_map_2x( msk2p, pitch, dst2mp, pitch, tmp2p, pitch, pv->tff, height, width );
    eedi2_fill_gaps_2x( msk2p, pitch, tmp2p, pitch, dst2mp, pitch, pv->tff, height, width );
    eedi2_fill_gaps_2x( msk2p, pitch, dst2mp, pitch, tmp2p, pitch, pv->tff, height, width );

    // interpolate a full-size plane
    eedi2_interpolate_lattice( k, tmp2p, pitch, dst2p, pitch, tmp2p2, pitch, pv->tff,
                         pv->noise_threshold, height, width );

    // post_processing selects the optional passes: 1 = direction
    // consistency, 2 = junctions and corners, 3 = both
    if( pv->post_processing == 1 || pv->post_processing == 3 )
    {
        // make sure the edge directions are consistent
        eedi2_bit_blit( tmp2p2, pitch, tmp2p, pitch, pv->width[k], pv->height[k] );
        eedi2_filter_dir_map_2x( msk2p, pitch, tmp2p, pitch, dst2mp, pitch, pv->tff, height, width );
        eedi2_expand_dir_map_2x( msk2p, pitch, dst2mp, pitch, tmp2p, pitch, pv->tff, height, width );
        eedi2_post_process( tmp2p, pitch, tmp2p2, pitch, dst2p, pitch, pv->tff, height, width );
    }
    if( pv->post_processing == 2 || pv->post_processing == 3 )
    {
        // filter junctions and corners
        eedi2_gaussian_blur1( srcp, pitch, tmpp, pitch, srcp, pitch, half_height, width );
        eedi2_calc_derivatives( srcp, pitch, half_height, width, cx2, cy2, cxy );
        eedi2_gaussian_blur_sqrt2( cx2, tmpc, cx2, pitch, half_height, width);
        eedi2_gaussian_blur_sqrt2( cy2, tmpc, cy2, pitch, half_height, width);
        eedi2_gaussian_blur_sqrt2( cxy, tmpc, cxy, pitch, half_height, width);
        eedi2_post_process_corner( cx2, cy2, cxy, pitch, tmp2p2, pitch, dst2p, pitch, height, width, pv->tff );
    }
}
818
819 /*
820  *  eedi2 interpolate this plane in a single thread.
821  */
822 void eedi2_filter_thread( void *thread_args_v )
823 {
824     eedi2_arguments_t *eedi2_work = NULL;
825     hb_filter_private_t * pv;
826     int run = 1;
827     int plane;
828     eedi2_thread_arg_t *thread_args = thread_args_v;
829
830     pv = thread_args->pv;
831     plane = thread_args->plane;
832
833     hb_log("eedi2 thread started for plane %d", plane);
834
835     while( run )
836     {
837         /*
838          * Wait here until there is work to do. hb_lock() blocks until
839          * render releases it to say that there is more work to do.
840          */
841         hb_lock( pv->eedi2_begin_lock[plane] );
842
843         eedi2_work = &pv->eedi2_arguments[plane];
844
845         if( eedi2_work->stop )
846         {
847             /*
848              * No more work to do, exit this thread.
849              */
850             run = 0;
851             continue;
852         } 
853
854         /*
855          * Process plane
856          */
857             eedi2_interpolate_plane( pv, plane );
858         
859         /*
860          * Finished this segment, let everyone know.
861          */
862         hb_unlock( pv->eedi2_complete_lock[plane] );
863     }
864     free( thread_args_v );
865 }
866
867 // Sets up the input field planes for EEDI2 in pv->eedi_half[SRCPF]
868 // and then runs eedi2_filter_thread for each plane.
869 void eedi2_planer( hb_filter_private_t * pv )
870 {
871     /* Copy the first field from the source to a half-height frame. */
872     int i;
873     for( i = 0;  i < 3; i++ )
874     {
875         int pitch = pv->ref_stride[i];
876         int start_line = !pv->tff;
877         eedi2_fill_half_height_buffer_plane( &pv->ref[1][i][pitch*start_line], pv->eedi_half[SRCPF][i], pitch, pv->height[i] );
878     }
879     
880     int plane;
881     for( plane = 0; plane < 3; plane++ )
882     {  
883         /*
884          * Let the thread for this plane know that we've setup work 
885          * for it by releasing the begin lock (ensuring that the
886          * complete lock is already locked so that we block when
887          * we try to lock it again below).
888          */
889         hb_lock( pv->eedi2_complete_lock[plane] );
890         hb_unlock( pv->eedi2_begin_lock[plane] );
891     }
892
893     /*
894      * Wait until all three threads have completed by trying to get
895      * the complete lock that we locked earlier for each thread, which
896      * will block until that thread has completed the work on that
897      * plane.
898      */
899     for( plane = 0; plane < 3; plane++ )
900     {
901         hb_lock( pv->eedi2_complete_lock[plane] );
902         hb_unlock( pv->eedi2_complete_lock[plane] );
903     }
904 }
905
906
907 /*
908  * comb detect this segment of all three planes in a single thread.
909  */
910 void decomb_filter_thread( void *thread_args_v )
911 {
912     decomb_arguments_t *decomb_work = NULL;
913     hb_filter_private_t * pv;
914     int run = 1;
915     int segment, segment_start, segment_stop, plane;
916     decomb_thread_arg_t *thread_args = thread_args_v;
917
918     pv = thread_args->pv;
919     segment = thread_args->segment;
920
921     hb_log("decomb thread started for segment %d", segment);
922
923     while( run )
924     {
925         /*
926          * Wait here until there is work to do. hb_lock() blocks until
927          * render releases it to say that there is more work to do.
928          */
929         hb_lock( pv->decomb_begin_lock[segment] );
930
931         decomb_work = &pv->decomb_arguments[segment];
932
933         if( decomb_work->stop )
934         {
935             /*
936              * No more work to do, exit this thread.
937              */
938             run = 0;
939             continue;
940         } 
941
942         /*
943          * Process segment (for now just from luma)
944          */
945         for( plane = 0; plane < 1; plane++)
946         {
947
948             int h = pv->height[plane];
949             segment_start = ( h / pv->cpu_count ) * segment;
950             if( segment == pv->cpu_count - 1 )
951             {
952                 /*
953                  * Final segment
954                  */
955                 segment_stop = h;
956             } else {
957                 segment_stop = ( h / pv->cpu_count ) * ( segment + 1 );
958             }
959             
960             detect_combed_segment( pv, segment_start, segment_stop );
961         }
962         /*
963          * Finished this segment, let everyone know.
964          */
965         hb_unlock( pv->decomb_complete_lock[segment] );
966     }
967     free( thread_args_v );
968 }
969
970 int comb_segmenter( hb_filter_private_t * pv )
971 {
972     int segment;
973
974     for( segment = 0; segment < pv->cpu_count; segment++ )
975     {  
976         /*
977          * Let the thread for this plane know that we've setup work 
978          * for it by releasing the begin lock (ensuring that the
979          * complete lock is already locked so that we block when
980          * we try to lock it again below).
981          */
982         hb_lock( pv->decomb_complete_lock[segment] );
983         hb_unlock( pv->decomb_begin_lock[segment] );
984     }
985
986     /*
987      * Wait until all three threads have completed by trying to get
988      * the complete lock that we locked earlier for each thread, which
989      * will block until that thread has completed the work on that
990      * plane.
991      */
992     for( segment = 0; segment < pv->cpu_count; segment++ )
993     {
994         hb_lock( pv->decomb_complete_lock[segment] );
995         hb_unlock( pv->decomb_complete_lock[segment] );
996     }
997     
998     return check_combing_mask( pv );
999 }
1000
1001 static void yadif_filter_line( uint8_t *dst,
1002                                uint8_t *prev,
1003                                uint8_t *cur,
1004                                uint8_t *next,
1005                                int plane,
1006                                int parity,
1007                                int y,
1008                                hb_filter_private_t * pv )
1009 {
1010     /* While prev and next point to the previous and next frames,
1011        prev2 and next2 will shift depending on the parity, usually 1.
1012        They are the previous and next fields, the fields temporally adjacent
1013        to the other field in the current frame--the one not being filtered.  */
1014     uint8_t *prev2 = parity ? prev : cur ;
1015     uint8_t *next2 = parity ? cur  : next;
1016     
1017     int w = pv->width[plane];
1018     int refs = pv->ref_stride[plane];
1019     int x;
1020     int eedi2_mode = ( pv->mode & MODE_EEDI2 );
1021     
1022     /* We can replace spatial_pred with this interpolation*/
1023     uint8_t * eedi2_guess = &pv->eedi_full[DST2PF][plane][y*refs];
1024
1025     /* Decomb's cubic interpolation can only function when there are
1026        three samples above and below, so regress to yadif's traditional
1027        two-tap interpolation when filtering at the top and bottom edges. */
1028     int vertical_edge = 0;
1029     if( ( y < 3 ) || ( y > ( pv->height[plane] - 4 ) )  )
1030         vertical_edge = 1;
1031
1032     for( x = 0; x < w; x++)
1033     {
1034         /* Pixel above*/
1035         int c              = cur[-refs];
1036         /* Temporal average: the current location in the adjacent fields */
1037         int d              = (prev2[0] + next2[0])>>1;
1038         /* Pixel below */
1039         int e              = cur[+refs];
1040         
1041         /* How the current pixel changes between the adjacent fields */
1042         int temporal_diff0 = ABS(prev2[0] - next2[0]);
1043         /* The average of how much the pixels above and below change from the frame before to now. */
1044         int temporal_diff1 = ( ABS(prev[-refs] - cur[-refs]) + ABS(prev[+refs] - cur[+refs]) ) >> 1;
1045         /* The average of how much the pixels above and below change from now to the next frame. */
1046         int temporal_diff2 = ( ABS(next[-refs] - cur[-refs]) + ABS(next[+refs] - cur[+refs]) ) >> 1;
1047         /* For the actual difference, use the largest of the previous average diffs. */
1048         int diff           = MAX3(temporal_diff0>>1, temporal_diff1, temporal_diff2);
1049
1050         int spatial_pred;
1051         
1052         if( eedi2_mode )
1053         {
1054             /* Who needs yadif's spatial predictions when we can have EEDI2's? */
1055             spatial_pred = eedi2_guess[0];
1056             eedi2_guess++;
1057         }
1058         else // Yadif spatial interpolation
1059         {
1060             /* SAD of how the pixel-1, the pixel, and the pixel+1 change from the line above to below. */ 
1061             int spatial_score  = ABS(cur[-refs-1] - cur[+refs-1]) + ABS(cur[-refs]-cur[+refs]) +
1062                                          ABS(cur[-refs+1] - cur[+refs+1]) - 1;         
1063             
1064             /* Spatial pred is either a bilinear or cubic vertical interpolation. */
1065             if( ( pv->mode & MODE_CUBIC ) && !vertical_edge)
1066             {
1067                 spatial_pred = cubic_interpolate_pixel( cur[-3*refs], cur[-refs], cur[+refs], cur[3*refs] );
1068             }
1069             else
1070             {
1071                 spatial_pred = (c+e)>>1;
1072             }
1073
1074         /* EDDI: Edge Directed Deinterlacing Interpolation
1075            Checks 4 different slopes to see if there is more similarity along a diagonal
1076            than there was vertically. If a diagonal is more similar, then it indicates
1077            an edge, so interpolate along that instead of a vertical line, using either
1078            linear or cubic interpolation depending on mode. */
1079         #define YADIF_CHECK(j)\
1080                 {   int score = ABS(cur[-refs-1+j] - cur[+refs-1-j])\
1081                               + ABS(cur[-refs  +j] - cur[+refs  -j])\
1082                               + ABS(cur[-refs+1+j] - cur[+refs+1-j]);\
1083                     if( score < spatial_score ){\
1084                         spatial_score = score;\
1085                         if( ( pv->mode & MODE_CUBIC ) && !vertical_edge )\
1086                         {\
1087                             switch(j)\
1088                             {\
1089                                 case -1:\
1090                                     spatial_pred = cubic_interpolate_pixel(cur[-3 * refs - 3], cur[-refs -1], cur[+refs + 1], cur[3* refs + 3] );\
1091                                 break;\
1092                                 case -2:\
1093                                     spatial_pred = cubic_interpolate_pixel( ( ( cur[-3*refs - 4] + cur[-refs - 4] ) / 2 ) , cur[-refs -2], cur[+refs + 2], ( ( cur[3*refs + 4] + cur[refs + 4] ) / 2 ) );\
1094                                 break;\
1095                                 case 1:\
1096                                     spatial_pred = cubic_interpolate_pixel(cur[-3 * refs +3], cur[-refs +1], cur[+refs - 1], cur[3* refs -3] );\
1097                                 break;\
1098                                 case 2:\
1099                                     spatial_pred = cubic_interpolate_pixel(( ( cur[-3*refs + 4] + cur[-refs + 4] ) / 2 ), cur[-refs +2], cur[+refs - 2], ( ( cur[3*refs - 4] + cur[refs - 4] ) / 2 ) );\
1100                                 break;\
1101                             }\
1102                         }\
1103                         else\
1104                         {\
1105                             spatial_pred = ( cur[-refs +j] + cur[+refs -j] ) >>1;\
1106                         }\
1107
1108                         if( x >= 2 && x <= w - 3 )
1109                         {
1110                             YADIF_CHECK(-1)
1111                             if( x >= 3 && x <= w - 4 )
1112                             {
1113                                 YADIF_CHECK(-2) }} }}
1114                             }
1115                         }
1116                         if( x >= 2 && x <= w - 3 )
1117                         {
1118                             YADIF_CHECK(1)
1119                             if( x >= 3 && x <= w - 4 )
1120                             {
1121                                 YADIF_CHECK(2) }} }}
1122                             }
1123                         }
1124         }
1125
1126         /* Temporally adjust the spatial prediction by
1127            comparing against lines in the adjacent fields. */
1128         int b = (prev2[-2*refs] + next2[-2*refs])>>1;
1129         int f = (prev2[+2*refs] + next2[+2*refs])>>1;
1130         
1131         /* Find the median value */
1132         int max = MAX3(d-e, d-c, MIN(b-c, f-e));
1133         int min = MIN3(d-e, d-c, MAX(b-c, f-e));
1134         diff = MAX3( diff, min, -max );
1135         
1136         if( spatial_pred > d + diff )
1137         {
1138             spatial_pred = d + diff;
1139         }
1140         else if( spatial_pred < d - diff )
1141         {
1142             spatial_pred = d - diff;
1143         }
1144         
1145         dst[0] = spatial_pred;
1146                         
1147         dst++;
1148         cur++;
1149         prev++;
1150         next++;
1151         prev2++;
1152         next2++;
1153     }
1154 }
1155
1156 /*
1157  * deinterlace this segment of all three planes in a single thread.
1158  */
1159 void yadif_decomb_filter_thread( void *thread_args_v )
1160 {
1161     yadif_arguments_t *yadif_work = NULL;
1162     hb_filter_private_t * pv;
1163     int run = 1;
1164     int plane;
1165     int segment, segment_start, segment_stop;
1166     yadif_thread_arg_t *thread_args = thread_args_v;
1167     uint8_t **dst;
1168     int parity, tff, y, w, h, penultimate, ultimate, ref_stride, is_combed;
1169
1170     pv = thread_args->pv;
1171     segment = thread_args->segment;
1172
1173     hb_log("yadif thread started for segment %d", segment);
1174
1175     while( run )
1176     {
1177         /*
1178          * Wait here until there is work to do. hb_lock() blocks until
1179          * render releases it to say that there is more work to do.
1180          */
1181         hb_lock( pv->yadif_begin_lock[segment] );
1182
1183         yadif_work = &pv->yadif_arguments[segment];
1184
1185         if( yadif_work->stop )
1186         {
1187             /*
1188              * No more work to do, exit this thread.
1189              */
1190             run = 0;
1191             continue;
1192         } 
1193
1194         if( yadif_work->dst == NULL )
1195         {
1196             hb_error( "thread started when no work available" );
1197             hb_snooze(500);
1198             continue;
1199         }
1200         
1201         is_combed = pv->yadif_arguments[segment].is_combed;
1202
1203         /*
1204          * Process all three planes, but only this segment of it.
1205          */
1206         for( plane = 0; plane < 3; plane++)
1207         {
1208
1209             dst = yadif_work->dst;
1210             parity = yadif_work->parity;
1211             tff = yadif_work->tff;
1212             w = pv->width[plane];
1213             h = pv->height[plane];
1214             penultimate = h - 2;
1215             ultimate = h - 1;
1216             ref_stride = pv->ref_stride[plane];
1217             segment_start = ( h / pv->cpu_count ) * segment;
1218             if( segment == pv->cpu_count - 1 )
1219             {
1220                 /*
1221                  * Final segment
1222                  */
1223                 segment_stop = h;
1224             } else {
1225                 segment_stop = ( h / pv->cpu_count ) * ( segment + 1 );
1226             }
1227
1228             for( y = segment_start; y < segment_stop; y++ )
1229             {
1230                 if( is_combed == 2 )
1231                 {
1232                     /* This line gets blend filtered, not yadif filtered. */
1233                     uint8_t *cur  = &pv->ref[1][plane][y*ref_stride];
1234                     uint8_t *dst2 = &dst[plane][y*w];
1235                     /* These will be useful if we ever do temporal blending. */
1236                     // uint8_t *prev = &pv->ref[0][plane][y*ref_stride];
1237                     // uint8_t *next = &pv->ref[2][plane][y*ref_stride];
1238
1239                     blend_filter_line( dst2, cur, plane, y, pv );
1240                 }
1241                 else if( pv->mode == MODE_CUBIC && is_combed && ( ( y ^ parity ) & 1 ) )
1242                 {
1243                     /* Just apply vertical cubic interpolation */
1244                     uint8_t *cur  = &pv->ref[1][plane][y*ref_stride];
1245                     uint8_t *dst2 = &dst[plane][y*w];
1246                     
1247                     cubic_interpolate_line( dst2, cur, plane, y, pv );
1248                 }
1249                 else if( pv->mode & MODE_YADIF && ( ( y ^ parity ) &  1 )  && ( is_combed == 1 ) )
1250                 {
1251                     /* This line gets yadif filtered. It is the bottom field
1252                        when TFF and vice-versa. It's the field that gets
1253                        filtered. Because yadif needs 2 lines above and below
1254                        the one being filtered, we need to mirror the edges.
1255                        When TFF, this means replacing the 2nd line with a
1256                        copy of the 1st, and the last with the second-to-last. */
1257                     if( y > 1 && y < ( h -2 ) )
1258                     {
1259                         /* This isn't the top or bottom, proceed as normal to yadif. */
1260                         uint8_t *prev = &pv->ref[0][plane][y*ref_stride];
1261                         uint8_t *cur  = &pv->ref[1][plane][y*ref_stride];
1262                         uint8_t *next = &pv->ref[2][plane][y*ref_stride];
1263                         uint8_t *dst2 = &dst[plane][y*w];
1264
1265                         yadif_filter_line( dst2, 
1266                                            prev, 
1267                                            cur, 
1268                                            next, 
1269                                            plane, 
1270                                            parity ^ tff,
1271                                            y, 
1272                                            pv );
1273                     }
1274                     else if( y == 0 )
1275                     {
1276                         /* BFF, so y0 = y1 */
1277                         memcpy( &dst[plane][y*w],
1278                                 &pv->ref[1][plane][1*ref_stride],
1279                                 w * sizeof(uint8_t) );
1280                     }
1281                     else if( y == 1 )
1282                     {
1283                         /* TFF, so y1 = y0 */
1284                         memcpy( &dst[plane][y*w],
1285                                 &pv->ref[1][plane][0],
1286                                 w * sizeof(uint8_t) );
1287                     }
1288                     else if( y == penultimate )
1289                     {
1290                         /* BFF, so penultimate y = ultimate y */
1291                         memcpy( &dst[plane][y*w],
1292                                 &pv->ref[1][plane][ultimate*ref_stride],
1293                                 w * sizeof(uint8_t) );
1294                     }
1295                     else if( y == ultimate )
1296                     {
1297                         /* TFF, so ultimate y = penultimate y */
1298                         memcpy( &dst[plane][y*w],
1299                                 &pv->ref[1][plane][penultimate*ref_stride],
1300                                 w * sizeof(uint8_t) );
1301                     }
1302                 }
1303                 else
1304                 {
1305                     memcpy( &dst[plane][y*w],
1306                             &pv->ref[1][plane][y*ref_stride],
1307                             w * sizeof(uint8_t) );              
1308                 }
1309             }
1310         }
1311         /*
1312          * Finished this segment, let everyone know.
1313          */
1314         hb_unlock( pv->yadif_complete_lock[segment] );
1315     }
1316     free( thread_args_v );
1317 }
1318
1319 static void yadif_filter( uint8_t ** dst,
1320                           int parity,
1321                           int tff,
1322                           hb_filter_private_t * pv )
1323 {
1324     /* If we're running comb detection, do it now, otherwise default to true. */
1325     int is_combed = pv->spatial_metric >= 0 ? comb_segmenter( pv ) : 1;
1326     
1327     /* The comb detector suggests three different values:
1328        0: Don't comb this frame.
1329        1: Deinterlace this frame.
1330        2: Blend this frame.
1331        Since that might conflict with the filter's mode,
1332        it may be necesary to adjust this value.          */
1333     if( is_combed == 1 && (pv->mode == MODE_BLEND) )
1334     {
1335         /* All combed frames are getting blended */
1336         is_combed = 2;
1337     }
1338     else if( is_combed == 2 && !( pv->mode & MODE_BLEND ) )
1339     {
1340         /* Blending is disabled, so force interpolation of these frames. */
1341         is_combed = 1;
1342     }
1343     if( is_combed == 1 &&
1344         ( pv->mode & MODE_BLEND ) &&
1345         !( pv->mode & ( MODE_YADIF | MODE_EEDI2 | MODE_CUBIC ) ) )
1346     {
1347         /* Deinterlacers are disabled, blending isn't, so blend these frames. */
1348         is_combed = 2;
1349     }
1350     else if( is_combed &&
1351              !( pv->mode & ( MODE_BLEND | MODE_YADIF | MODE_EEDI2 | MODE_CUBIC | MODE_MASK ) ) )
1352     {
1353         /* No deinterlacer or mask chosen, pass the frame through. */
1354         is_combed = 0;
1355     }
1356     
1357     if( is_combed == 1 )
1358     {
1359         pv->deinterlaced_frames++;
1360     }
1361     else if( is_combed == 2 )
1362     {
1363         pv->blended_frames++;
1364     }
1365     else
1366     {
1367         pv->unfiltered_frames++;
1368     }
1369     
1370     if( is_combed == 1 && ( pv->mode & MODE_EEDI2 ) )
1371     {
1372         /* Generate an EEDI2 interpolation */
1373         eedi2_planer( pv );
1374     }
1375     
1376     if( is_combed )
1377     {
1378         if( ( pv->mode & MODE_EEDI2 ) && !( pv->mode & MODE_YADIF ) && is_combed == 1 )
1379         {
1380             // Just pass through the EEDI2 interpolation
1381             int i;
1382             for( i = 0; i < 3; i++ )
1383             {
1384                 uint8_t * ref = pv->eedi_full[DST2PF][i];
1385                 uint8_t * dest = dst[i];
1386
1387                 int w = pv->width[i];
1388                 int ref_stride = pv->ref_stride[i];
1389
1390                 int y;
1391                 for( y = 0; y < pv->height[i]; y++ )
1392                 {
1393                     memcpy(dest, ref, w);
1394                     dest += w;
1395                     ref += ref_stride;
1396                 }
1397             }
1398         }
1399         else
1400         {
1401             int segment;
1402
1403             for( segment = 0; segment < pv->cpu_count; segment++ )
1404             {  
1405                 /*
1406                  * Setup the work for this plane.
1407                  */
1408                 pv->yadif_arguments[segment].parity = parity;
1409                 pv->yadif_arguments[segment].tff = tff;
1410                 pv->yadif_arguments[segment].dst = dst;
1411                 pv->yadif_arguments[segment].is_combed = is_combed;
1412
1413                 /*
1414                  * Let the thread for this plane know that we've setup work 
1415                  * for it by releasing the begin lock (ensuring that the
1416                  * complete lock is already locked so that we block when
1417                  * we try to lock it again below).
1418                  */
1419                 hb_lock( pv->yadif_complete_lock[segment] );
1420                 hb_unlock( pv->yadif_begin_lock[segment] );
1421             }
1422
1423             /*
1424              * Wait until all three threads have completed by trying to get
1425              * the complete lock that we locked earlier for each thread, which
1426              * will block until that thread has completed the work on that
1427              * plane.
1428              */
1429             for( segment = 0; segment < pv->cpu_count; segment++ )
1430             {
1431                 hb_lock( pv->yadif_complete_lock[segment] );
1432                 hb_unlock( pv->yadif_complete_lock[segment] );
1433             }
1434
1435             /*
1436              * Entire frame is now deinterlaced.
1437              */
1438         }
1439     }
1440     else
1441     {
1442         /*  Just passing through... */
1443         
1444         /* For mcdeint's benefit... */
1445         pv->yadif_arguments[0].is_combed = is_combed; // 0
1446         
1447         int i;
1448         for( i = 0; i < 3; i++ )
1449         {
1450             uint8_t * ref = pv->ref[1][i];
1451             uint8_t * dest = dst[i];
1452             
1453             int w = pv->width[i];
1454             int ref_stride = pv->ref_stride[i];
1455             
1456             int y;
1457             for( y = 0; y < pv->height[i]; y++ )
1458             {
1459                 memcpy(dest, ref, w);
1460                 dest += w;
1461                 ref += ref_stride;
1462             }
1463         }
1464     }
1465     
1466     if( pv->mode & MODE_MASK && pv->spatial_metric >= 0 )
1467     {
1468         if( pv->mode == MODE_MASK || is_combed )
1469         apply_mask( pv );
1470     }
1471 }
1472
1473 static void mcdeint_filter( uint8_t ** dst,
1474                             uint8_t ** src,
1475                             int parity,
1476                             hb_filter_private_t * pv )
1477 {
1478     int x, y, i;
1479     int out_size;
1480
1481 #ifdef SUPPRESS_AV_LOG
1482     /* TODO: temporarily change log level to suppress obnoxious debug output */
1483     int loglevel = av_log_get_level();
1484     av_log_set_level( AV_LOG_QUIET );
1485 #endif
1486
1487     for( i=0; i<3; i++ )
1488     {
1489         pv->mcdeint_frame->data[i] = src[i];
1490         pv->mcdeint_frame->linesize[i] = pv->width[i];
1491     }
1492     pv->mcdeint_avctx_enc->me_cmp     = FF_CMP_SAD;
1493     pv->mcdeint_avctx_enc->me_sub_cmp = FF_CMP_SAD;
1494     pv->mcdeint_frame->quality        = pv->mcdeint_qp * FF_QP2LAMBDA;
1495
1496     out_size = avcodec_encode_video( pv->mcdeint_avctx_enc,
1497                                      pv->mcdeint_outbuf,
1498                                      pv->mcdeint_outbuf_size,
1499                                      pv->mcdeint_frame );
1500
1501     pv->mcdeint_frame_dec = pv->mcdeint_avctx_enc->coded_frame;
1502
1503     for( i = 0; i < 3; i++ )
1504     {
1505         int w    = pv->width[i];
1506         int h    = pv->height[i];
1507         int fils = pv->mcdeint_frame_dec->linesize[i];
1508         int srcs = pv->width[i];
1509
1510         for( y = 0; y < h; y++ )
1511         {
1512             if( (y ^ parity) & 1 )
1513             {
1514                 for( x = 0; x < w; x++ )
1515                 {
1516                     if( (x-1)+(y-1)*w >= 0 && (x+1)+(y+1)*w < w*h )
1517                     {
1518                         uint8_t * filp =
1519                             &pv->mcdeint_frame_dec->data[i][x + y*fils];
1520                         uint8_t * srcp = &src[i][x + y*srcs];
1521
1522                         int diff0 = filp[-fils] - srcp[-srcs];
1523                         int diff1 = filp[+fils] - srcp[+srcs];
1524                         int spatial_score;
1525                         
1526                         spatial_score =
1527                             ABS(srcp[-srcs-1] - srcp[+srcs-1]) +
1528                             ABS(srcp[-srcs  ] - srcp[+srcs  ]) +
1529                             ABS(srcp[-srcs+1] - srcp[+srcs+1]) - 1;
1530
1531                         int temp = filp[0];
1532
1533 #define MCDEINT_CHECK(j)\
1534                         {   int score = ABS(srcp[-srcs-1+j] - srcp[+srcs-1-j])\
1535                                       + ABS(srcp[-srcs  +j] - srcp[+srcs  -j])\
1536                                       + ABS(srcp[-srcs+1+j] - srcp[+srcs+1-j]);\
1537                             if( score < spatial_score ) {\
1538                                 spatial_score = score;\
1539                                 diff0 = filp[-fils+j] - srcp[-srcs+j];\
1540                                 diff1 = filp[+fils-j] - srcp[+srcs-j];
1541
1542                         if( x >= 2 && x <= w - 3 )
1543                         {
1544                             MCDEINT_CHECK(-1)
1545                             if( x >= 3 && x <= w - 4 )
1546                             {
1547                                 MCDEINT_CHECK(-2) }} }}
1548                             }
1549                         }
1550                         if( x >= 2 && x <= w - 3 )
1551                         {
1552                             MCDEINT_CHECK(1)
1553                             if( x >= 3 && x <= w - 4 )
1554                             {
1555                                 MCDEINT_CHECK(2) }} }}
1556                             }
1557                         }
1558
1559                         if(diff0 + diff1 > 0)
1560                         {
1561                             temp -= (diff0 + diff1 -
1562                                      ABS( ABS(diff0) - ABS(diff1) ) / 2) / 2;
1563                         }
1564                         else
1565                         {
1566                             temp -= (diff0 + diff1 +
1567                                      ABS( ABS(diff0) - ABS(diff1) ) / 2) / 2;
1568                         }
1569
1570                         filp[0] = dst[i][x + y*w] =
1571                             temp > 255U ? ~(temp>>31) : temp;
1572                     }
1573                     else
1574                     {
1575                         dst[i][x + y*w] =
1576                             pv->mcdeint_frame_dec->data[i][x + y*fils];
1577                     }
1578                 }
1579             }
1580             else
1581             {
1582                 for( x = 0; x < w; x++ )
1583                 {
1584                     pv->mcdeint_frame_dec->data[i][x + y*fils] =
1585                         dst[i][x + y*w]= src[i][x + y*srcs];
1586                 }
1587             }
1588         }
1589     }
1590
1591 #ifdef SUPPRESS_AV_LOG
1592     /* TODO: restore previous log level */
1593     av_log_set_level(loglevel);
1594 #endif
1595 }
1596
1597 hb_filter_private_t * hb_decomb_init( int pix_fmt,
1598                                            int width,
1599                                            int height,
1600                                            char * settings )
1601 {
1602     if( pix_fmt != PIX_FMT_YUV420P )
1603     {
1604         return 0;
1605     }
1606
1607     hb_filter_private_t * pv = calloc( 1, sizeof(struct hb_filter_private_s) );
1608
1609     pv->pix_fmt = pix_fmt;
1610
1611     pv->width[0]  = width;
1612     pv->height[0] = height;
1613     pv->width[1]  = pv->width[2]  = width >> 1;
1614     pv->height[1] = pv->height[2] = height >> 1;
1615
1616     pv->buf_out[0] = hb_video_buffer_init( width, height );
1617     pv->buf_out[1] = hb_video_buffer_init( width, height );
1618     pv->buf_settings = hb_buffer_init( 0 );
1619
1620     pv->deinterlaced_frames = 0;
1621     pv->blended_frames = 0;
1622     pv->unfiltered_frames = 0;
1623
1624     pv->yadif_ready    = 0;
1625
1626     pv->mode     = MODE_YADIF | MODE_BLEND | MODE_CUBIC;
1627     pv->spatial_metric = 2;
1628     pv->motion_threshold = 6;
1629     pv->spatial_threshold = 9;
1630     pv->block_threshold = 80;
1631     pv->block_width = 16;
1632     pv->block_height = 16;
1633     
1634     pv->magnitude_threshold = 10;
1635     pv->variance_threshold = 20;
1636     pv->laplacian_threshold = 20;
1637     pv->dilation_threshold = 4;
1638     pv->erosion_threshold = 2;
1639     pv->noise_threshold = 50;
1640     pv->maximum_search_distance = 24;
1641     pv->post_processing = 1;
1642
1643     pv->parity   = PARITY_DEFAULT;
1644
1645     pv->mcdeint_mode   = MCDEINT_MODE_DEFAULT;
1646     pv->mcdeint_qp     = MCDEINT_QP_DEFAULT;
1647
1648     if( settings )
1649     {
1650         sscanf( settings, "%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d",
1651                 &pv->mode,
1652                 &pv->spatial_metric,
1653                 &pv->motion_threshold,
1654                 &pv->spatial_threshold,
1655                 &pv->block_threshold,
1656                 &pv->block_width,
1657                 &pv->block_height,
1658                 &pv->magnitude_threshold,
1659                 &pv->variance_threshold,
1660                 &pv->laplacian_threshold,
1661                 &pv->dilation_threshold,
1662                 &pv->erosion_threshold,
1663                 &pv->noise_threshold,
1664                 &pv->maximum_search_distance,
1665                 &pv->post_processing,
1666                 &pv->parity );
1667     }
1668     
1669     pv->cpu_count = hb_get_cpu_count();
1670     
1671
1672     if( pv->mode & MODE_MCDEINT )
1673     {
1674         pv->mcdeint_mode = 2;
1675     }
1676     
1677     /* Allocate yadif specific buffers */
1678     int i, j;
1679     for( i = 0; i < 3; i++ )
1680     {
1681         int is_chroma = !!i;
1682         int w = ((width   + 31) & (~31))>>is_chroma;
1683         int h = ((height+6+ 31) & (~31))>>is_chroma;
1684
1685         pv->ref_stride[i] = w;
1686
1687         for( j = 0; j < 3; j++ )
1688         {
1689             pv->ref[j][i] = calloc( 1, w*h*sizeof(uint8_t) ) + 3*w;
1690         }
1691     }
1692
1693     /* Allocate a buffer to store a comb mask. */
1694     for( i = 0; i < 3; i++ )
1695     {
1696         int is_chroma = !!i;
1697         int w = ((pv->width[0]   + 31) & (~31))>>is_chroma;
1698         int h = ((pv->height[0]+6+ 31) & (~31))>>is_chroma;
1699
1700         pv->mask[i] = calloc( 1, w*h*sizeof(uint8_t) ) + 3*w;
1701     }
1702     
1703     if( pv->mode & MODE_EEDI2 )
1704     {
1705         /* Allocate half-height eedi2 buffers */
1706         height = pv->height[0] / 2;
1707         for( i = 0; i < 3; i++ )
1708         {
1709             int is_chroma = !!i;
1710             int w = ((width   + 31) & (~31))>>is_chroma;
1711             int h = ((height+6+ 31) & (~31))>>is_chroma;
1712
1713             for( j = 0; j < 4; j++ )
1714             {
1715                 pv->eedi_half[j][i] = calloc( 1, w*h*sizeof(uint8_t) ) + 3*w;
1716             }
1717         }
1718
1719         /* Allocate full-height eedi2 buffers */
1720         height = pv->height[0];
1721         for( i = 0; i < 3; i++ )
1722         {
1723             int is_chroma = !!i;
1724             int w = ((width   + 31) & (~31))>>is_chroma;
1725             int h = ((height+6+ 31) & (~31))>>is_chroma;
1726
1727             for( j = 0; j < 5; j++ )
1728             {
1729                 pv->eedi_full[j][i] = calloc( 1, w*h*sizeof(uint8_t) ) + 3*w;
1730             }
1731         }
1732     }
1733     
1734      /*
1735       * Create yadif threads and locks.
1736       */
1737      pv->yadif_threads = malloc( sizeof( hb_thread_t* ) * pv->cpu_count );
1738      pv->yadif_begin_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
1739      pv->yadif_complete_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
1740      pv->yadif_arguments = malloc( sizeof( yadif_arguments_t ) * pv->cpu_count );
1741
1742      for( i = 0; i < pv->cpu_count; i++ )
1743      {
1744          yadif_thread_arg_t *thread_args;
1745
1746          thread_args = malloc( sizeof( yadif_thread_arg_t ) );
1747
1748          if( thread_args )
1749          {
1750              thread_args->pv = pv;
1751              thread_args->segment = i;
1752
1753              pv->yadif_begin_lock[i] = hb_lock_init();
1754              pv->yadif_complete_lock[i] = hb_lock_init();
1755
1756              /*
1757               * Important to start off with the threads locked waiting
1758               * on input.
1759               */
1760              hb_lock( pv->yadif_begin_lock[i] );
1761
1762              pv->yadif_arguments[i].stop = 0;
1763              pv->yadif_arguments[i].dst = NULL;
1764              
1765              pv->yadif_threads[i] = hb_thread_init( "yadif_filter_segment",
1766                                                     yadif_decomb_filter_thread,
1767                                                     thread_args,
1768                                                     HB_NORMAL_PRIORITY );
1769          }
1770          else
1771          {
1772              hb_error( "yadif could not create threads" );
1773          }
1774     }
1775     
1776     /*
1777      * Create decomb threads and locks.
1778      */
1779     pv->decomb_threads = malloc( sizeof( hb_thread_t* ) * pv->cpu_count );
1780     pv->decomb_begin_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
1781     pv->decomb_complete_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
1782     pv->decomb_arguments = malloc( sizeof( decomb_arguments_t ) * pv->cpu_count );
1783     
1784     for( i = 0; i < pv->cpu_count; i++ )
1785     {
1786         decomb_thread_arg_t *decomb_thread_args;
1787     
1788         decomb_thread_args = malloc( sizeof( decomb_thread_arg_t ) );
1789     
1790         if( decomb_thread_args )
1791         {
1792             decomb_thread_args->pv = pv;
1793             decomb_thread_args->segment = i;
1794     
1795             pv->decomb_begin_lock[i] = hb_lock_init();
1796             pv->decomb_complete_lock[i] = hb_lock_init();
1797     
1798             /*
1799              * Important to start off with the threads locked waiting
1800              * on input.
1801              */
1802             hb_lock( pv->decomb_begin_lock[i] );
1803     
1804             pv->decomb_arguments[i].stop = 0;
1805     
1806             pv->decomb_threads[i] = hb_thread_init( "decomb_filter_segment",
1807                                                    decomb_filter_thread,
1808                                                    decomb_thread_args,
1809                                                    HB_NORMAL_PRIORITY );
1810         }
1811         else
1812         {
1813             hb_error( "decomb could not create threads" );
1814         }
1815     }
1816     
1817     if( pv->mode & MODE_EEDI2 )
1818     {
1819         /*
1820          * Create eedi2 threads and locks.
1821          */
1822         pv->eedi2_threads = malloc( sizeof( hb_thread_t* ) * 3 );
1823         pv->eedi2_begin_lock = malloc( sizeof( hb_lock_t * ) * 3 );
1824         pv->eedi2_complete_lock = malloc( sizeof( hb_lock_t * ) * 3 );
1825         pv->eedi2_arguments = malloc( sizeof( eedi2_arguments_t ) * 3 );
1826
1827         if( pv->post_processing > 1 )
1828         {
1829             pv->cx2 = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16);
1830             pv->cy2 = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16);
1831             pv->cxy = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16);
1832             pv->tmpc = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16);
1833             if( !pv->cx2 || !pv->cy2 || !pv->cxy || !pv->tmpc )
1834                 hb_log("EEDI2: failed to malloc derivative arrays");
1835             else
1836                 hb_log("EEDI2: successfully mallloced derivative arrays");
1837         }
1838
1839         for( i = 0; i < 3; i++ )
1840         {
1841             eedi2_thread_arg_t *eedi2_thread_args;
1842
1843             eedi2_thread_args = malloc( sizeof( eedi2_thread_arg_t ) );
1844
1845             if( eedi2_thread_args )
1846             {
1847                 eedi2_thread_args->pv = pv;
1848                 eedi2_thread_args->plane = i;
1849
1850                 pv->eedi2_begin_lock[i] = hb_lock_init();
1851                 pv->eedi2_complete_lock[i] = hb_lock_init();
1852
1853                 /*
1854                  * Important to start off with the threads locked waiting
1855                  * on input.
1856                  */
1857                 hb_lock( pv->eedi2_begin_lock[i] );
1858
1859                 pv->eedi2_arguments[i].stop = 0;
1860
1861                 pv->eedi2_threads[i] = hb_thread_init( "eedi2_filter_segment",
1862                                                        eedi2_filter_thread,
1863                                                        eedi2_thread_args,
1864                                                        HB_NORMAL_PRIORITY );
1865             }
1866             else
1867             {
1868                 hb_error( "eedi2 could not create threads" );
1869             }
1870         }
1871     }
1872     
1873     
1874     /* Allocate mcdeint specific buffers */
1875     if( pv->mcdeint_mode >= 0 )
1876     {
1877         avcodec_init();
1878         avcodec_register_all();
1879         AVCodec * enc = avcodec_find_encoder( CODEC_ID_SNOW );
1880         int i;
1881         for (i = 0; i < 3; i++ )
1882         {
1883             AVCodecContext * avctx_enc;
1884
1885             avctx_enc = pv->mcdeint_avctx_enc = avcodec_alloc_context();
1886
1887             avctx_enc->width                    = width;
1888             avctx_enc->height                   = height;
1889             avctx_enc->time_base                = (AVRational){1,25};  // meaningless
1890             avctx_enc->gop_size                 = 300;
1891             avctx_enc->max_b_frames             = 0;
1892             avctx_enc->pix_fmt                  = PIX_FMT_YUV420P;
1893             avctx_enc->flags                    = CODEC_FLAG_QSCALE | CODEC_FLAG_LOW_DELAY;
1894             avctx_enc->strict_std_compliance    = FF_COMPLIANCE_EXPERIMENTAL;
1895             avctx_enc->global_quality           = 1;
1896             avctx_enc->flags2                   = CODEC_FLAG2_MEMC_ONLY;
1897             avctx_enc->me_cmp                   = FF_CMP_SAD; //SSE;
1898             avctx_enc->me_sub_cmp               = FF_CMP_SAD; //SSE;
1899             avctx_enc->mb_cmp                   = FF_CMP_SSE;
1900
1901             switch( pv->mcdeint_mode )
1902             {
1903                 case 3:
1904                     avctx_enc->refs = 3;
1905                 case 2:
1906                     avctx_enc->me_method = ME_ITER;
1907                 case 1:
1908                     avctx_enc->flags |= CODEC_FLAG_4MV;
1909                     avctx_enc->dia_size =2;
1910                 case 0:
1911                     avctx_enc->flags |= CODEC_FLAG_QPEL;
1912             }
1913
1914             hb_avcodec_open(avctx_enc, enc);
1915         }
1916
1917         pv->mcdeint_frame       = avcodec_alloc_frame();
1918         pv->mcdeint_outbuf_size = width * height * 10;
1919         pv->mcdeint_outbuf      = malloc( pv->mcdeint_outbuf_size );
1920     }
1921
1922     return pv;
1923 }
1924
1925 void hb_decomb_close( hb_filter_private_t * pv )
1926 {
1927     if( !pv )
1928     {
1929         return;
1930     }
1931     
1932     hb_log("decomb: deinterlaced %i | blended %i | unfiltered %i | total %i", pv->deinterlaced_frames, pv->blended_frames, pv->unfiltered_frames, pv->deinterlaced_frames + pv->blended_frames + pv->unfiltered_frames);
1933
1934     /* Cleanup frame buffers */
1935     if( pv->buf_out[0] )
1936     {
1937         hb_buffer_close( &pv->buf_out[0] );
1938     }
1939     if( pv->buf_out[1] )
1940     {
1941         hb_buffer_close( &pv->buf_out[1] );
1942     }
1943     if (pv->buf_settings )
1944     {
1945         hb_buffer_close( &pv->buf_settings );
1946     }
1947
1948     /* Cleanup yadif specific buffers */
1949     int i;
1950     for( i = 0; i<3*3; i++ )
1951     {
1952         uint8_t **p = &pv->ref[i%3][i/3];
1953         if (*p)
1954         {
1955             free( *p - 3*pv->ref_stride[i/3] );
1956             *p = NULL;
1957         }
1958     }
1959     
1960     /* Cleanup combing mask. */
1961     for( i = 0; i<3*3; i++ )
1962     {
1963         uint8_t **p = &pv->mask[i/3];
1964         if (*p)
1965         {
1966             free( *p - 3*pv->ref_stride[i/3] );
1967             *p = NULL;
1968         }
1969     }
1970     
1971     if( pv->mode & MODE_EEDI2 )
1972     {
1973         /* Cleanup eedi-half  buffers */
1974         int j;
1975         for( i = 0; i<3; i++ )
1976         {
1977             for( j = 0; j < 4; j++ )
1978             {
1979                 uint8_t **p = &pv->eedi_half[j][i];
1980                 if (*p)
1981                 {
1982                     free( *p - 3*pv->ref_stride[i] );
1983                     *p = NULL;
1984                 }            
1985             }
1986         }
1987
1988         /* Cleanup eedi-full  buffers */
1989         for( i = 0; i<3; i++ )
1990         {
1991             for( j = 0; j < 5; j++ )
1992             {
1993                 uint8_t **p = &pv->eedi_full[j][i];
1994                 if (*p)
1995                 {
1996                     free( *p - 3*pv->ref_stride[i] );
1997                     *p = NULL;
1998                 }            
1999             }
2000         }
2001     }
2002     
2003     if( pv->post_processing > 1  && ( pv->mode & MODE_EEDI2 ) )
2004     {
2005         if (pv->cx2) eedi2_aligned_free(pv->cx2);
2006         if (pv->cy2) eedi2_aligned_free(pv->cy2);
2007         if (pv->cxy) eedi2_aligned_free(pv->cxy);
2008         if (pv->tmpc) eedi2_aligned_free(pv->tmpc);
2009     }
2010     
2011     for( i = 0; i < pv->cpu_count; i++)
2012     {
2013         /*
2014          * Tell each yadif thread to stop, and then cleanup.
2015          */
2016         pv->yadif_arguments[i].stop = 1;
2017         hb_unlock(  pv->yadif_begin_lock[i] );
2018
2019         hb_thread_close( &pv->yadif_threads[i] );
2020         hb_lock_close( &pv->yadif_begin_lock[i] );
2021         hb_lock_close( &pv->yadif_complete_lock[i] );
2022     }
2023     
2024     /*
2025      * free memory for yadif structs
2026      */
2027     free( pv->yadif_threads );
2028     free( pv->yadif_begin_lock );
2029     free( pv->yadif_complete_lock );
2030     free( pv->yadif_arguments );
2031     
2032     for( i = 0; i < pv->cpu_count; i++)
2033     {
2034         /*
2035          * Tell each decomb thread to stop, and then cleanup.
2036          */
2037         pv->decomb_arguments[i].stop = 1;
2038         hb_unlock(  pv->decomb_begin_lock[i] );
2039
2040         hb_thread_close( &pv->decomb_threads[i] );
2041         hb_lock_close( &pv->decomb_begin_lock[i] );
2042         hb_lock_close( &pv->decomb_complete_lock[i] );
2043     }
2044     
2045     /*
2046      * free memory for decomb structs
2047      */
2048     free( pv->decomb_threads );
2049     free( pv->decomb_begin_lock );
2050     free( pv->decomb_complete_lock );
2051     free( pv->decomb_arguments );
2052     
2053     if( pv->mode & MODE_EEDI2 )
2054     {
2055         for( i = 0; i < 3; i++)
2056         {
2057             /*
2058              * Tell each eedi2 thread to stop, and then cleanup.
2059              */
2060             pv->eedi2_arguments[i].stop = 1;
2061             hb_unlock(  pv->eedi2_begin_lock[i] );
2062
2063             hb_thread_close( &pv->eedi2_threads[i] );
2064             hb_lock_close( &pv->eedi2_begin_lock[i] );
2065             hb_lock_close( &pv->eedi2_complete_lock[i] );
2066         }
2067
2068         /*
2069          * free memory for eedi2 structs
2070          */
2071         free( pv->eedi2_threads );
2072         free( pv->eedi2_begin_lock );
2073         free( pv->eedi2_complete_lock );
2074         free( pv->eedi2_arguments );
2075     }
2076     
2077     /* Cleanup mcdeint specific buffers */
2078     if( pv->mcdeint_mode >= 0 )
2079     {
2080         if( pv->mcdeint_avctx_enc )
2081         {
2082             hb_avcodec_close( pv->mcdeint_avctx_enc );
2083             av_freep( &pv->mcdeint_avctx_enc );
2084         }
2085         if( pv->mcdeint_outbuf )
2086         {
2087             free( pv->mcdeint_outbuf );
2088         }
2089     }
2090
2091     free( pv );
2092 }
2093
2094 int hb_decomb_work( const hb_buffer_t * cbuf_in,
2095                     hb_buffer_t ** buf_out,
2096                     int pix_fmt,
2097                     int width,
2098                     int height,
2099                     hb_filter_private_t * pv )
2100 {
2101     hb_buffer_t * buf_in = (hb_buffer_t *)cbuf_in;
2102
2103     if( !pv ||
2104         pix_fmt != pv->pix_fmt ||
2105         width   != pv->width[0] ||
2106         height  != pv->height[0] )
2107     {
2108         return FILTER_FAILED;
2109     }
2110
2111     avpicture_fill( &pv->pic_in, buf_in->data,
2112                     pix_fmt, width, height );
2113
2114     /* Determine if top-field first layout */
2115     int tff;
2116     if( pv->parity < 0 )
2117     {
2118         tff = !!(buf_in->flags & PIC_FLAG_TOP_FIELD_FIRST);
2119     }
2120     else
2121     {
2122         tff = (pv->parity & 1) ^ 1;
2123     }
2124
2125     /* Store current frame in yadif cache */
2126     store_ref( (const uint8_t**)pv->pic_in.data, pv );
2127
2128     /* If yadif is not ready, store another ref and return FILTER_DELAY */
2129     if( pv->yadif_ready == 0 )
2130     {
2131         store_ref( (const uint8_t**)pv->pic_in.data, pv );
2132
2133         hb_buffer_copy_settings( pv->buf_settings, buf_in );
2134
2135         /* don't let 'work_loop' send a chapter mark upstream */
2136         buf_in->new_chap  = 0;
2137
2138         pv->yadif_ready = 1;
2139
2140         return FILTER_DELAY;
2141     }
2142
2143     /* Perform yadif filtering */        
2144     int frame;
2145     for( frame = 0; frame <= ( ( pv->mode & MODE_MCDEINT ) ? 1 : 0 ) ; frame++ )
2146 // This would be what to use for bobbing: for( frame = 0; frame <= 0 ; frame++ )
2147     {
2148
2149 #if 0        
2150         /* Perhaps skip the second run if the frame is uncombed? */
2151         if( frame && !pv->yadif_arguments[0].is_combed )
2152         {
2153             break;
2154         }
2155 #endif        
2156         int parity = frame ^ tff ^ 1;
2157
2158 // This will be for bobbing
2159 #if 0
2160         if( pv->alternator )
2161         {
2162             parity = !parity;
2163             pv->alternator = 0;
2164         }
2165         else
2166         {
2167             pv->alternator = 1;
2168         }
2169 #endif
2170         pv->tff = !parity;
2171
2172         avpicture_fill( &pv->pic_out, pv->buf_out[!(frame^1)]->data,
2173                         pix_fmt, width, height );
2174
2175         /* XXX
2176             Should check here and only bother filtering field 2 when
2177            field 1 was detected as combed.
2178            And when it's not, it's a progressive frame,
2179            so mcdeint should be skipped...
2180         */
2181         yadif_filter( pv->pic_out.data, parity, tff, pv );
2182
2183         /* Commented out code in the line below would skip mcdeint
2184            on uncombed frames. Possibly a bad idea, since mcdeint
2185            maintains the same snow context for the entire video... */
2186         if( pv->mcdeint_mode >= 0 /* && pv->yadif_arguments[0].is_combed */)
2187         {
2188             /* Perform mcdeint filtering */
2189             avpicture_fill( &pv->pic_in,  pv->buf_out[(frame^1)]->data,
2190                             pix_fmt, width, height );
2191
2192             mcdeint_filter( pv->pic_in.data, pv->pic_out.data, parity, pv );
2193         }
2194
2195         *buf_out = pv->buf_out[!(frame^1)];
2196     }
2197
2198     /* Copy buffered settings to output buffer settings */
2199     hb_buffer_copy_settings( *buf_out, pv->buf_settings );
2200
2201     /* Replace buffered settings with input buffer settings */
2202     hb_buffer_copy_settings( pv->buf_settings, buf_in );
2203
2204     /* don't let 'work_loop' send a chapter mark upstream */
2205     buf_in->new_chap  = 0;
2206
2207     return FILTER_OK;
2208 }