OSDN Git Service

CLI: Missed file from SubRip - a symptom of too many views and patches
[handbrake-jp/handbrake-jp-git.git] / libhb / decomb.c
1 /* $Id: decomb.c,v 1.14 2008/04/25 5:00:00 jbrjake Exp $
2
3    This file is part of the HandBrake source code.
4    Homepage: <http://handbrake.fr/>.
5    It may be used under the terms of the GNU General Public License. 
6    
7    The yadif algorithm was created by Michael Niedermayer.
8    Tritical's work inspired much of the comb detection code:
9    http://web.missouri.edu/~kes25c/
10 */
11
12 #include "hb.h"
13 #include "hbffmpeg.h"
14 #include "mpeg2dec/mpeg2.h"
15 #include "eedi2.h"
16
/* Not referenced in the visible part of this file; presumably mutes
   libav logging -- TODO confirm against the rest of the file. */
#define SUPPRESS_AV_LOG

/* Fallback settings for the filter mode and the field parity. */
#define MODE_DEFAULT     1
#define PARITY_DEFAULT   -1

/* Fallback settings for the mcdeint stage. */
#define MCDEINT_MODE_DEFAULT   -1
#define MCDEINT_QP_DEFAULT      1

/* Small arithmetic helpers. Arguments are evaluated more than once,
   so do not pass expressions with side effects. MIN3/MAX3 rely on
   MIN/MAX being provided by an included header. */
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
#define MIN3(a,b,c) MIN(MIN(a,b),c)
#define MAX3(a,b,c) MAX(MAX(a,b),c)

/* First index into pv->eedi_half[][]: which half-height buffer. */
#define SRCPF 0
#define MSKPF 1
#define TMPPF 2
#define DSTPF 3

/* First index into pv->eedi_full[][]: which full-height buffer. */
#define DST2PF 0
#define TMP2PF2 1
#define MSK2PF 2
#define TMP2PF 3
#define DST2MPF 4
40
41 struct yadif_arguments_s {
42     uint8_t **dst;
43     int parity;
44     int tff;
45     int stop;
46     int is_combed;
47 };
48
49 struct decomb_arguments_s {
50     int stop;
51 };
52
53 struct eedi2_arguments_s {
54     int stop;
55 };
56
57 typedef struct yadif_arguments_s yadif_arguments_t;
58 typedef struct decomb_arguments_s decomb_arguments_t;
59 typedef struct eedi2_arguments_s eedi2_arguments_t;
60
61 typedef struct eedi2_thread_arg_s {
62     hb_filter_private_t *pv;
63     int plane;
64 } eedi2_thread_arg_t;
65
66 typedef struct decomb_thread_arg_s {
67     hb_filter_private_t *pv;
68     int segment;
69 } decomb_thread_arg_t;
70
71 typedef struct yadif_thread_arg_s {
72     hb_filter_private_t *pv;
73     int segment;
74 } yadif_thread_arg_t;
75
76 struct hb_filter_private_s
77 {
78     int              pix_fmt;
79     int              width[3];
80     int              height[3];
81
82     // Decomb parameters
83     int              mode;
84     int              spatial_metric;
85     int              motion_threshold;
86     int              spatial_threshold;
87     int              block_threshold;
88     int              block_width;
89     int              block_height;
90     
91     // EEDI2 parameters
92     int              magnitude_threshold;
93     int              variance_threshold;
94     int              laplacian_threshold;
95     int              dilation_threshold;
96     int              erosion_threshold;
97     int              noise_threshold;
98     int              maximum_search_distance;
99     int              post_processing;
100
101     int              parity;
102     int              tff;
103     
104     int              yadif_ready;
105
106     int              mcdeint_mode;
107     int              mcdeint_qp;
108
109     int              mcdeint_outbuf_size;
110     uint8_t        * mcdeint_outbuf;
111     AVCodecContext * mcdeint_avctx_enc;
112     AVFrame        * mcdeint_frame;
113     AVFrame        * mcdeint_frame_dec;
114
115     int              yadif_deinterlaced_frames;
116     int              blend_deinterlaced_frames;
117     int              unfiltered_frames;
118
119     uint8_t        * ref[4][3];
120     int              ref_stride[3];
121
122     /* Make a buffer to store a comb mask. */
123     uint8_t        * mask[3];
124
125     uint8_t        * eedi_half[4][3];
126     uint8_t        * eedi_full[5][3];
127     int            * cx2;
128     int            * cy2;
129     int            * cxy;
130     int            * tmpc;
131     
132     AVPicture        pic_in;
133     AVPicture        pic_out;
134     hb_buffer_t *    buf_out[2];
135     hb_buffer_t *    buf_settings;
136     
137     int              cpu_count;
138
139     hb_thread_t    ** yadif_threads;         // Threads for Yadif - one per CPU
140     hb_lock_t      ** yadif_begin_lock;      // Thread has work
141     hb_lock_t      ** yadif_complete_lock;   // Thread has completed work
142     yadif_arguments_t *yadif_arguments;      // Arguments to thread for work
143     
144     hb_thread_t    ** decomb_threads;        // Threads for comb detection - one per CPU
145     hb_lock_t      ** decomb_begin_lock;     // Thread has work
146     hb_lock_t      ** decomb_complete_lock;  // Thread has completed work
147     decomb_arguments_t *decomb_arguments;    // Arguments to thread for work
148
149     hb_thread_t    ** eedi2_threads;        // Threads for eedi2 - one per plane
150     hb_lock_t      ** eedi2_begin_lock;     // Thread has work
151     hb_lock_t      ** eedi2_complete_lock;  // Thread has completed work
152     eedi2_arguments_t *eedi2_arguments;    // Arguments to thread for work
153     
154 };
155
156 hb_filter_private_t * hb_decomb_init( int pix_fmt,
157                                            int width,
158                                            int height,
159                                            char * settings );
160
161 int hb_decomb_work(      const hb_buffer_t * buf_in,
162                          hb_buffer_t ** buf_out,
163                          int pix_fmt,
164                          int width,
165                          int height,
166                          hb_filter_private_t * pv );
167
168 void hb_decomb_close( hb_filter_private_t * pv );
169
170 hb_filter_object_t hb_filter_decomb =
171 {
172     FILTER_DECOMB,
173     "Decomb",
174     NULL,
175     hb_decomb_init,
176     hb_decomb_work,
177     hb_decomb_close,
178 };
179
/* Interpolates the sample half way between y1 and y2 from four
   equally spaced samples, using the (-3, 23, 23, -3) / 40 kernel from
   http://www.neuron2.net/library/cubicinterp.html.
   The result is clamped to the 0..255 range. */
int cubic_interpolate( int y0, int y1, int y2, int y3 )
{
    const int inner = y1 + y2;
    const int outer = y0 + y3;
    const int value = ( 23 * inner - 3 * outer ) / 40;

    if( value < 0 )
    {
        return 0;
    }

    return value > 255 ? 255 : value;
}
197
198 static void store_ref( const uint8_t ** pic,
199                              hb_filter_private_t * pv )
200 {
201     memcpy( pv->ref[3],
202             pv->ref[0],
203             sizeof(uint8_t *)*3 );
204
205     memmove( pv->ref[0],
206              pv->ref[1],
207              sizeof(uint8_t *)*3*3 );
208
209     int i;
210     for( i = 0; i < 3; i++ )
211     {
212         const uint8_t * src = pic[i];
213         uint8_t * ref = pv->ref[2][i];
214
215         int w = pv->width[i];
216         int h = pv->height[i];
217         int ref_stride = pv->ref_stride[i];
218
219         int y;
220         for( y = 0; y < pv->height[i]; y++ )
221         {
222             memcpy(ref, src, w);
223             src = (uint8_t*)src + w;
224             ref = (uint8_t*)ref + ref_stride;
225         }
226     }
227 }
228
229 static void get_ref( uint8_t ** pic, hb_filter_private_t * pv, int frm )
230 {
231     int i;
232     for( i = 0; i < 3; i++ )
233     {
234         uint8_t * dst = pic[i];
235         const uint8_t * ref = pv->ref[frm][i];
236         int w = pv->width[i];
237         int ref_stride = pv->ref_stride[i];
238         
239         int y;
240         for( y = 0; y < pv->height[i]; y++ )
241         {
242             memcpy(dst, ref, w);
243             dst += w;
244             ref += ref_stride;
245         }
246     }
247 }
248
/* Applies the vertical low-pass kernel (-1, 2, 6, 2, -1) / 8 to a
   column of five samples centred on `current` and clamps the result
   to the 0..255 range. */
int blend_filter_pixel( int up2, int up1, int current, int down1, int down2 )
{
    const int weighted = 6 * current + 2 * ( up1 + down1 ) - ( up2 + down2 );
    const int scaled   = weighted / 8;

    if( scaled > 255 )
    {
        return 255;
    }

    return scaled < 0 ? 0 : scaled;
}
271
272 static void blend_filter_line( uint8_t *dst,
273                                uint8_t *cur,
274                                int plane,
275                                int y,
276                                hb_filter_private_t * pv )
277 {
278     int w = pv->width[plane];
279     int refs = pv->ref_stride[plane];
280     int x;
281
282     for( x = 0; x < w; x++)
283     {
284         int a, b, c, d, e;
285         
286         a = cur[-2*refs];
287         b = cur[-refs];
288         c = cur[0];
289         d = cur[+refs];
290         e = cur[2*refs];
291         
292         if( y == 0 )
293         {
294             /* First line, so A and B don't exist.*/
295             a = cur[0];
296             b = cur[0];
297         }
298         else if( y == 1 )
299         {
300             /* Second line, no A. */
301             a = cur[-refs];
302         }
303         else if( y == (pv->height[plane] - 2) )
304         {
305             /* Second to last line, no E. */
306             e = cur[+refs];
307         }
308         else if( y == (pv->height[plane] -1) )
309         {
310             /* Last line, no D or E. */
311             d = cur[0];
312             e = cur[0];
313         }
314                 
315         dst[0] = blend_filter_pixel( a, b, c, d, e );
316
317         dst++;
318         cur++;
319     }
320 }
321
322 int check_combing_mask( hb_filter_private_t * pv )
323 {
324     /* Go through the mask in X*Y blocks. If any of these windows
325        have threshold or more combed pixels, consider the whole
326        frame to be combed and send it on to be deinterlaced.     */
327
328     /* Block mask threshold -- The number of pixels
329        in a block_width * block_height window of
330        he mask that need to show combing for the
331        whole frame to be seen as such.            */
332     int threshold       = pv->block_threshold;
333     int block_width     = pv->block_width;
334     int block_height    = pv->block_height;
335     int block_x, block_y;
336     int block_score = 0; int send_to_blend = 0;
337     
338     int x, y, k;
339
340     for( k = 0; k < 1; k++ )
341     {
342         int ref_stride = pv->ref_stride[k];
343         for( y = 0; y < ( pv->height[k] - block_height ); y = y + block_height )
344         {
345             for( x = 0; x < ( pv->width[k] - block_width ); x = x + block_width )
346             {
347                 block_score = 0;
348                 for( block_y = 0; block_y < block_height; block_y++ )
349                 {
350                     for( block_x = 0; block_x < block_width; block_x++ )
351                     {
352                         int mask_y = y + block_y;
353                         int mask_x = x + block_x;
354                         
355                         /* We only want to mark a pixel in a block as combed
356                            if the pixels above and below are as well. Got to
357                            handle the top and bottom lines separately.       */
358                         if( y + block_y == 0 )
359                         {
360                             if( pv->mask[k][mask_y*ref_stride+mask_x    ] == 255 &&
361                                 pv->mask[k][mask_y*ref_stride+mask_x + 1] == 255 )
362                                     block_score++;
363                         }
364                         else if( y + block_y == pv->height[k] - 1 )
365                         {
366                             if( pv->mask[k][mask_y*ref_stride+mask_x - 1] == 255 &&
367                                 pv->mask[k][mask_y*ref_stride+mask_x    ] == 255 )
368                                     block_score++;
369                         }
370                         else
371                         {
372                             if( pv->mask[k][mask_y*ref_stride+mask_x - 1] == 255 &&
373                                 pv->mask[k][mask_y*ref_stride+mask_x    ] == 255 &&
374                                 pv->mask[k][mask_y*ref_stride+mask_x + 1] == 255 )
375                                     block_score++;
376                         } 
377                     }
378                 }
379
380                 if( block_score >= ( threshold / 2 ) )
381                 {
382 #if 0
383                     hb_log("decomb: frame %i | score %i | type %s", pv->yadif_deinterlaced_frames + pv->blend_deinterlaced_frames +  pv->unfiltered_frames + 1, block_score, pv->buf_settings->flags & 16 ? "Film" : "Video");
384 #endif
385                     if ( block_score <= threshold && !( pv->buf_settings->flags & 16) )
386                     {
387                         /* Blend video content that scores between
388                            ( threshold / 2 ) and threshold.        */
389                         send_to_blend = 1;
390                     }
391                     else if( block_score > threshold )
392                     {
393                         if( pv->buf_settings->flags & 16 )
394                         {
395                             /* Blend progressive content above the threshold.*/
396                             return 2;
397                         }
398                         else
399                         {
400                             /* Yadif deinterlace video content above the threshold. */
401                             return 1;
402                         }
403                     }
404                 }
405             }
406         } 
407     }
408     
409     if( send_to_blend )
410     {
411         return 2;
412     }
413     else
414     {
415         /* Consider this frame to be uncombed. */
416         return 0;
417     }
418 }
419
420 int detect_combed_segment( hb_filter_private_t * pv, int segment_start, int segment_stop )
421 {
422     /* A mish-mash of various comb detection tricks
423        picked up from neuron2's Decomb plugin for
424        AviSynth and tritical's IsCombedT and
425        IsCombedTIVTC plugins.                       */
426        
427     int x, y, k, width, height;
428     
429     /* Comb scoring algorithm */
430     int spatial_metric  = pv->spatial_metric;
431     /* Motion threshold */
432     int mthresh         = pv->motion_threshold;
433     /* Spatial threshold */
434     int athresh         = pv->spatial_threshold;
435     int athresh_squared = athresh * athresh;
436     int athresh6        = 6 *athresh;
437
438     /* One pas for Y, one pass for U, one pass for V */    
439     for( k = 0; k < 1; k++ )
440     {
441         int ref_stride  = pv->ref_stride[k];
442         width           = pv->width[k];
443         height          = pv->height[k];
444         
445         /* Comb detection has to start at y = 2 and end at
446            y = height - 2, because it needs to examine
447            2 pixels above and 2 below the current pixel.      */
448         if( segment_start < 2 )
449             segment_start = 2;
450         if( segment_stop > height - 2 )
451             segment_stop = height - 2;
452             
453         for( y =  segment_start; y < segment_stop; y++ )
454         {
455             /* These are just to make the buffer locations easier to read. */
456             int back_2    = ( y - 2 )*ref_stride ;
457             int back_1    = ( y - 1 )*ref_stride;
458             int current   =         y*ref_stride;
459             int forward_1 = ( y + 1 )*ref_stride;
460             int forward_2 = ( y + 2 )*ref_stride;
461             
462             /* We need to examine a column of 5 pixels
463                in the prev, cur, and next frames.      */
464             uint8_t previous_frame[5];
465             uint8_t current_frame[5];
466             uint8_t next_frame[5];
467             
468             for( x = 0; x < width; x++ )
469             {
470                 /* Fill up the current frame array with the current pixel values.*/
471                 current_frame[0] = pv->ref[1][k][back_2    + x];
472                 current_frame[1] = pv->ref[1][k][back_1    + x];
473                 current_frame[2] = pv->ref[1][k][current   + x];
474                 current_frame[3] = pv->ref[1][k][forward_1 + x];
475                 current_frame[4] = pv->ref[1][k][forward_2 + x];
476
477                 int up_diff   = current_frame[2] - current_frame[1];
478                 int down_diff = current_frame[2] - current_frame[3];
479
480                 if( ( up_diff >  athresh && down_diff >  athresh ) ||
481                     ( up_diff < -athresh && down_diff < -athresh ) )
482                 {
483                     /* The pixel above and below are different,
484                        and they change in the same "direction" too.*/
485                     int motion = 0;
486                     if( mthresh > 0 )
487                     {
488                         /* Make sure there's sufficient motion between frame t-1 to frame t+1. */
489                         previous_frame[0] = pv->ref[0][k][back_2    + x];
490                         previous_frame[1] = pv->ref[0][k][back_1    + x];
491                         previous_frame[2] = pv->ref[0][k][current   + x];
492                         previous_frame[3] = pv->ref[0][k][forward_1 + x];
493                         previous_frame[4] = pv->ref[0][k][forward_2 + x];
494                         next_frame[0]     = pv->ref[2][k][back_2    + x];
495                         next_frame[1]     = pv->ref[2][k][back_1    + x];
496                         next_frame[2]     = pv->ref[2][k][current   + x];
497                         next_frame[3]     = pv->ref[2][k][forward_1 + x];
498                         next_frame[4]     = pv->ref[2][k][forward_2 + x];
499                         
500                         if( abs( previous_frame[2] - current_frame[2] ) > mthresh &&
501                             abs(  current_frame[1] - next_frame[1]    ) > mthresh &&
502                             abs(  current_frame[3] - next_frame[3]    ) > mthresh )
503                                 motion++;
504                         if( abs(     next_frame[2] - current_frame[2] ) > mthresh &&
505                             abs( previous_frame[1] - current_frame[1] ) > mthresh &&
506                             abs( previous_frame[3] - current_frame[3] ) > mthresh )
507                                 motion++;
508                     }
509                     else
510                     {
511                         /* User doesn't want to check for motion,
512                            so move on to the spatial check.       */
513                         motion = 1;
514                     }
515                            
516                     if( motion || ( pv->yadif_deinterlaced_frames==0 && pv->blend_deinterlaced_frames==0 && pv->unfiltered_frames==0) )
517                     {
518                            /* That means it's time for the spatial check.
519                               We've got several options here.             */
520                         if( spatial_metric == 0 )
521                         {
522                             /* Simple 32detect style comb detection */
523                             if( ( abs( current_frame[2] - current_frame[4] ) < 10  ) &&
524                                 ( abs( current_frame[2] - current_frame[3] ) > 15 ) )
525                             {
526                                 pv->mask[k][y*ref_stride + x] = 255;
527                             }
528                             else
529                             {
530                                 pv->mask[k][y*ref_stride + x] = 0;
531                             }
532                         }
533                         else if( spatial_metric == 1 )
534                         {
535                             /* This, for comparison, is what IsCombed uses.
536                                It's better, but still noise senstive.      */
537                                int combing = ( current_frame[1] - current_frame[2] ) *
538                                              ( current_frame[3] - current_frame[2] );
539                                
540                                if( combing > athresh_squared )
541                                    pv->mask[k][y*ref_stride + x] = 255; 
542                                else
543                                    pv->mask[k][y*ref_stride + x] = 0;
544                         }
545                         else if( spatial_metric == 2 )
546                         {
547                             /* Tritical's noise-resistant combing scorer.
548                                The check is done on a bob+blur convolution. */
549                             int combing = abs( current_frame[0]
550                                              + ( 4 * current_frame[2] )
551                                              + current_frame[4]
552                                              - ( 3 * ( current_frame[1]
553                                                      + current_frame[3] ) ) );
554
555                             /* If the frame is sufficiently combed,
556                                then mark it down on the mask as 255. */
557                             if( combing > athresh6 )
558                                 pv->mask[k][y*ref_stride + x] = 255; 
559                             else
560                                 pv->mask[k][y*ref_stride + x] = 0;
561                         }
562                     }
563                     else
564                     {
565                         pv->mask[k][y*ref_stride + x] = 0;
566                     }
567                 }
568                 else
569                 {
570                     pv->mask[k][y*ref_stride + x] = 0;
571                 }
572             }
573         }
574     }
575 }
576
577 // This function calls all the eedi2 filters in sequence for a given plane.
578 // It outputs the final interpolated image to pv->eedi_full[DST2PF].
579 void eedi2_interpolate_plane( hb_filter_private_t * pv, int k )
580 {
581     /* We need all these pointers. No, seriously.
582        I swear. It's not a joke. They're used.
583        All nine of them.                         */
584     uint8_t * mskp = pv->eedi_half[MSKPF][k];
585     uint8_t * srcp = pv->eedi_half[SRCPF][k];
586     uint8_t * tmpp = pv->eedi_half[TMPPF][k];
587     uint8_t * dstp = pv->eedi_half[DSTPF][k];
588     uint8_t * dst2p = pv->eedi_full[DST2PF][k];
589     uint8_t * tmp2p2 = pv->eedi_full[TMP2PF2][k];
590     uint8_t * msk2p = pv->eedi_full[MSK2PF][k];
591     uint8_t * tmp2p = pv->eedi_full[TMP2PF][k];
592     uint8_t * dst2mp = pv->eedi_full[DST2MPF][k];
593     int * cx2 = pv->cx2;
594     int * cy2 = pv->cy2;
595     int * cxy = pv->cxy;
596     int * tmpc = pv->tmpc;
597
598     int pitch = pv->ref_stride[k];
599     int height = pv->height[k]; int width = pv->width[k];
600     int half_height = height / 2;
601
602     // edge mask
603     eedi2_build_edge_mask( mskp, pitch, srcp, pitch,
604                      pv->magnitude_threshold, pv->variance_threshold, pv->laplacian_threshold, 
605                      half_height, width );
606     eedi2_erode_edge_mask( mskp, pitch, tmpp, pitch, pv->erosion_threshold, half_height, width );
607     eedi2_dilate_edge_mask( tmpp, pitch, mskp, pitch, pv->dilation_threshold, half_height, width );
608     eedi2_erode_edge_mask( mskp, pitch, tmpp, pitch, pv->erosion_threshold, half_height, width );
609     eedi2_remove_small_gaps( tmpp, pitch, mskp, pitch, half_height, width );
610
611     // direction mask
612     eedi2_calc_directions( k, mskp, pitch, srcp, pitch, tmpp, pitch,
613                      pv->maximum_search_distance, pv->noise_threshold,
614                      half_height, width );
615     eedi2_filter_dir_map( mskp, pitch, tmpp, pitch, dstp, pitch, half_height, width );
616     eedi2_expand_dir_map( mskp, pitch, dstp, pitch, tmpp, pitch, half_height, width );
617     eedi2_filter_map( mskp, pitch, tmpp, pitch, dstp, pitch, half_height, width );
618
619     // upscale 2x vertically
620     eedi2_upscale_by_2( srcp, dst2p, half_height, pitch );
621     eedi2_upscale_by_2( dstp, tmp2p2, half_height, pitch );
622     eedi2_upscale_by_2( mskp, msk2p, half_height, pitch );
623
624     // upscale the direction mask
625     eedi2_mark_directions_2x( msk2p, pitch, tmp2p2, pitch, tmp2p, pitch, pv->tff, height, width );
626     eedi2_filter_dir_map_2x( msk2p, pitch, tmp2p, pitch,  dst2mp, pitch, pv->tff, height, width );
627     eedi2_expand_dir_map_2x( msk2p, pitch, dst2mp, pitch, tmp2p, pitch, pv->tff, height, width );
628     eedi2_fill_gaps_2x( msk2p, pitch, tmp2p, pitch, dst2mp, pitch, pv->tff, height, width );
629     eedi2_fill_gaps_2x( msk2p, pitch, dst2mp, pitch, tmp2p, pitch, pv->tff, height, width );
630
631     // interpolate a full-size plane
632     eedi2_interpolate_lattice( k, tmp2p, pitch, dst2p, pitch, tmp2p2, pitch, pv->tff,
633                          pv->noise_threshold, height, width );
634
635     if( pv->post_processing == 1 || pv->post_processing == 3 )
636     {
637         // make sure the edge directions are consistent
638         eedi2_bit_blit( tmp2p2, pitch, tmp2p, pitch, pv->width[k], pv->height[k] );
639         eedi2_filter_dir_map_2x( msk2p, pitch, tmp2p, pitch, dst2mp, pitch, pv->tff, height, width );
640         eedi2_expand_dir_map_2x( msk2p, pitch, dst2mp, pitch, tmp2p, pitch, pv->tff, height, width );
641         eedi2_post_process( tmp2p, pitch, tmp2p2, pitch, dst2p, pitch, pv->tff, height, width );
642     }
643     if( pv->post_processing == 2 || pv->post_processing == 3 )
644     {
645         // filter junctions and corners
646         eedi2_gaussian_blur1( srcp, pitch, tmpp, pitch, srcp, pitch, half_height, width );
647         eedi2_calc_derivatives( srcp, pitch, half_height, width, cx2, cy2, cxy );
648         eedi2_gaussian_blur_sqrt2( cx2, tmpc, cx2, pitch, half_height, width);
649         eedi2_gaussian_blur_sqrt2( cy2, tmpc, cy2, pitch, half_height, width);
650         eedi2_gaussian_blur_sqrt2( cxy, tmpc, cxy, pitch, half_height, width);
651         eedi2_post_process_corner( cx2, cy2, cxy, pitch, tmp2p2, pitch, dst2p, pitch, height, width, pv->tff );
652     }
653 }
654
655 /*
656  *  eedi2 interpolate this plane in a single thread.
657  */
658 void eedi2_filter_thread( void *thread_args_v )
659 {
660     eedi2_arguments_t *eedi2_work = NULL;
661     hb_filter_private_t * pv;
662     int run = 1;
663     int plane;
664     eedi2_thread_arg_t *thread_args = thread_args_v;
665
666     pv = thread_args->pv;
667     plane = thread_args->plane;
668
669     hb_log("eedi2 thread started for plane %d", plane);
670
671     while( run )
672     {
673         /*
674          * Wait here until there is work to do. hb_lock() blocks until
675          * render releases it to say that there is more work to do.
676          */
677         hb_lock( pv->eedi2_begin_lock[plane] );
678
679         eedi2_work = &pv->eedi2_arguments[plane];
680
681         if( eedi2_work->stop )
682         {
683             /*
684              * No more work to do, exit this thread.
685              */
686             run = 0;
687             continue;
688         } 
689
690         /*
691          * Process plane
692          */
693             eedi2_interpolate_plane( pv, plane );
694         
695         /*
696          * Finished this segment, let everyone know.
697          */
698         hb_unlock( pv->eedi2_complete_lock[plane] );
699     }
700     free( thread_args_v );
701 }
702
703 // Sets up the input field planes for EEDI2 in pv->eedi_half[SRCPF]
704 // and then runs eedi2_filter_thread for each plane.
705 void eedi2_planer( hb_filter_private_t * pv )
706 {
707     /* Copy the first field from the source to a half-height frame. */
708     int i;
709     for( i = 0;  i < 3; i++ )
710     {
711         int pitch = pv->ref_stride[i];
712         int start_line = !pv->tff;
713         eedi2_fill_half_height_buffer_plane( &pv->ref[1][i][pitch*start_line], pv->eedi_half[SRCPF][i], pitch, pv->height[i] );
714     }
715     
716     int plane;
717     for( plane = 0; plane < 3; plane++ )
718     {  
719         /*
720          * Let the thread for this plane know that we've setup work 
721          * for it by releasing the begin lock (ensuring that the
722          * complete lock is already locked so that we block when
723          * we try to lock it again below).
724          */
725         hb_lock( pv->eedi2_complete_lock[plane] );
726         hb_unlock( pv->eedi2_begin_lock[plane] );
727     }
728
729     /*
730      * Wait until all three threads have completed by trying to get
731      * the complete lock that we locked earlier for each thread, which
732      * will block until that thread has completed the work on that
733      * plane.
734      */
735     for( plane = 0; plane < 3; plane++ )
736     {
737         hb_lock( pv->eedi2_complete_lock[plane] );
738         hb_unlock( pv->eedi2_complete_lock[plane] );
739     }
740 }
741
742
743 /*
744  * comb detect this segment of all three planes in a single thread.
745  */
746 void decomb_filter_thread( void *thread_args_v )
747 {
748     decomb_arguments_t *decomb_work = NULL;
749     hb_filter_private_t * pv;
750     int run = 1;
751     int segment, segment_start, segment_stop, plane;
752     decomb_thread_arg_t *thread_args = thread_args_v;
753
754     pv = thread_args->pv;
755     segment = thread_args->segment;
756
757     hb_log("decomb thread started for segment %d", segment);
758
759     while( run )
760     {
761         /*
762          * Wait here until there is work to do. hb_lock() blocks until
763          * render releases it to say that there is more work to do.
764          */
765         hb_lock( pv->decomb_begin_lock[segment] );
766
767         decomb_work = &pv->decomb_arguments[segment];
768
769         if( decomb_work->stop )
770         {
771             /*
772              * No more work to do, exit this thread.
773              */
774             run = 0;
775             continue;
776         } 
777
778         /*
779          * Process segment (for now just from luma)
780          */
781         for( plane = 0; plane < 1; plane++)
782         {
783
784             int w = pv->width[plane];
785             int h = pv->height[plane];
786             int ref_stride = pv->ref_stride[plane];
787             segment_start = ( h / pv->cpu_count ) * segment;
788             if( segment == pv->cpu_count - 1 )
789             {
790                 /*
791                  * Final segment
792                  */
793                 segment_stop = h;
794             } else {
795                 segment_stop = ( h / pv->cpu_count ) * ( segment + 1 );
796             }
797             
798             detect_combed_segment( pv, segment_start, segment_stop );
799         }
800         /*
801          * Finished this segment, let everyone know.
802          */
803         hb_unlock( pv->decomb_complete_lock[segment] );
804     }
805     free( thread_args_v );
806 }
807
808 int comb_segmenter( hb_filter_private_t * pv )
809 {
810     int segment;
811
812     for( segment = 0; segment < pv->cpu_count; segment++ )
813     {  
814         /*
815          * Let the thread for this plane know that we've setup work 
816          * for it by releasing the begin lock (ensuring that the
817          * complete lock is already locked so that we block when
818          * we try to lock it again below).
819          */
820         hb_lock( pv->decomb_complete_lock[segment] );
821         hb_unlock( pv->decomb_begin_lock[segment] );
822     }
823
824     /*
825      * Wait until all three threads have completed by trying to get
826      * the complete lock that we locked earlier for each thread, which
827      * will block until that thread has completed the work on that
828      * plane.
829      */
830     for( segment = 0; segment < pv->cpu_count; segment++ )
831     {
832         hb_lock( pv->decomb_complete_lock[segment] );
833         hb_unlock( pv->decomb_complete_lock[segment] );
834     }
835     
836     return check_combing_mask( pv );
837 }
838
839 static void yadif_filter_line( uint8_t *dst,
840                                uint8_t *prev,
841                                uint8_t *cur,
842                                uint8_t *next,
843                                int plane,
844                                int parity,
845                                int y,
846                                hb_filter_private_t * pv )
847 {
848     /* While prev and next point to the previous and next frames,
849        prev2 and next2 will shift depending on the parity, usually 1.
850        They are the previous and next fields, the fields temporally adjacent
851        to the other field in the current frame--the one not being filtered.  */
852     uint8_t *prev2 = parity ? prev : cur ;
853     uint8_t *next2 = parity ? cur  : next;
854     
855     int w = pv->width[plane];
856     int refs = pv->ref_stride[plane];
857     int x;
858     int eedi2_mode = (pv->mode == 5);
859     
860     /* We can replace spatial_pred with this interpolation*/
861     uint8_t * eedi2_guess = &pv->eedi_full[DST2PF][plane][y*refs];
862
863     /* Decomb's cubic interpolation can only function when there are
864        three samples above and below, so regress to yadif's traditional
865        two-tap interpolation when filtering at the top and bottom edges. */
866     int edge = 0;
867     if( ( y < 3 ) || ( y > ( pv->height[plane] - 4 ) )  )
868         edge = 1;
869
870     for( x = 0; x < w; x++)
871     {
872         /* Pixel above*/
873         int c              = cur[-refs];
874         /* Temporal average: the current location in the adjacent fields */
875         int d              = (prev2[0] + next2[0])>>1;
876         /* Pixel below */
877         int e              = cur[+refs];
878         
879         /* How the current pixel changes between the adjacent fields */
880         int temporal_diff0 = ABS(prev2[0] - next2[0]);
881         /* The average of how much the pixels above and below change from the frame before to now. */
882         int temporal_diff1 = ( ABS(prev[-refs] - cur[-refs]) + ABS(prev[+refs] - cur[+refs]) ) >> 1;
883         /* The average of how much the pixels above and below change from now to the next frame. */
884         int temporal_diff2 = ( ABS(next[-refs] - cur[-refs]) + ABS(next[+refs] - cur[+refs]) ) >> 1;
885         /* For the actual difference, use the largest of the previous average diffs. */
886         int diff           = MAX3(temporal_diff0>>1, temporal_diff1, temporal_diff2);
887
888         int spatial_pred;
889         
890         if( eedi2_mode )
891         {
892             /* Who needs yadif's spatial predictions when we can have EEDI2's? */
893             spatial_pred = eedi2_guess[0];
894             eedi2_guess++;
895         }
896         else // Yadif spatial interpolation
897         {
898             /* SAD of how the pixel-1, the pixel, and the pixel+1 change from the line above to below. */ 
899             int spatial_score  = ABS(cur[-refs-1] - cur[+refs-1]) + ABS(cur[-refs]-cur[+refs]) +
900                                          ABS(cur[-refs+1] - cur[+refs+1]) - 1;         
901             
902             /* Spatial pred is either a bilinear or cubic vertical interpolation. */
903             if( pv->mode > 0 && !edge)
904             {
905                 spatial_pred = cubic_interpolate( cur[-3*refs], cur[-refs], cur[+refs], cur[3*refs] );
906             }
907             else
908             {
909                 spatial_pred = (c+e)>>1;
910             }
911
912         /* EDDI: Edge Directed Deinterlacing Interpolation
913            Checks 4 different slopes to see if there is more similarity along a diagonal
914            than there was vertically. If a diagonal is more similar, then it indicates
915            an edge, so interpolate along that instead of a vertical line, using either
916            linear or cubic interpolation depending on mode. */
917         #define YADIF_CHECK(j)\
918                 {   int score = ABS(cur[-refs-1+j] - cur[+refs-1-j])\
919                               + ABS(cur[-refs  +j] - cur[+refs  -j])\
920                               + ABS(cur[-refs+1+j] - cur[+refs+1-j]);\
921                     if( score < spatial_score ){\
922                         spatial_score = score;\
923                         if( pv->mode > 0 && !edge )\
924                         {\
925                             switch(j)\
926                             {\
927                                 case -1:\
928                                     spatial_pred = cubic_interpolate(cur[-3 * refs - 3], cur[-refs -1], cur[+refs + 1], cur[3* refs + 3] );\
929                                 break;\
930                                 case -2:\
931                                     spatial_pred = cubic_interpolate( ( ( cur[-3*refs - 4] + cur[-refs - 4] ) / 2 ) , cur[-refs -2], cur[+refs + 2], ( ( cur[3*refs + 4] + cur[refs + 4] ) / 2 ) );\
932                                 break;\
933                                 case 1:\
934                                     spatial_pred = cubic_interpolate(cur[-3 * refs +3], cur[-refs +1], cur[+refs - 1], cur[3* refs -3] );\
935                                 break;\
936                                 case 2:\
937                                     spatial_pred = cubic_interpolate(( ( cur[-3*refs + 4] + cur[-refs + 4] ) / 2 ), cur[-refs +2], cur[+refs - 2], ( ( cur[3*refs - 4] + cur[refs - 4] ) / 2 ) );\
938                                 break;\
939                             }\
940                         }\
941                         else\
942                         {\
943                             spatial_pred = ( cur[-refs +j] + cur[+refs -j] ) >>1;\
944                         }\
945
946             YADIF_CHECK(-1) YADIF_CHECK(-2) }} }}
947             YADIF_CHECK( 1) YADIF_CHECK( 2) }} }}
948         }
949
950         /* Temporally adjust the spatial prediction by
951            comparing against lines in the adjacent fields. */
952         int b = (prev2[-2*refs] + next2[-2*refs])>>1;
953         int f = (prev2[+2*refs] + next2[+2*refs])>>1;
954         
955         /* Find the median value */
956         int max = MAX3(d-e, d-c, MIN(b-c, f-e));
957         int min = MIN3(d-e, d-c, MAX(b-c, f-e));
958         diff = MAX3( diff, min, -max );
959         
960         if( spatial_pred > d + diff )
961         {
962             spatial_pred = d + diff;
963         }
964         else if( spatial_pred < d - diff )
965         {
966             spatial_pred = d - diff;
967         }
968         
969         dst[0] = spatial_pred;
970                         
971         dst++;
972         cur++;
973         prev++;
974         next++;
975         prev2++;
976         next2++;
977     }
978 }
979
980 /*
981  * deinterlace this segment of all three planes in a single thread.
982  */
983 void yadif_decomb_filter_thread( void *thread_args_v )
984 {
985     yadif_arguments_t *yadif_work = NULL;
986     hb_filter_private_t * pv;
987     int run = 1;
988     int plane;
989     int segment, segment_start, segment_stop;
990     yadif_thread_arg_t *thread_args = thread_args_v;
991     uint8_t **dst;
992     int parity, tff, y, w, h, penultimate, ultimate, ref_stride, is_combed;
993
994     pv = thread_args->pv;
995     segment = thread_args->segment;
996
997     hb_log("yadif thread started for segment %d", segment);
998
999     while( run )
1000     {
1001         /*
1002          * Wait here until there is work to do. hb_lock() blocks until
1003          * render releases it to say that there is more work to do.
1004          */
1005         hb_lock( pv->yadif_begin_lock[segment] );
1006
1007         yadif_work = &pv->yadif_arguments[segment];
1008
1009         if( yadif_work->stop )
1010         {
1011             /*
1012              * No more work to do, exit this thread.
1013              */
1014             run = 0;
1015             continue;
1016         } 
1017
1018         if( yadif_work->dst == NULL )
1019         {
1020             hb_error( "thread started when no work available" );
1021             hb_snooze(500);
1022             continue;
1023         }
1024         
1025         is_combed = pv->yadif_arguments[segment].is_combed;
1026
1027         /*
1028          * Process all three planes, but only this segment of it.
1029          */
1030         for( plane = 0; plane < 3; plane++)
1031         {
1032
1033             dst = yadif_work->dst;
1034             parity = yadif_work->parity;
1035             tff = yadif_work->tff;
1036             w = pv->width[plane];
1037             h = pv->height[plane];
1038             penultimate = h - 2;
1039             ultimate = h - 1;
1040             ref_stride = pv->ref_stride[plane];
1041             segment_start = ( h / pv->cpu_count ) * segment;
1042             if( segment == pv->cpu_count - 1 )
1043             {
1044                 /*
1045                  * Final segment
1046                  */
1047                 segment_stop = h;
1048             } else {
1049                 segment_stop = ( h / pv->cpu_count ) * ( segment + 1 );
1050             }
1051
1052             for( y = segment_start; y < segment_stop; y++ )
1053             {
1054                 if( ( pv->mode == 4 && is_combed ) || is_combed == 2 )
1055                 {
1056                     /* This line gets blend filtered, not yadif filtered. */
1057                     uint8_t *prev = &pv->ref[0][plane][y*ref_stride];
1058                     uint8_t *cur  = &pv->ref[1][plane][y*ref_stride];
1059                     uint8_t *next = &pv->ref[2][plane][y*ref_stride];
1060                     uint8_t *dst2 = &dst[plane][y*w];
1061
1062                     blend_filter_line( dst2, cur, plane, y, pv );
1063                 }
1064                 else if( ( ( y ^ parity ) &  1 )  && ( is_combed == 1 ) )
1065                 {
1066                     /* This line gets yadif filtered. It is the bottom field
1067                        when TFF and vice-versa. It's the field that gets
1068                        filtered. Because yadif needs 2 lines above and below
1069                        the one being filtered, we need to mirror the edges.
1070                        When TFF, this means replacing the 2nd line with a
1071                        copy of the 1st, and the last with the second-to-last. */
1072                     if( y > 1 && y < ( h -2 ) )
1073                     {
1074                         /* This isn't the top or bottom, proceed as normal to yadif. */
1075                         uint8_t *prev = &pv->ref[0][plane][y*ref_stride];
1076                         uint8_t *cur  = &pv->ref[1][plane][y*ref_stride];
1077                         uint8_t *next = &pv->ref[2][plane][y*ref_stride];
1078                         uint8_t *dst2 = &dst[plane][y*w];
1079
1080                         yadif_filter_line( dst2, 
1081                                            prev, 
1082                                            cur, 
1083                                            next, 
1084                                            plane, 
1085                                            parity ^ tff,
1086                                            y, 
1087                                            pv );
1088                     }
1089                     else if( y == 0 )
1090                     {
1091                         /* BFF, so y0 = y1 */
1092                         memcpy( &dst[plane][y*w],
1093                                 &pv->ref[1][plane][1*ref_stride],
1094                                 w * sizeof(uint8_t) );
1095                     }
1096                     else if( y == 1 )
1097                     {
1098                         /* TFF, so y1 = y0 */
1099                         memcpy( &dst[plane][y*w],
1100                                 &pv->ref[1][plane][0],
1101                                 w * sizeof(uint8_t) );
1102                     }
1103                     else if( y == penultimate )
1104                     {
1105                         /* BFF, so penultimate y = ultimate y */
1106                         memcpy( &dst[plane][y*w],
1107                                 &pv->ref[1][plane][ultimate*ref_stride],
1108                                 w * sizeof(uint8_t) );
1109                     }
1110                     else if( y == ultimate )
1111                     {
1112                         /* TFF, so ultimate y = penultimate y */
1113                         memcpy( &dst[plane][y*w],
1114                                 &pv->ref[1][plane][penultimate*ref_stride],
1115                                 w * sizeof(uint8_t) );
1116                     }
1117                 }
1118                 else
1119                 {
1120                     memcpy( &dst[plane][y*w],
1121                             &pv->ref[1][plane][y*ref_stride],
1122                             w * sizeof(uint8_t) );              
1123                 }
1124             }
1125         }
1126         /*
1127          * Finished this segment, let everyone know.
1128          */
1129         hb_unlock( pv->yadif_complete_lock[segment] );
1130     }
1131     free( thread_args_v );
1132 }
1133
1134 static void yadif_filter( uint8_t ** dst,
1135                           int parity,
1136                           int tff,
1137                           hb_filter_private_t * pv )
1138 {
1139     /* If we're running comb detection, do it now, otherwise blend if mode 4 and interpolate if not. */
1140     int is_combed = pv->spatial_metric >= 0 ? comb_segmenter( pv ) : pv->mode == 4 ? 2 : 1;
1141
1142     if( is_combed == 1 )
1143     {
1144         pv->yadif_deinterlaced_frames++;
1145     }
1146     else if( is_combed == 2 )
1147     {
1148         pv->blend_deinterlaced_frames++;
1149     }
1150     else
1151     {
1152         pv->unfiltered_frames++;
1153     }
1154     
1155     if( is_combed == 1 && pv->mode == 5 )
1156     {
1157         /* Generate an EEDI2 interpolation */
1158         eedi2_planer( pv );
1159     }
1160     
1161     if( is_combed )
1162     {
1163         int segment;
1164
1165         for( segment = 0; segment < pv->cpu_count; segment++ )
1166         {  
1167             /*
1168              * Setup the work for this plane.
1169              */
1170             pv->yadif_arguments[segment].parity = parity;
1171             pv->yadif_arguments[segment].tff = tff;
1172             pv->yadif_arguments[segment].dst = dst;
1173             pv->yadif_arguments[segment].is_combed = is_combed;
1174
1175             /*
1176              * Let the thread for this plane know that we've setup work 
1177              * for it by releasing the begin lock (ensuring that the
1178              * complete lock is already locked so that we block when
1179              * we try to lock it again below).
1180              */
1181             hb_lock( pv->yadif_complete_lock[segment] );
1182             hb_unlock( pv->yadif_begin_lock[segment] );
1183         }
1184
1185         /*
1186          * Wait until all three threads have completed by trying to get
1187          * the complete lock that we locked earlier for each thread, which
1188          * will block until that thread has completed the work on that
1189          * plane.
1190          */
1191         for( segment = 0; segment < pv->cpu_count; segment++ )
1192         {
1193             hb_lock( pv->yadif_complete_lock[segment] );
1194             hb_unlock( pv->yadif_complete_lock[segment] );
1195         }
1196
1197         /*
1198          * Entire frame is now deinterlaced.
1199          */
1200     }
1201     else
1202     {
1203         /*  Just passing through... */
1204         int i;
1205         for( i = 0; i < 3; i++ )
1206         {
1207             uint8_t * ref = pv->ref[1][i];
1208             uint8_t * dest = dst[i];
1209             
1210             int w = pv->width[i];
1211             int ref_stride = pv->ref_stride[i];
1212             
1213             int y;
1214             for( y = 0; y < pv->height[i]; y++ )
1215             {
1216                 memcpy(dest, ref, w);
1217                 dest += w;
1218                 ref += ref_stride;
1219             }
1220         }
1221     }
1222 }
1223
1224 static void mcdeint_filter( uint8_t ** dst,
1225                             uint8_t ** src,
1226                             int parity,
1227                             hb_filter_private_t * pv )
1228 {
1229     int x, y, i;
1230     int out_size;
1231
1232 #ifdef SUPPRESS_AV_LOG
1233     /* TODO: temporarily change log level to suppress obnoxious debug output */
1234     int loglevel = av_log_get_level();
1235     av_log_set_level( AV_LOG_QUIET );
1236 #endif
1237
1238     for( i=0; i<3; i++ )
1239     {
1240         pv->mcdeint_frame->data[i] = src[i];
1241         pv->mcdeint_frame->linesize[i] = pv->width[i];
1242     }
1243     pv->mcdeint_avctx_enc->me_cmp     = FF_CMP_SAD;
1244     pv->mcdeint_avctx_enc->me_sub_cmp = FF_CMP_SAD;
1245     pv->mcdeint_frame->quality        = pv->mcdeint_qp * FF_QP2LAMBDA;
1246
1247     out_size = avcodec_encode_video( pv->mcdeint_avctx_enc,
1248                                      pv->mcdeint_outbuf,
1249                                      pv->mcdeint_outbuf_size,
1250                                      pv->mcdeint_frame );
1251
1252     pv->mcdeint_frame_dec = pv->mcdeint_avctx_enc->coded_frame;
1253
1254     for( i = 0; i < 3; i++ )
1255     {
1256         int w    = pv->width[i];
1257         int h    = pv->height[i];
1258         int fils = pv->mcdeint_frame_dec->linesize[i];
1259         int srcs = pv->width[i];
1260
1261         for( y = 0; y < h; y++ )
1262         {
1263             if( (y ^ parity) & 1 )
1264             {
1265                 for( x = 0; x < w; x++ )
1266                 {
1267                     if( (x-2)+(y-1)*w >= 0 && (x+2)+(y+1)*w < w*h )
1268                     {
1269                         uint8_t * filp =
1270                             &pv->mcdeint_frame_dec->data[i][x + y*fils];
1271                         uint8_t * srcp = &src[i][x + y*srcs];
1272
1273                         int diff0 = filp[-fils] - srcp[-srcs];
1274                         int diff1 = filp[+fils] - srcp[+srcs];
1275
1276                         int spatial_score =
1277                               ABS(srcp[-srcs-1] - srcp[+srcs-1])
1278                             + ABS(srcp[-srcs  ] - srcp[+srcs  ])
1279                             + ABS(srcp[-srcs+1] - srcp[+srcs+1]) - 1;
1280
1281                         int temp = filp[0];
1282
1283 #define MCDEINT_CHECK(j)\
1284                         {   int score = ABS(srcp[-srcs-1+j] - srcp[+srcs-1-j])\
1285                                       + ABS(srcp[-srcs  +j] - srcp[+srcs  -j])\
1286                                       + ABS(srcp[-srcs+1+j] - srcp[+srcs+1-j]);\
1287                             if( score < spatial_score ) {\
1288                                 spatial_score = score;\
1289                                 diff0 = filp[-fils+j] - srcp[-srcs+j];\
1290                                 diff1 = filp[+fils-j] - srcp[+srcs-j];
1291
1292                         MCDEINT_CHECK(-1) MCDEINT_CHECK(-2) }} }}
1293                         MCDEINT_CHECK( 1) MCDEINT_CHECK( 2) }} }}
1294
1295                         if(diff0 + diff1 > 0)
1296                         {
1297                             temp -= (diff0 + diff1 -
1298                                      ABS( ABS(diff0) - ABS(diff1) ) / 2) / 2;
1299                         }
1300                         else
1301                         {
1302                             temp -= (diff0 + diff1 +
1303                                      ABS( ABS(diff0) - ABS(diff1) ) / 2) / 2;
1304                         }
1305
1306                         filp[0] = dst[i][x + y*w] =
1307                             temp > 255U ? ~(temp>>31) : temp;
1308                     }
1309                     else
1310                     {
1311                         dst[i][x + y*w] =
1312                             pv->mcdeint_frame_dec->data[i][x + y*fils];
1313                     }
1314                 }
1315             }
1316         }
1317
1318         for( y = 0; y < h; y++ )
1319         {
1320             if( !((y ^ parity) & 1) )
1321             {
1322                 for( x = 0; x < w; x++ )
1323                 {
1324                     pv->mcdeint_frame_dec->data[i][x + y*fils] =
1325                         dst[i][x + y*w]= src[i][x + y*srcs];
1326                 }
1327             }
1328         }
1329     }
1330
1331 #ifdef SUPPRESS_AV_LOG
1332     /* TODO: restore previous log level */
1333     av_log_set_level(loglevel);
1334 #endif
1335 }
1336
1337 hb_filter_private_t * hb_decomb_init( int pix_fmt,
1338                                            int width,
1339                                            int height,
1340                                            char * settings )
1341 {
1342     if( pix_fmt != PIX_FMT_YUV420P )
1343     {
1344         return 0;
1345     }
1346
1347     hb_filter_private_t * pv = calloc( 1, sizeof(struct hb_filter_private_s) );
1348
1349     pv->pix_fmt = pix_fmt;
1350
1351     pv->width[0]  = width;
1352     pv->height[0] = height;
1353     pv->width[1]  = pv->width[2]  = width >> 1;
1354     pv->height[1] = pv->height[2] = height >> 1;
1355
1356     pv->buf_out[0] = hb_video_buffer_init( width, height );
1357     pv->buf_out[1] = hb_video_buffer_init( width, height );
1358     pv->buf_settings = hb_buffer_init( 0 );
1359
1360     pv->yadif_deinterlaced_frames = 0;
1361     pv->blend_deinterlaced_frames = 0;
1362     pv->unfiltered_frames = 0;
1363
1364     pv->yadif_ready    = 0;
1365
1366     pv->mode     = MODE_DEFAULT;
1367     pv->spatial_metric = 2;
1368     pv->motion_threshold = 6;
1369     pv->spatial_threshold = 9;
1370     pv->block_threshold = 80;
1371     pv->block_width = 16;
1372     pv->block_height = 16;
1373     
1374     pv->magnitude_threshold = 10;
1375     pv->variance_threshold = 20;
1376     pv->laplacian_threshold = 20;
1377     pv->dilation_threshold = 4;
1378     pv->erosion_threshold = 2;
1379     pv->noise_threshold = 50;
1380     pv->maximum_search_distance = 24;
1381     pv->post_processing = 1;
1382
1383     pv->parity   = PARITY_DEFAULT;
1384
1385     pv->mcdeint_mode   = MCDEINT_MODE_DEFAULT;
1386     pv->mcdeint_qp     = MCDEINT_QP_DEFAULT;
1387
1388     if( settings )
1389     {
1390         sscanf( settings, "%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d",
1391                 &pv->mode,
1392                 &pv->spatial_metric,
1393                 &pv->motion_threshold,
1394                 &pv->spatial_threshold,
1395                 &pv->block_threshold,
1396                 &pv->block_width,
1397                 &pv->block_height,
1398                 &pv->magnitude_threshold,
1399                 &pv->variance_threshold,
1400                 &pv->laplacian_threshold,
1401                 &pv->dilation_threshold,
1402                 &pv->erosion_threshold,
1403                 &pv->noise_threshold,
1404                 &pv->maximum_search_distance,
1405                 &pv->post_processing );
1406     }
1407     
1408     pv->cpu_count = hb_get_cpu_count();
1409     
1410
1411     if( pv->mode == 2 || pv->mode == 3 )
1412     {
1413         pv->mcdeint_mode = 0;
1414     }
1415     
1416     /* Allocate yadif specific buffers */
1417     int i, j;
1418     for( i = 0; i < 3; i++ )
1419     {
1420         int is_chroma = !!i;
1421         int w = ((width   + 31) & (~31))>>is_chroma;
1422         int h = ((height+6+ 31) & (~31))>>is_chroma;
1423
1424         pv->ref_stride[i] = w;
1425
1426         for( j = 0; j < 3; j++ )
1427         {
1428             pv->ref[j][i] = malloc( w*h*sizeof(uint8_t) ) + 3*w;
1429         }
1430     }
1431
1432     /* Allocate a buffer to store a comb mask. */
1433     for( i = 0; i < 3; i++ )
1434     {
1435         int is_chroma = !!i;
1436         int w = ((pv->width[0]   + 31) & (~31))>>is_chroma;
1437         int h = ((pv->height[0]+6+ 31) & (~31))>>is_chroma;
1438
1439         pv->mask[i] = calloc( 1, w*h*sizeof(uint8_t) ) + 3*w;
1440     }
1441     
1442     if( pv->mode == 5 )
1443     {
1444         /* Allocate half-height eedi2 buffers */
1445         height = pv->height[0] / 2;
1446         for( i = 0; i < 3; i++ )
1447         {
1448             int is_chroma = !!i;
1449             int w = ((width   + 31) & (~31))>>is_chroma;
1450             int h = ((height+6+ 31) & (~31))>>is_chroma;
1451
1452             for( j = 0; j < 4; j++ )
1453             {
1454                 pv->eedi_half[j][i] = malloc( w*h*sizeof(uint8_t) ) + 3*w;
1455             }
1456         }
1457
1458         /* Allocate full-height eedi2 buffers */
1459         height = pv->height[0];
1460         for( i = 0; i < 3; i++ )
1461         {
1462             int is_chroma = !!i;
1463             int w = ((width   + 31) & (~31))>>is_chroma;
1464             int h = ((height+6+ 31) & (~31))>>is_chroma;
1465
1466             for( j = 0; j < 5; j++ )
1467             {
1468                 pv->eedi_full[j][i] = malloc( w*h*sizeof(uint8_t) ) + 3*w;
1469             }
1470         }
1471     }
1472     
1473      /*
1474       * Create yadif threads and locks.
1475       */
1476      pv->yadif_threads = malloc( sizeof( hb_thread_t* ) * pv->cpu_count );
1477      pv->yadif_begin_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
1478      pv->yadif_complete_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
1479      pv->yadif_arguments = malloc( sizeof( yadif_arguments_t ) * pv->cpu_count );
1480
1481      for( i = 0; i < pv->cpu_count; i++ )
1482      {
1483          yadif_thread_arg_t *thread_args;
1484
1485          thread_args = malloc( sizeof( yadif_thread_arg_t ) );
1486
1487          if( thread_args )
1488          {
1489              thread_args->pv = pv;
1490              thread_args->segment = i;
1491
1492              pv->yadif_begin_lock[i] = hb_lock_init();
1493              pv->yadif_complete_lock[i] = hb_lock_init();
1494
1495              /*
1496               * Important to start off with the threads locked waiting
1497               * on input.
1498               */
1499              hb_lock( pv->yadif_begin_lock[i] );
1500
1501              pv->yadif_arguments[i].stop = 0;
1502              pv->yadif_arguments[i].dst = NULL;
1503              
1504              pv->yadif_threads[i] = hb_thread_init( "yadif_filter_segment",
1505                                                     yadif_decomb_filter_thread,
1506                                                     thread_args,
1507                                                     HB_NORMAL_PRIORITY );
1508          }
1509          else
1510          {
1511              hb_error( "yadif could not create threads" );
1512          }
1513     }
1514     
1515     /*
1516      * Create decomb threads and locks.
1517      */
1518     pv->decomb_threads = malloc( sizeof( hb_thread_t* ) * pv->cpu_count );
1519     pv->decomb_begin_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
1520     pv->decomb_complete_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
1521     pv->decomb_arguments = malloc( sizeof( decomb_arguments_t ) * pv->cpu_count );
1522     
1523     for( i = 0; i < pv->cpu_count; i++ )
1524     {
1525         decomb_thread_arg_t *decomb_thread_args;
1526     
1527         decomb_thread_args = malloc( sizeof( decomb_thread_arg_t ) );
1528     
1529         if( decomb_thread_args )
1530         {
1531             decomb_thread_args->pv = pv;
1532             decomb_thread_args->segment = i;
1533     
1534             pv->decomb_begin_lock[i] = hb_lock_init();
1535             pv->decomb_complete_lock[i] = hb_lock_init();
1536     
1537             /*
1538              * Important to start off with the threads locked waiting
1539              * on input.
1540              */
1541             hb_lock( pv->decomb_begin_lock[i] );
1542     
1543             pv->decomb_arguments[i].stop = 0;
1544     
1545             pv->decomb_threads[i] = hb_thread_init( "decomb_filter_segment",
1546                                                    decomb_filter_thread,
1547                                                    decomb_thread_args,
1548                                                    HB_NORMAL_PRIORITY );
1549         }
1550         else
1551         {
1552             hb_error( "decomb could not create threads" );
1553         }
1554     }
1555     
1556     if( pv->mode == 5 )
1557     {
1558         /*
1559          * Create eedi2 threads and locks.
1560          */
1561         pv->eedi2_threads = malloc( sizeof( hb_thread_t* ) * 3 );
1562         pv->eedi2_begin_lock = malloc( sizeof( hb_lock_t * ) * 3 );
1563         pv->eedi2_complete_lock = malloc( sizeof( hb_lock_t * ) * 3 );
1564         pv->eedi2_arguments = malloc( sizeof( eedi2_arguments_t ) * 3 );
1565
1566         if( pv->post_processing > 1 )
1567         {
1568             pv->cx2 = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16);
1569             pv->cy2 = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16);
1570             pv->cxy = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16);
1571             pv->tmpc = (int*)eedi2_aligned_malloc(pv->height[0]*pv->ref_stride[0]*sizeof(int), 16);
1572             if( !pv->cx2 || !pv->cy2 || !pv->cxy || !pv->tmpc )
1573                 hb_log("EEDI2: failed to malloc derivative arrays");
1574             else
1575                 hb_log("EEDI2: successfully mallloced derivative arrays");
1576         }
1577
1578         for( i = 0; i < 3; i++ )
1579         {
1580             eedi2_thread_arg_t *eedi2_thread_args;
1581
1582             eedi2_thread_args = malloc( sizeof( eedi2_thread_arg_t ) );
1583
1584             if( eedi2_thread_args )
1585             {
1586                 eedi2_thread_args->pv = pv;
1587                 eedi2_thread_args->plane = i;
1588
1589                 pv->eedi2_begin_lock[i] = hb_lock_init();
1590                 pv->eedi2_complete_lock[i] = hb_lock_init();
1591
1592                 /*
1593                  * Important to start off with the threads locked waiting
1594                  * on input.
1595                  */
1596                 hb_lock( pv->eedi2_begin_lock[i] );
1597
1598                 pv->eedi2_arguments[i].stop = 0;
1599
1600                 pv->eedi2_threads[i] = hb_thread_init( "eedi2_filter_segment",
1601                                                        eedi2_filter_thread,
1602                                                        eedi2_thread_args,
1603                                                        HB_NORMAL_PRIORITY );
1604             }
1605             else
1606             {
1607                 hb_error( "eedi2 could not create threads" );
1608             }
1609         }
1610     }
1611     
1612     
1613     /* Allocate mcdeint specific buffers */
1614     if( pv->mcdeint_mode >= 0 )
1615     {
1616         avcodec_init();
1617         avcodec_register_all();
1618
1619         AVCodec * enc = avcodec_find_encoder( CODEC_ID_SNOW );
1620
1621         int i;
1622         for (i = 0; i < 3; i++ )
1623         {
1624             AVCodecContext * avctx_enc;
1625
1626             avctx_enc = pv->mcdeint_avctx_enc = avcodec_alloc_context();
1627
1628             avctx_enc->width                    = width;
1629             avctx_enc->height                   = height;
1630             avctx_enc->time_base                = (AVRational){1,25};  // meaningless
1631             avctx_enc->gop_size                 = 300;
1632             avctx_enc->max_b_frames             = 0;
1633             avctx_enc->pix_fmt                  = PIX_FMT_YUV420P;
1634             avctx_enc->flags                    = CODEC_FLAG_QSCALE | CODEC_FLAG_LOW_DELAY;
1635             avctx_enc->strict_std_compliance    = FF_COMPLIANCE_EXPERIMENTAL;
1636             avctx_enc->global_quality           = 1;
1637             avctx_enc->flags2                   = CODEC_FLAG2_MEMC_ONLY;
1638             avctx_enc->me_cmp                   = FF_CMP_SAD; //SSE;
1639             avctx_enc->me_sub_cmp               = FF_CMP_SAD; //SSE;
1640             avctx_enc->mb_cmp                   = FF_CMP_SSE;
1641
1642             switch( pv->mcdeint_mode )
1643             {
1644                 case 3:
1645                     avctx_enc->refs = 3;
1646                 case 2:
1647                     avctx_enc->me_method = ME_UMH;
1648                 case 1:
1649                     avctx_enc->flags |= CODEC_FLAG_4MV;
1650                     avctx_enc->dia_size =2;
1651                 case 0:
1652                     avctx_enc->flags |= CODEC_FLAG_QPEL;
1653             }
1654
1655             hb_avcodec_open(avctx_enc, enc);
1656         }
1657
1658         pv->mcdeint_frame       = avcodec_alloc_frame();
1659         pv->mcdeint_outbuf_size = width * height * 10;
1660         pv->mcdeint_outbuf      = malloc( pv->mcdeint_outbuf_size );
1661     }
1662
1663     return pv;
1664 }
1665
1666 void hb_decomb_close( hb_filter_private_t * pv )
1667 {
1668     if( !pv )
1669     {
1670         return;
1671     }
1672     
1673     hb_log("decomb: %s deinterlaced %i | blend deinterlaced %i | unfiltered %i | total %i", pv->mode == 5 ? "yadif+eedi2" : "yadif", pv->yadif_deinterlaced_frames, pv->blend_deinterlaced_frames, pv->unfiltered_frames, pv->yadif_deinterlaced_frames + pv->blend_deinterlaced_frames + pv->unfiltered_frames);
1674
1675     /* Cleanup frame buffers */
1676     if( pv->buf_out[0] )
1677     {
1678         hb_buffer_close( &pv->buf_out[0] );
1679     }
1680     if( pv->buf_out[1] )
1681     {
1682         hb_buffer_close( &pv->buf_out[1] );
1683     }
1684     if (pv->buf_settings )
1685     {
1686         hb_buffer_close( &pv->buf_settings );
1687     }
1688
1689     /* Cleanup yadif specific buffers */
1690     int i;
1691     for( i = 0; i<3*3; i++ )
1692     {
1693         uint8_t **p = &pv->ref[i%3][i/3];
1694         if (*p)
1695         {
1696             free( *p - 3*pv->ref_stride[i/3] );
1697             *p = NULL;
1698         }
1699     }
1700     
1701     /* Cleanup combing mask. */
1702     for( i = 0; i<3*3; i++ )
1703     {
1704         uint8_t **p = &pv->mask[i/3];
1705         if (*p)
1706         {
1707             free( *p - 3*pv->ref_stride[i/3] );
1708             *p = NULL;
1709         }
1710     }
1711     
1712     if( pv->mode == 5 )
1713     {
1714         /* Cleanup eedi-half  buffers */
1715         int j;
1716         for( i = 0; i<3; i++ )
1717         {
1718             for( j = 0; j < 4; j++ )
1719             {
1720                 uint8_t **p = &pv->eedi_half[j][i];
1721                 if (*p)
1722                 {
1723                     free( *p - 3*pv->ref_stride[i] );
1724                     *p = NULL;
1725                 }            
1726             }
1727         }
1728
1729         /* Cleanup eedi-full  buffers */
1730         for( i = 0; i<3; i++ )
1731         {
1732             for( j = 0; j < 5; j++ )
1733             {
1734                 uint8_t **p = &pv->eedi_full[j][i];
1735                 if (*p)
1736                 {
1737                     free( *p - 3*pv->ref_stride[i] );
1738                     *p = NULL;
1739                 }            
1740             }
1741         }
1742     }
1743     
1744     if( pv->post_processing > 1  && pv->mode == 5 )
1745     {
1746         if (pv->cx2) eedi2_aligned_free(pv->cx2);
1747         if (pv->cy2) eedi2_aligned_free(pv->cy2);
1748         if (pv->cxy) eedi2_aligned_free(pv->cxy);
1749         if (pv->tmpc) eedi2_aligned_free(pv->tmpc);
1750     }
1751     
1752     for( i = 0; i < pv->cpu_count; i++)
1753     {
1754         /*
1755          * Tell each yadif thread to stop, and then cleanup.
1756          */
1757         pv->yadif_arguments[i].stop = 1;
1758         hb_unlock(  pv->yadif_begin_lock[i] );
1759
1760         hb_thread_close( &pv->yadif_threads[i] );
1761         hb_lock_close( &pv->yadif_begin_lock[i] );
1762         hb_lock_close( &pv->yadif_complete_lock[i] );
1763     }
1764     
1765     /*
1766      * free memory for yadif structs
1767      */
1768     free( pv->yadif_threads );
1769     free( pv->yadif_begin_lock );
1770     free( pv->yadif_complete_lock );
1771     free( pv->yadif_arguments );
1772     
1773     for( i = 0; i < pv->cpu_count; i++)
1774     {
1775         /*
1776          * Tell each decomb thread to stop, and then cleanup.
1777          */
1778         pv->decomb_arguments[i].stop = 1;
1779         hb_unlock(  pv->decomb_begin_lock[i] );
1780
1781         hb_thread_close( &pv->decomb_threads[i] );
1782         hb_lock_close( &pv->decomb_begin_lock[i] );
1783         hb_lock_close( &pv->decomb_complete_lock[i] );
1784     }
1785     
1786     /*
1787      * free memory for decomb structs
1788      */
1789     free( pv->decomb_threads );
1790     free( pv->decomb_begin_lock );
1791     free( pv->decomb_complete_lock );
1792     free( pv->decomb_arguments );
1793     
1794     if( pv->mode == 5 )
1795     {
1796         for( i = 0; i < 3; i++)
1797         {
1798             /*
1799              * Tell each eedi2 thread to stop, and then cleanup.
1800              */
1801             pv->eedi2_arguments[i].stop = 1;
1802             hb_unlock(  pv->eedi2_begin_lock[i] );
1803
1804             hb_thread_close( &pv->eedi2_threads[i] );
1805             hb_lock_close( &pv->eedi2_begin_lock[i] );
1806             hb_lock_close( &pv->eedi2_complete_lock[i] );
1807         }
1808
1809         /*
1810          * free memory for eedi2 structs
1811          */
1812         free( pv->eedi2_threads );
1813         free( pv->eedi2_begin_lock );
1814         free( pv->eedi2_complete_lock );
1815         free( pv->eedi2_arguments );
1816     }
1817     
1818     /* Cleanup mcdeint specific buffers */
1819     if( pv->mcdeint_mode >= 0 )
1820     {
1821         if( pv->mcdeint_avctx_enc )
1822         {
1823             hb_avcodec_close( pv->mcdeint_avctx_enc );
1824             av_freep( &pv->mcdeint_avctx_enc );
1825         }
1826         if( pv->mcdeint_outbuf )
1827         {
1828             free( pv->mcdeint_outbuf );
1829         }
1830     }
1831
1832     free( pv );
1833 }
1834
1835 int hb_decomb_work( const hb_buffer_t * cbuf_in,
1836                     hb_buffer_t ** buf_out,
1837                     int pix_fmt,
1838                     int width,
1839                     int height,
1840                     hb_filter_private_t * pv )
1841 {
1842     hb_buffer_t * buf_in = (hb_buffer_t *)cbuf_in;
1843
1844     if( !pv ||
1845         pix_fmt != pv->pix_fmt ||
1846         width   != pv->width[0] ||
1847         height  != pv->height[0] )
1848     {
1849         return FILTER_FAILED;
1850     }
1851
1852     avpicture_fill( &pv->pic_in, buf_in->data,
1853                     pix_fmt, width, height );
1854
1855     /* Determine if top-field first layout */
1856     int tff;
1857     if( pv->parity < 0 )
1858     {
1859         tff = !!(buf_in->flags & PIC_FLAG_TOP_FIELD_FIRST);
1860     }
1861     else
1862     {
1863         tff = (pv->parity & 1) ^ 1;
1864     }
1865
1866     pv->tff = tff;
1867     
1868     /* Store current frame in yadif cache */
1869     store_ref( (const uint8_t**)pv->pic_in.data, pv );
1870
1871     /* If yadif is not ready, store another ref and return FILTER_DELAY */
1872     if( pv->yadif_ready == 0 )
1873     {
1874         store_ref( (const uint8_t**)pv->pic_in.data, pv );
1875
1876         hb_buffer_copy_settings( pv->buf_settings, buf_in );
1877
1878         /* don't let 'work_loop' send a chapter mark upstream */
1879         buf_in->new_chap  = 0;
1880
1881         pv->yadif_ready = 1;
1882
1883         return FILTER_DELAY;
1884     }
1885
1886     /* Perform yadif filtering */        
1887     int frame;
1888     for( frame = 0; frame <= ( ( pv->mode == 2 || pv->mode == 3 )? 1 : 0 ) ; frame++ )
1889     {
1890         int parity = frame ^ tff ^ 1;
1891
1892         avpicture_fill( &pv->pic_out, pv->buf_out[!(frame^1)]->data,
1893                         pix_fmt, width, height );
1894
1895         yadif_filter( pv->pic_out.data, parity, tff, pv );
1896
1897         if( pv->mcdeint_mode >= 0 )
1898         {
1899             /* Perform mcdeint filtering */
1900             avpicture_fill( &pv->pic_in,  pv->buf_out[(frame^1)]->data,
1901                             pix_fmt, width, height );
1902
1903             mcdeint_filter( pv->pic_in.data, pv->pic_out.data, parity, pv );
1904         }
1905
1906         *buf_out = pv->buf_out[!(frame^1)];
1907     }
1908
1909     /* Copy buffered settings to output buffer settings */
1910     hb_buffer_copy_settings( *buf_out, pv->buf_settings );
1911
1912     /* Replace buffered settings with input buffer settings */
1913     hb_buffer_copy_settings( pv->buf_settings, buf_in );
1914
1915     /* don't let 'work_loop' send a chapter mark upstream */
1916     buf_in->new_chap  = 0;
1917
1918     return FILTER_OK;
1919 }