libhb/decomb.c

   1 /* $Id: decomb.c,v 1.14 2008/04/25 5:00:00 jbrjake Exp $
   2
   3    This file is part of the HandBrake source code.
   4    Homepage: <http://handbrake.fr/>.
   5    It may be used under the terms of the GNU General Public License.
   6
   7    The yadif algorithm was created by Michael Niedermayer. */
   8 #include "hb.h"
   9 #include "libavcodec/avcodec.h"
  10 #include "mpeg2dec/mpeg2.h"
  11
  12 #define SUPPRESS_AV_LOG /* Not referenced anywhere in the visible part of this file. */
  13
  14 #define MODE_DEFAULT     1 /* NOTE(review): presumably the fallback for pv->mode; the init code is not visible here -- confirm. */
  15 #define PARITY_DEFAULT   -1 /* NOTE(review): presumably the fallback for pv->parity -- confirm in hb_decomb_init. */
  16
  17 #define MCDEINT_MODE_DEFAULT   -1 /* NOTE(review): presumably the fallback for pv->mcdeint_mode -- confirm. */
  18 #define MCDEINT_QP_DEFAULT      1 /* NOTE(review): presumably the fallback for pv->mcdeint_qp -- confirm. */
  19
  20 #define ABS(a) ((a) > 0 ? (a) : (-(a))) /* Absolute value; the argument is evaluated twice, so it must be free of side effects. */
  21 #define MIN3(a,b,c) MIN(MIN(a,b),c) /* Smallest of three values; MIN is not defined in this file (presumably from hb.h -- confirm). */
  22 #define MAX3(a,b,c) MAX(MAX(a,b),c) /* Largest of three values; MAX is not defined in this file (presumably from hb.h -- confirm). */
  23
  24 typedef struct yadif_arguments_s { /* Work order for one yadif worker thread; each worker reads entry [segment] of pv->yadif_arguments. */
  25     uint8_t **dst; /* Output planes, written by the worker as dst[plane][y*w]; NULL makes the worker log an error and wait again. */
  26     int parity; /* Rows where ((y ^ parity) & 1) is non-zero are the ones yadif rebuilds. */
  27     int tff; /* Copied into a worker local; how it is used is not visible in this part of the file. */
  28     int stop; /* Non-zero tells the worker to leave its loop. */
  29     int is_combed; /* Comb verdict for the frame: 2 selects blending, 1 selects yadif on the parity rows. */
  30 } yadif_arguments_t;
  31
  32 typedef struct decomb_arguments_s { /* Per-segment control block for a comb-detection worker thread. */
  33     int stop; /* Non-zero makes decomb_filter_thread leave its loop the next time it is woken. */
  34 } decomb_arguments_t;
  35
  36 struct hb_filter_private_s /* State shared by the decomb filter entry points and its worker threads. */
  37 {
  38     int              pix_fmt;
  39     int              width[3]; /* Row length in bytes of each plane; also the row pitch of the packed in/out pictures. */
  40     int              height[3]; /* Number of rows in each plane. */
  41
  42     int              mode; /* > 0 enables cubic interpolation in yadif; 4 blends every frame flagged as combed. */
  43     int              spatial_metric; /* Selects the spatial comb test in detect_combed_segment (0, 1 or 2). */
  44     int              motion_threshold; /* <= 0 skips the motion test in detect_combed_segment. */
  45     int              spatial_threshold; /* Minimum difference to the rows above and below for a comb candidate. */
  46     int              block_threshold; /* Combed pixels per block needed by check_combing_mask; half of it triggers the blend path. */
  47     int              block_width; /* Width of the window check_combing_mask slides over the mask. */
  48     int              block_height; /* Height of that window. */
  49
  50     int              parity;
  51
  52     int              yadif_ready;
  53
  54     int              mcdeint_mode;
  55     int              mcdeint_qp;
  56
  57     int              mcdeint_outbuf_size;
  58     uint8_t        * mcdeint_outbuf;
  59     AVCodecContext * mcdeint_avctx_enc;
  60     AVFrame        * mcdeint_frame;
  61     AVFrame        * mcdeint_frame_dec;
  62
  63     int              yadif_deinterlaced_frames; /* The three counters below are all zero only before the first frame is classified; */
  64     int              blend_deinterlaced_frames; /* detect_combed_segment uses that to bypass its motion test, */
  65     int              unfiltered_frames;         /* and their sum + 1 is the frame number in the disabled debug log. */
  66
  67     uint8_t        * ref[4][3]; /* [frame][plane]: 0 = previous, 1 = current, 2 = next; slot 3 is scratch for store_ref's rotation. */
  68     int              ref_stride[3]; /* Row pitch in bytes of the ref[] and mask[] planes. */
  69
  70     /* Make a buffer to store a comb mask. */
  71     uint8_t        * mask[3]; /* One byte per pixel: 255 = combed, 0 = clean; only plane 0 is written in the visible code. */
  72
  73     AVPicture        pic_in;
  74     AVPicture        pic_out;
  75     hb_buffer_t *    buf_out[2];
  76     hb_buffer_t *    buf_settings; /* Its flags field is tested against 16 to pick the "Film" path in check_combing_mask. */
  77
  78     int              cpu_count; /* Number of row segments, and of worker threads of each kind. */
  79
  80     hb_thread_t    ** yadif_threads;         // Threads for Yadif - one per CPU
  81     hb_lock_t      ** yadif_begin_lock;      // Thread has work
  82     hb_lock_t      ** yadif_complete_lock;   // Thread has completed work
  83     yadif_arguments_t *yadif_arguments;      // Arguments to thread for work
  84
  85     hb_thread_t    ** decomb_threads;        // Threads for comb detection - one per CPU
  86     hb_lock_t      ** decomb_begin_lock;     // Thread has work
  87     hb_lock_t      ** decomb_complete_lock;  // Thread has completed work
  88     decomb_arguments_t *decomb_arguments;    // Arguments to thread for work
  89
  90 };
  91
  92 hb_filter_private_t * hb_decomb_init( int pix_fmt, /* Forward declaration: referenced by hb_filter_decomb below; the definition is not in this part of the file. */
  93                                            int width,
  94                                            int height,
  95                                            char * settings );
  96
  97 int hb_decomb_work(      const hb_buffer_t * buf_in, /* Forward declaration: referenced by hb_filter_decomb below. */
  98                          hb_buffer_t ** buf_out,
  99                          int pix_fmt,
 100                          int width,
 101                          int height,
 102                          hb_filter_private_t * pv );
 103
 104 void hb_decomb_close( hb_filter_private_t * pv ); /* Forward declaration: referenced by hb_filter_decomb below. */
 105
 106 hb_filter_object_t hb_filter_decomb = /* Filter descriptor; positional initializer, so the field order is fixed by hb_filter_object_t (declared elsewhere). */
 107 {
 108     FILTER_DECOMB,
 109     "Deinterlaces selectively with yadif/mcdeint and lowpass5 blending",
 110     NULL, /* NOTE(review): meaning of this slot depends on hb_filter_object_t -- confirm against its declaration. */
 111     hb_decomb_init,
 112     hb_decomb_work,
 113     hb_decomb_close,
 114 };
 115
 116 int cubic_interpolate( int y0, int y1, int y2, int y3 )
 117 {
 118     /* From http://www.neuron2.net/library/cubicinterp.html */
 119     int result = ( y0 * -3 ) + ( y1 * 23 ) + ( y2 * 23 ) + ( y3 * -3 );
 120     result /= 40;
 121
 122     if( result > 255 )
 123     {
 124         result = 255;
 125     }
 126     else if( result < 0 )
 127     {
 128         result = 0;
 129     }
 130
 131     return result;
 132 }
 133
 134 static void store_ref( const uint8_t ** pic,
 135                              hb_filter_private_t * pv )
 136 {
 137     memcpy( pv->ref[3],
 138             pv->ref[0],
 139             sizeof(uint8_t *)*3 );
 140
 141     memmove( pv->ref[0],
 142              pv->ref[1],
 143              sizeof(uint8_t *)*3*3 );
 144
 145     int i;
 146     for( i = 0; i < 3; i++ )
 147     {
 148         const uint8_t * src = pic[i];
 149         uint8_t * ref = pv->ref[2][i];
 150
 151         int w = pv->width[i];
 152         int h = pv->height[i];
 153         int ref_stride = pv->ref_stride[i];
 154
 155         int y;
 156         for( y = 0; y < pv->height[i]; y++ )
 157         {
 158             memcpy(ref, src, w);
 159             src = (uint8_t*)src + w;
 160             ref = (uint8_t*)ref + ref_stride;
 161         }
 162     }
 163 }
 164
 165 static void get_ref( uint8_t ** pic, hb_filter_private_t * pv, int frm )
 166 {
 167     int i;
 168     for( i = 0; i < 3; i++ )
 169     {
 170         uint8_t * dst = pic[i];
 171         const uint8_t * ref = pv->ref[frm][i];
 172         int w = pv->width[i];
 173         int ref_stride = pv->ref_stride[i];
 174
 175         int y;
 176         for( y = 0; y < pv->height[i]; y++ )
 177         {
 178             memcpy(dst, ref, w);
 179             dst += w;
 180             ref += ref_stride;
 181         }
 182     }
 183 }
 184
 185 int blend_filter_pixel( int up2, int up1, int current, int down1, int down2 )
 186 {
 187     /* Low-pass 5-tap filter */
 188     int result = 0;
 189     result += -up2;
 190     result += up1 * 2;
 191     result += current * 6;
 192     result += down1 *2;
 193     result += -down2;
 194     result /= 8;
 195
 196     if( result > 255 )
 197     {
 198         result = 255;
 199     }
 200     if( result < 0 )
 201     {
 202         result = 0;
 203     }
 204
 205     return result;
 206 }
 207
 208 static void blend_filter_line( uint8_t *dst,
 209                                uint8_t *cur,
 210                                int plane,
 211                                int y,
 212                                hb_filter_private_t * pv )
 213 {
 214     int w = pv->width[plane];
 215     int refs = pv->ref_stride[plane];
 216     int x;
 217
 218     for( x = 0; x < w; x++)
 219     {
 220         int a, b, c, d, e;
 221
 222         a = cur[-2*refs];
 223         b = cur[-refs];
 224         c = cur[0];
 225         d = cur[+refs];
 226         e = cur[2*refs];
 227
 228         if( y == 0 )
 229         {
 230             /* First line, so A and B don't exist.*/
 231             a = cur[0];
 232             b = cur[0];
 233         }
 234         else if( y == 1 )
 235         {
 236             /* Second line, no A. */
 237             a = cur[-refs];
 238         }
 239         else if( y == (pv->height[plane] - 2) )
 240         {
 241             /* Second to last line, no E. */
 242             e = cur[+refs];
 243         }
 244         else if( y == (pv->height[plane] -1) )
 245         {
 246             /* Last line, no D or E. */
 247             d = cur[0];
 248             e = cur[0];
 249         }
 250
 251         dst[0] = blend_filter_pixel( a, b, c, d, e );
 252
 253         dst++;
 254         cur++;
 255     }
 256 }
 257
 258 int check_combing_mask( hb_filter_private_t * pv )
 259 {
 260     /* Go through the mask in X*Y blocks. If any of these windows
 261        have threshold or more combed pixels, consider the whole
 262        frame to be combed and send it on to be deinterlaced.     */
 263
 264     /* Block mask threshold -- The number of pixels
 265        in a block_width * block_height window of
 266        he mask that need to show combing for the
 267        whole frame to be seen as such.            */
 268     int threshold       = pv->block_threshold;
 269     int block_width     = pv->block_width;
 270     int block_height    = pv->block_height;
 271     int block_x, block_y;
 272     int block_score = 0; int send_to_blend = 0;
 273
 274     int x, y, k;
 275
 276     for( k = 0; k < 1; k++ )
 277     {
 278         int ref_stride = pv->ref_stride[k];
 279         for( y = 0; y < ( pv->height[k] - block_height ); y = y + block_height )
 280         {
 281             for( x = 0; x < ( pv->width[k] - block_width ); x = x + block_width )
 282             {
 283                 block_score = 0;
 284                 for( block_y = 0; block_y < block_height; block_y++ )
 285                 {
 286                     for( block_x = 0; block_x < block_width; block_x++ )
 287                     {
 288                         int mask_y = y + block_y;
 289                         int mask_x = x + block_x;
 290
 291                         /* We only want to mark a pixel in a block as combed
 292                            if the pixels above and below are as well. Got to
 293                            handle the top and bottom lines separately.       */
 294                         if( y + block_y == 0 )
 295                         {
 296                             if( pv->mask[k][mask_y*ref_stride+mask_x    ] == 255 &&
 297                                 pv->mask[k][mask_y*ref_stride+mask_x + 1] == 255 )
 298                                     block_score++;
 299                         }
 300                         else if( y + block_y == pv->height[k] - 1 )
 301                         {
 302                             if( pv->mask[k][mask_y*ref_stride+mask_x - 1] == 255 &&
 303                                 pv->mask[k][mask_y*ref_stride+mask_x    ] == 255 )
 304                                     block_score++;
 305                         }
 306                         else
 307                         {
 308                             if( pv->mask[k][mask_y*ref_stride+mask_x - 1] == 255 &&
 309                                 pv->mask[k][mask_y*ref_stride+mask_x    ] == 255 &&
 310                                 pv->mask[k][mask_y*ref_stride+mask_x + 1] == 255 )
 311                                     block_score++;
 312                         }
 313                     }
 314                 }
 315
 316                 if( block_score >= ( threshold / 2 ) )
 317                 {
 318 #if 0
 319                     hb_log("decomb: frame %i | score %i | type %s", pv->yadif_deinterlaced_frames + pv->blend_deinterlaced_frames +  pv->unfiltered_frames + 1, block_score, pv->buf_settings->flags & 16 ? "Film" : "Video");
 320 #endif
 321                     if ( block_score <= threshold && !( pv->buf_settings->flags & 16) )
 322                     {
 323                         /* Blend video content that scores between
 324                            ( threshold / 2 ) and threshold.        */
 325                         send_to_blend = 1;
 326                     }
 327                     else if( block_score > threshold )
 328                     {
 329                         if( pv->buf_settings->flags & 16 )
 330                         {
 331                             /* Blend progressive content above the threshold.*/
 332                             return 2;
 333                         }
 334                         else
 335                         {
 336                             /* Yadif deinterlace video content above the threshold. */
 337                             return 1;
 338                         }
 339                     }
 340                 }
 341             }
 342         }
 343     }
 344
 345     if( send_to_blend )
 346     {
 347         return 2;
 348     }
 349     else
 350     {
 351         /* Consider this frame to be uncombed. */
 352         return 0;
 353     }
 354 }
 355
 356 int detect_combed_segment( hb_filter_private_t * pv, int segment_start, int segment_stop )
 357 {
 358     /* A mish-mash of various comb detection tricks
 359        picked up from neuron2's Decomb plugin for
 360        AviSynth and tritical's IsCombedT and
 361        IsCombedTIVTC plugins.                       */
 362
 363     int x, y, k, width, height;
 364
 365     /* Comb scoring algorithm */
 366     int spatial_metric  = pv->spatial_metric;
 367     /* Motion threshold */
 368     int mthresh         = pv->motion_threshold;
 369     /* Spatial threshold */
 370     int athresh         = pv->spatial_threshold;
 371     int athresh_squared = athresh * athresh;
 372     int athresh6        = 6 *athresh;
 373
 374     /* One pas for Y, one pass for U, one pass for V */
 375     for( k = 0; k < 1; k++ )
 376     {
 377         int ref_stride  = pv->ref_stride[k];
 378         width           = pv->width[k];
 379         height          = pv->height[k];
 380
 381         /* Comb detection has to start at y = 2 and end at
 382            y = height - 2, because it needs to examine
 383            2 pixels above and 2 below the current pixel.      */
 384         if( segment_start < 2 )
 385             segment_start = 2;
 386         if( segment_stop > height - 2 )
 387             segment_stop = height - 2;
 388
 389         for( y =  segment_start; y < segment_stop; y++ )
 390         {
 391             /* These are just to make the buffer locations easier to read. */
 392             int back_2    = ( y - 2 )*ref_stride ;
 393             int back_1    = ( y - 1 )*ref_stride;
 394             int current   =         y*ref_stride;
 395             int forward_1 = ( y + 1 )*ref_stride;
 396             int forward_2 = ( y + 2 )*ref_stride;
 397
 398             /* We need to examine a column of 5 pixels
 399                in the prev, cur, and next frames.      */
 400             uint8_t previous_frame[5];
 401             uint8_t current_frame[5];
 402             uint8_t next_frame[5];
 403
 404             for( x = 0; x < width; x++ )
 405             {
 406                 /* Fill up the current frame array with the current pixel values.*/
 407                 current_frame[0] = pv->ref[1][k][back_2    + x];
 408                 current_frame[1] = pv->ref[1][k][back_1    + x];
 409                 current_frame[2] = pv->ref[1][k][current   + x];
 410                 current_frame[3] = pv->ref[1][k][forward_1 + x];
 411                 current_frame[4] = pv->ref[1][k][forward_2 + x];
 412
 413                 int up_diff   = current_frame[2] - current_frame[1];
 414                 int down_diff = current_frame[2] - current_frame[3];
 415
 416                 if( ( up_diff >  athresh && down_diff >  athresh ) ||
 417                     ( up_diff < -athresh && down_diff < -athresh ) )
 418                 {
 419                     /* The pixel above and below are different,
 420                        and they change in the same "direction" too.*/
 421                     int motion = 0;
 422                     if( mthresh > 0 )
 423                     {
 424                         /* Make sure there's sufficient motion between frame t-1 to frame t+1. */
 425                         previous_frame[0] = pv->ref[0][k][back_2    + x];
 426                         previous_frame[1] = pv->ref[0][k][back_1    + x];
 427                         previous_frame[2] = pv->ref[0][k][current   + x];
 428                         previous_frame[3] = pv->ref[0][k][forward_1 + x];
 429                         previous_frame[4] = pv->ref[0][k][forward_2 + x];
 430                         next_frame[0]     = pv->ref[2][k][back_2    + x];
 431                         next_frame[1]     = pv->ref[2][k][back_1    + x];
 432                         next_frame[2]     = pv->ref[2][k][current   + x];
 433                         next_frame[3]     = pv->ref[2][k][forward_1 + x];
 434                         next_frame[4]     = pv->ref[2][k][forward_2 + x];
 435
 436                         if( abs( previous_frame[2] - current_frame[2] ) > mthresh &&
 437                             abs(  current_frame[1] - next_frame[1]    ) > mthresh &&
 438                             abs(  current_frame[3] - next_frame[3]    ) > mthresh )
 439                                 motion++;
 440                         if( abs(     next_frame[2] - current_frame[2] ) > mthresh &&
 441                             abs( previous_frame[1] - current_frame[1] ) > mthresh &&
 442                             abs( previous_frame[3] - current_frame[3] ) > mthresh )
 443                                 motion++;
 444                     }
 445                     else
 446                     {
 447                         /* User doesn't want to check for motion,
 448                            so move on to the spatial check.       */
 449                         motion = 1;
 450                     }
 451
 452                     if( motion || ( pv->yadif_deinterlaced_frames==0 && pv->blend_deinterlaced_frames==0 && pv->unfiltered_frames==0) )
 453                     {
 454                            /* That means it's time for the spatial check.
 455                               We've got several options here.             */
 456                         if( spatial_metric == 0 )
 457                         {
 458                             /* Simple 32detect style comb detection */
 459                             if( ( abs( current_frame[2] - current_frame[4] ) < 10  ) &&
 460                                 ( abs( current_frame[2] - current_frame[3] ) > 15 ) )
 461                             {
 462                                 pv->mask[k][y*ref_stride + x] = 255;
 463                             }
 464                             else
 465                             {
 466                                 pv->mask[k][y*ref_stride + x] = 0;
 467                             }
 468                         }
 469                         else if( spatial_metric == 1 )
 470                         {
 471                             /* This, for comparison, is what IsCombed uses.
 472                                It's better, but still noise senstive.      */
 473                                int combing = ( current_frame[1] - current_frame[2] ) *
 474                                              ( current_frame[3] - current_frame[2] );
 475
 476                                if( combing > athresh_squared )
 477                                    pv->mask[k][y*ref_stride + x] = 255;
 478                                else
 479                                    pv->mask[k][y*ref_stride + x] = 0;
 480                         }
 481                         else if( spatial_metric == 2 )
 482                         {
 483                             /* Tritical's noise-resistant combing scorer.
 484                                The check is done on a bob+blur convolution. */
 485                             int combing = abs( current_frame[0]
 486                                              + ( 4 * current_frame[2] )
 487                                              + current_frame[4]
 488                                              - ( 3 * ( current_frame[1]
 489                                                      + current_frame[3] ) ) );
 490
 491                             /* If the frame is sufficiently combed,
 492                                then mark it down on the mask as 255. */
 493                             if( combing > athresh6 )
 494                                 pv->mask[k][y*ref_stride + x] = 255;
 495                             else
 496                                 pv->mask[k][y*ref_stride + x] = 0;
 497                         }
 498                     }
 499                     else
 500                     {
 501                         pv->mask[k][y*ref_stride + x] = 0;
 502                     }
 503                 }
 504                 else
 505                 {
 506                     pv->mask[k][y*ref_stride + x] = 0;
 507                 }
 508             }
 509         }
 510     }
 511 }
 512
 513 typedef struct decomb_thread_arg_s { /* Start-up argument for decomb_filter_thread, which frees it when it exits. */
 514     hb_filter_private_t *pv; /* Shared filter state. */
 515     int segment; /* Index into the per-thread lock and argument arrays; also selects the band of rows to scan. */
 516 } decomb_thread_arg_t;
 517
 518 /*
 519  * comb detect this segment of all three planes in a single thread.
 520  */
 521 void decomb_filter_thread( void *thread_args_v )
 522 {
 523     decomb_arguments_t *decomb_work = NULL;
 524     hb_filter_private_t * pv;
 525     int run = 1;
 526     int segment, segment_start, segment_stop, plane;
 527     decomb_thread_arg_t *thread_args = thread_args_v;
 528
 529     pv = thread_args->pv;
 530     segment = thread_args->segment;
 531
 532     hb_log("decomb thread started for segment %d", segment);
 533
 534     while( run )
 535     {
 536         /*
 537          * Wait here until there is work to do. hb_lock() blocks until
 538          * render releases it to say that there is more work to do.
 539          */
 540         hb_lock( pv->decomb_begin_lock[segment] );
 541
 542         decomb_work = &pv->decomb_arguments[segment];
 543
 544         if( decomb_work->stop )
 545         {
 546             /*
 547              * No more work to do, exit this thread.
 548              */
 549             run = 0;
 550             continue;
 551         }
 552
 553         /*
 554          * Process segment (for now just from luma)
 555          */
 556         for( plane = 0; plane < 1; plane++)
 557         {
 558
 559             int w = pv->width[plane];
 560             int h = pv->height[plane];
 561             int ref_stride = pv->ref_stride[plane];
 562             segment_start = ( h / pv->cpu_count ) * segment;
 563             if( segment == pv->cpu_count - 1 )
 564             {
 565                 /*
 566                  * Final segment
 567                  */
 568                 segment_stop = h;
 569             } else {
 570                 segment_stop = ( h / pv->cpu_count ) * ( segment + 1 );
 571             }
 572
 573             detect_combed_segment( pv, segment_start, segment_stop );
 574         }
 575         /*
 576          * Finished this segment, let everyone know.
 577          */
 578         hb_unlock( pv->decomb_complete_lock[segment] );
 579     }
 580     free( thread_args_v );
 581 }
 582
 583 int comb_segmenter( hb_filter_private_t * pv )
 584 {
 585     int segment;
 586
 587     for( segment = 0; segment < pv->cpu_count; segment++ )
 588     {
 589         /*
 590          * Let the thread for this plane know that we've setup work
 591          * for it by releasing the begin lock (ensuring that the
 592          * complete lock is already locked so that we block when
 593          * we try to lock it again below).
 594          */
 595         hb_lock( pv->decomb_complete_lock[segment] );
 596         hb_unlock( pv->decomb_begin_lock[segment] );
 597     }
 598
 599     /*
 600      * Wait until all three threads have completed by trying to get
 601      * the complete lock that we locked earlier for each thread, which
 602      * will block until that thread has completed the work on that
 603      * plane.
 604      */
 605     for( segment = 0; segment < pv->cpu_count; segment++ )
 606     {
 607         hb_lock( pv->decomb_complete_lock[segment] );
 608         hb_unlock( pv->decomb_complete_lock[segment] );
 609     }
 610
 611     return check_combing_mask( pv );
 612 }
 613
 614 static void yadif_filter_line( uint8_t *dst, /* Output row; pv->width[plane] bytes are written. */
 615                                uint8_t *prev, /* Start of row y in the previous frame. */
 616                                uint8_t *cur, /* Start of row y in the current frame. */
 617                                uint8_t *next, /* Start of row y in the next frame. */
 618                                int plane, /* Plane index, used to look up width, height and stride. */
 619                                int parity, /* Non-zero pairs prev with cur as the adjacent fields, zero pairs cur with next. */
 620                                int y, /* Row number; rows closer than 3 to the top or bottom use two-tap interpolation. */
 621                                hb_filter_private_t * pv )
 622 {
 623     /* While prev and next point to the previous and next frames,
 624        prev2 and next2 will shift depending on the parity, usually 1.
 625        They are the previous and next fields, the fields temporally adjacent
 626        to the other field in the current frame--the one not being filtered.  */
 627     uint8_t *prev2 = parity ? prev : cur ;
 628     uint8_t *next2 = parity ? cur  : next;
 629     int w = pv->width[plane];
 630     int refs = pv->ref_stride[plane]; /* Distance in bytes between vertically adjacent pixels. */
 631     int x;
 632
 633     /* Decomb's cubic interpolation can only function when there are
 634        three samples above and below, so regress to yadif's traditional
 635        two-tap interpolation when filtering at the top and bottom edges. */
 636     int edge = 0;
 637     if( ( y < 3 ) || ( y > ( pv->height[plane] - 4 ) )  )
 638         edge = 1;
 639
 640     for( x = 0; x < w; x++)
 641     {
 642         /* Pixel above*/
 643         int c              = cur[-refs];
 644         /* Temporal average: the current location in the adjacent fields */
 645         int d              = (prev2[0] + next2[0])>>1;
 646         /* Pixel below */
 647         int e              = cur[+refs];
 648
 649         /* How the current pixel changes between the adjacent fields */
 650         int temporal_diff0 = ABS(prev2[0] - next2[0]);
 651         /* The average of how much the pixels above and below change from the frame before to now. */
 652         int temporal_diff1 = ( ABS(prev[-refs] - cur[-refs]) + ABS(prev[+refs] - cur[+refs]) ) >> 1;
 653         /* The average of how much the pixels above and below change from now to the next frame. */
 654         int temporal_diff2 = ( ABS(next[-refs] - cur[-refs]) + ABS(next[+refs] - cur[+refs]) ) >> 1;
 655         /* For the actual difference, use the largest of the previous average diffs. */
 656         int diff           = MAX3(temporal_diff0>>1, temporal_diff1, temporal_diff2);
 657
 658         /* SAD of how the pixel-1, the pixel, and the pixel+1 change from the line above to below. */
 659         int spatial_score  = ABS(cur[-refs-1] - cur[+refs-1]) + ABS(cur[-refs]-cur[+refs]) + /* NOTE(review): at x == 0 the -1 offsets read the byte before the row start -- confirm the buffers are padded. */
 660                                      ABS(cur[-refs+1] - cur[+refs+1]) - 1;
 661         int spatial_pred;
 662
 663         /* Spatial pred is either a bilinear or cubic vertical interpolation. */
 664         if( pv->mode > 0 && !edge)
 665         {
 666             spatial_pred = cubic_interpolate( cur[-3*refs], cur[-refs], cur[+refs], cur[3*refs] );
 667         }
 668         else
 669         {
 670             spatial_pred = (c+e)>>1;
 671         }
 672
 673 /* EDDI: Edge Directed Deinterlacing Interpolation
 674    Uses the Martinez-Lim Line Shift Parametric Modeling algorithm...I think.
 675    Checks 4 different slopes to see if there is more similarity along a diagonal
 676    than there was vertically. If a diagonal is more similar, then it indicates
 677    an edge, so interpolate along that instead of a vertical line, using either
 678    linear or cubic interpolation depending on mode.
      Note: the macro deliberately ends with two scopes still open (the outer
      block and the "score < spatial_score" branch); each invocation site
      below supplies the closing braces. */
 679 #define YADIF_CHECK(j)\
 680         {   int score = ABS(cur[-refs-1+j] - cur[+refs-1-j])\
 681                       + ABS(cur[-refs  +j] - cur[+refs  -j])\
 682                       + ABS(cur[-refs+1+j] - cur[+refs+1-j]);\
 683             if( score < spatial_score ){\
 684                 spatial_score = score;\
 685                 if( pv->mode > 0 && !edge )\
 686                 {\
 687                     switch(j)\
 688                     {\
 689                         case -1:\
 690                             spatial_pred = cubic_interpolate(cur[-3 * refs - 3], cur[-refs -1], cur[+refs + 1], cur[3* refs + 3] );\
 691                         break;\
 692                         case -2:\
 693                             spatial_pred = cubic_interpolate( ( ( cur[-3*refs - 4] + cur[-refs - 4] ) / 2 ) , cur[-refs -2], cur[+refs + 2], ( ( cur[3*refs + 4] + cur[refs + 4] ) / 2 ) );\
 694                         break;\
 695                         case 1:\
 696                             spatial_pred = cubic_interpolate(cur[-3 * refs +3], cur[-refs +1], cur[+refs - 1], cur[3* refs -3] );\
 697                         break;\
 698                         case 2:\
 699                             spatial_pred = cubic_interpolate(( ( cur[-3*refs + 4] + cur[-refs + 4] ) / 2 ), cur[-refs +2], cur[+refs - 2], ( ( cur[3*refs - 4] + cur[refs - 4] ) / 2 ) );\
 700                         break;\
 701                     }\
 702                 }\
 703                 else\
 704                 {\
 705                     spatial_pred = ( cur[-refs +j] + cur[+refs -j] ) >>1;\
 706                 }\
 707
 708                 YADIF_CHECK(-1) YADIF_CHECK(-2) }} }} /* The -2 slope is nested inside the -1 branch, so it is only tried when -1 lowered the score. */
 709                 YADIF_CHECK( 1) YADIF_CHECK( 2) }} }} /* Likewise, the +2 slope is only tried when +1 lowered the score. */
 710
 711         /* Temporally adjust the spatial prediction by
 712            comparing against lines in the adjacent fields. */
 713         int b = (prev2[-2*refs] + next2[-2*refs])>>1;
 714         int f = (prev2[+2*refs] + next2[+2*refs])>>1;
 715
 716         /* Find the median value */
 717         int max = MAX3(d-e, d-c, MIN(b-c, f-e));
 718         int min = MIN3(d-e, d-c, MAX(b-c, f-e));
 719         diff = MAX3( diff, min, -max );
 720
 721         if( spatial_pred > d + diff ) /* Clamp the spatial prediction to within diff of the temporal average d. */
 722         {
 723             spatial_pred = d + diff;
 724         }
 725         else if( spatial_pred < d - diff )
 726         {
 727             spatial_pred = d - diff;
 728         }
 729
 730         dst[0] = spatial_pred;
 731
 732         dst++; /* Advance every row pointer to the next column. */
 733         cur++;
 734         prev++;
 735         next++;
 736         prev2++;
 737         next2++;
 738     }
 739 }
 740
 741 typedef struct yadif_thread_arg_s { /* Start-up argument for yadif_decomb_filter_thread. */
 742     hb_filter_private_t *pv; /* Shared filter state. */
 743     int segment; /* Index into the per-thread lock and argument arrays; also selects the band of rows to filter. */
 744 } yadif_thread_arg_t;
 745
 746 /*
 747  * deinterlace this segment of all three planes in a single thread.
 748  */
 749 void yadif_decomb_filter_thread( void *thread_args_v )
 750 {
 751     yadif_arguments_t *yadif_work = NULL;
 752     hb_filter_private_t * pv;
 753     int run = 1;
 754     int plane;
 755     int segment, segment_start, segment_stop;
 756     yadif_thread_arg_t *thread_args = thread_args_v;
 757     uint8_t **dst;
 758     int parity, tff, y, w, h, penultimate, ultimate, ref_stride, is_combed;
 759
 760     pv = thread_args->pv;
 761     segment = thread_args->segment;
 762
 763     hb_log("yadif thread started for segment %d", segment);
 764
 765     while( run )
 766     {
 767         /*
 768          * Wait here until there is work to do. hb_lock() blocks until
 769          * render releases it to say that there is more work to do.
 770          */
 771         hb_lock( pv->yadif_begin_lock[segment] );
 772
 773         yadif_work = &pv->yadif_arguments[segment];
 774
 775         if( yadif_work->stop )
 776         {
 777             /*
 778              * No more work to do, exit this thread.
 779              */
 780             run = 0;
 781             continue;
 782         }
 783
 784         if( yadif_work->dst == NULL )
 785         {
 786             hb_error( "thread started when no work available" );
 787             hb_snooze(500);
 788             continue;
 789         }
 790
 791         is_combed = pv->yadif_arguments[segment].is_combed;
 792
 793         /*
 794          * Process all three planes, but only this segment of it.
 795          */
 796         for( plane = 0; plane < 3; plane++)
 797         {
 798
 799             dst = yadif_work->dst;
 800             parity = yadif_work->parity;
 801             tff = yadif_work->tff;
 802             w = pv->width[plane];
 803             h = pv->height[plane];
 804             penultimate = h - 2;
 805             ultimate = h - 1;
 806             ref_stride = pv->ref_stride[plane];
 807             segment_start = ( h / pv->cpu_count ) * segment;
 808             if( segment == pv->cpu_count - 1 )
 809             {
 810                 /*
 811                  * Final segment
 812                  */
 813                 segment_stop = h;
 814             } else {
 815                 segment_stop = ( h / pv->cpu_count ) * ( segment + 1 );
 816             }
 817
 818             for( y = segment_start; y < segment_stop; y++ )
 819             {
 820                 if( ( pv->mode == 4 && is_combed ) || is_combed == 2 )
 821                 {
 822                     /* This line gets blend filtered, not yadif filtered. */
 823                     uint8_t *prev = &pv->ref[0][plane][y*ref_stride];
 824                     uint8_t *cur  = &pv->ref[1][plane][y*ref_stride];
 825                     uint8_t *next = &pv->ref[2][plane][y*ref_stride];
 826                     uint8_t *dst2 = &dst[plane][y*w];
 827
 828                     blend_filter_line( dst2, cur, plane, y, pv );
 829                 }
 830                 else if( ( ( y ^ parity ) &  1 )  && ( is_combed == 1 ) )
 831                 {
 832                     /* This line gets yadif filtered. It is the bottom field
 833                        when TFF and vice-versa. It's the field that gets
 834                        filtered. Because yadif needs 2 lines above and below
 835                        the one being filtered, we need to mirror the edges.
 836                        When TFF, this means replacing the 2nd line with a
 837                        copy of the 1st, and the last with the second-to-last. */
 838                     if( y > 1 && y < ( h -2 ) )
 839                     {
 840                         /* This isn't the top or bottom, proceed as normal to yadif. */
 841                         uint8_t *prev = &pv->ref[0][plane][y*ref_stride];
 842                         uint8_t *cur  = &pv->ref[1][plane][y*ref_stride];
 843                         uint8_t *next = &pv->ref[2][plane][y*ref_stride];
 844                         uint8_t *dst2 = &dst[plane][y*w];
 845
 846                         yadif_filter_line( dst2,
 847                                            prev,
 848                                            cur,
 849                                            next,
 850                                            plane,
 851                                            parity ^ tff,
 852                                            y,
 853                                            pv );
 854                     }
 855                     else if( y == 0 )
 856                     {
 857                         /* BFF, so y0 = y1 */
 858                         memcpy( &dst[plane][y*w],
 859                                 &pv->ref[1][plane][1*ref_stride],
 860                                 w * sizeof(uint8_t) );
 861                     }
 862                     else if( y == 1 )
 863                     {
 864                         /* TFF, so y1 = y0 */
 865                         memcpy( &dst[plane][y*w],
 866                                 &pv->ref[1][plane][0],
 867                                 w * sizeof(uint8_t) );
 868                     }
 869                     else if( y == penultimate )
 870                     {
 871                         /* BFF, so penultimate y = ultimate y */
 872                         memcpy( &dst[plane][y*w],
 873                                 &pv->ref[1][plane][ultimate*ref_stride],
 874                                 w * sizeof(uint8_t) );
 875                     }
 876                     else if( y == ultimate )
 877                     {
 878                         /* TFF, so ultimate y = penultimate y */
 879                         memcpy( &dst[plane][y*w],
 880                                 &pv->ref[1][plane][penultimate*ref_stride],
 881                                 w * sizeof(uint8_t) );
 882                     }
 883                 }
 884                 else
 885                 {
 886                     memcpy( &dst[plane][y*w],
 887                             &pv->ref[1][plane][y*ref_stride],
 888                             w * sizeof(uint8_t) );
 889                 }
 890             }
 891         }
 892         /*
 893          * Finished this segment, let everyone know.
 894          */
 895         hb_unlock( pv->yadif_complete_lock[segment] );
 896     }
 897     free( thread_args_v );
 898 }
 899
 900 static void yadif_filter( uint8_t ** dst,
 901                           int parity,
 902                           int tff,
 903                           hb_filter_private_t * pv )
 904 {
 905
 906     int is_combed = comb_segmenter( pv );
 907
 908     if( is_combed == 1 )
 909     {
 910         pv->yadif_deinterlaced_frames++;
 911     }
 912     else if( is_combed == 2 )
 913     {
 914         pv->blend_deinterlaced_frames++;
 915     }
 916     else
 917     {
 918         pv->unfiltered_frames++;
 919     }
 920
 921     if( is_combed )
 922     {
 923         int segment;
 924
 925         for( segment = 0; segment < pv->cpu_count; segment++ )
 926         {
 927             /*
 928              * Setup the work for this plane.
 929              */
 930             pv->yadif_arguments[segment].parity = parity;
 931             pv->yadif_arguments[segment].tff = tff;
 932             pv->yadif_arguments[segment].dst = dst;
 933             pv->yadif_arguments[segment].is_combed = is_combed;
 934
 935             /*
 936              * Let the thread for this plane know that we've setup work
 937              * for it by releasing the begin lock (ensuring that the
 938              * complete lock is already locked so that we block when
 939              * we try to lock it again below).
 940              */
 941             hb_lock( pv->yadif_complete_lock[segment] );
 942             hb_unlock( pv->yadif_begin_lock[segment] );
 943         }
 944
 945         /*
 946          * Wait until all three threads have completed by trying to get
 947          * the complete lock that we locked earlier for each thread, which
 948          * will block until that thread has completed the work on that
 949          * plane.
 950          */
 951         for( segment = 0; segment < pv->cpu_count; segment++ )
 952         {
 953             hb_lock( pv->yadif_complete_lock[segment] );
 954             hb_unlock( pv->yadif_complete_lock[segment] );
 955         }
 956
 957         /*
 958          * Entire frame is now deinterlaced.
 959          */
 960     }
 961     else
 962     {
 963         /*  Just passing through... */
 964         int i;
 965         for( i = 0; i < 3; i++ )
 966         {
 967             uint8_t * ref = pv->ref[1][i];
 968             uint8_t * dest = dst[i];
 969
 970             int w = pv->width[i];
 971             int ref_stride = pv->ref_stride[i];
 972
 973             int y;
 974             for( y = 0; y < pv->height[i]; y++ )
 975             {
 976                 memcpy(dest, ref, w);
 977                 dest += w;
 978                 ref += ref_stride;
 979             }
 980         }
 981     }
 982 }
 983
 984 static void mcdeint_filter( uint8_t ** dst,
 985                             uint8_t ** src,
 986                             int parity,
 987                             hb_filter_private_t * pv )
 988 {
 989     int x, y, i;
 990     int out_size;
 991
 992 #ifdef SUPPRESS_AV_LOG
 993     /* TODO: temporarily change log level to suppress obnoxious debug output */
 994     int loglevel = av_log_get_level();
 995     av_log_set_level( AV_LOG_QUIET );
 996 #endif
 997
 998     for( i=0; i<3; i++ )
 999     {
1000         pv->mcdeint_frame->data[i] = src[i];
1001         pv->mcdeint_frame->linesize[i] = pv->width[i];
1002     }
1003     pv->mcdeint_avctx_enc->me_cmp     = FF_CMP_SAD;
1004     pv->mcdeint_avctx_enc->me_sub_cmp = FF_CMP_SAD;
1005     pv->mcdeint_frame->quality        = pv->mcdeint_qp * FF_QP2LAMBDA;
1006
1007     out_size = avcodec_encode_video( pv->mcdeint_avctx_enc,
1008                                      pv->mcdeint_outbuf,
1009                                      pv->mcdeint_outbuf_size,
1010                                      pv->mcdeint_frame );
1011
1012     pv->mcdeint_frame_dec = pv->mcdeint_avctx_enc->coded_frame;
1013
1014     for( i = 0; i < 3; i++ )
1015     {
1016         int w    = pv->width[i];
1017         int h    = pv->height[i];
1018         int fils = pv->mcdeint_frame_dec->linesize[i];
1019         int srcs = pv->width[i];
1020
1021         for( y = 0; y < h; y++ )
1022         {
1023             if( (y ^ parity) & 1 )
1024             {
1025                 for( x = 0; x < w; x++ )
1026                 {
1027                     if( (x-2)+(y-1)*w >= 0 && (x+2)+(y+1)*w < w*h )
1028                     {
1029                         uint8_t * filp =
1030                             &pv->mcdeint_frame_dec->data[i][x + y*fils];
1031                         uint8_t * srcp = &src[i][x + y*srcs];
1032
1033                         int diff0 = filp[-fils] - srcp[-srcs];
1034                         int diff1 = filp[+fils] - srcp[+srcs];
1035
1036                         int spatial_score =
1037                               ABS(srcp[-srcs-1] - srcp[+srcs-1])
1038                             + ABS(srcp[-srcs  ] - srcp[+srcs  ])
1039                             + ABS(srcp[-srcs+1] - srcp[+srcs+1]) - 1;
1040
1041                         int temp = filp[0];
1042
1043 #define MCDEINT_CHECK(j)\
1044                         {   int score = ABS(srcp[-srcs-1+j] - srcp[+srcs-1-j])\
1045                                       + ABS(srcp[-srcs  +j] - srcp[+srcs  -j])\
1046                                       + ABS(srcp[-srcs+1+j] - srcp[+srcs+1-j]);\
1047                             if( score < spatial_score ) {\
1048                                 spatial_score = score;\
1049                                 diff0 = filp[-fils+j] - srcp[-srcs+j];\
1050                                 diff1 = filp[+fils-j] - srcp[+srcs-j];
1051
1052                         MCDEINT_CHECK(-1) MCDEINT_CHECK(-2) }} }}
1053                         MCDEINT_CHECK( 1) MCDEINT_CHECK( 2) }} }}
1054
1055                         if(diff0 + diff1 > 0)
1056                         {
1057                             temp -= (diff0 + diff1 -
1058                                      ABS( ABS(diff0) - ABS(diff1) ) / 2) / 2;
1059                         }
1060                         else
1061                         {
1062                             temp -= (diff0 + diff1 +
1063                                      ABS( ABS(diff0) - ABS(diff1) ) / 2) / 2;
1064                         }
1065
1066                         filp[0] = dst[i][x + y*w] =
1067                             temp > 255U ? ~(temp>>31) : temp;
1068                     }
1069                     else
1070                     {
1071                         dst[i][x + y*w] =
1072                             pv->mcdeint_frame_dec->data[i][x + y*fils];
1073                     }
1074                 }
1075             }
1076         }
1077
1078         for( y = 0; y < h; y++ )
1079         {
1080             if( !((y ^ parity) & 1) )
1081             {
1082                 for( x = 0; x < w; x++ )
1083                 {
1084                     pv->mcdeint_frame_dec->data[i][x + y*fils] =
1085                         dst[i][x + y*w]= src[i][x + y*srcs];
1086                 }
1087             }
1088         }
1089     }
1090
1091 #ifdef SUPPRESS_AV_LOG
1092     /* TODO: restore previous log level */
1093     av_log_set_level(loglevel);
1094 #endif
1095 }
1096
1097 hb_filter_private_t * hb_decomb_init( int pix_fmt,
1098                                            int width,
1099                                            int height,
1100                                            char * settings )
1101 {
1102     if( pix_fmt != PIX_FMT_YUV420P )
1103     {
1104         return 0;
1105     }
1106
1107     hb_filter_private_t * pv = calloc( 1, sizeof(struct hb_filter_private_s) );
1108
1109     pv->pix_fmt = pix_fmt;
1110
1111     pv->width[0]  = width;
1112     pv->height[0] = height;
1113     pv->width[1]  = pv->width[2]  = width >> 1;
1114     pv->height[1] = pv->height[2] = height >> 1;
1115
1116     int buf_size = 3 * width * height / 2;
1117     pv->buf_out[0] = hb_buffer_init( buf_size );
1118     pv->buf_out[1] = hb_buffer_init( buf_size );
1119     pv->buf_settings = hb_buffer_init( 0 );
1120
1121     pv->yadif_deinterlaced_frames = 0;
1122     pv->blend_deinterlaced_frames = 0;
1123     pv->unfiltered_frames = 0;
1124
1125     pv->yadif_ready    = 0;
1126
1127     pv->mode     = MODE_DEFAULT;
1128     pv->spatial_metric = 2;
1129     pv->motion_threshold = 6;
1130     pv->spatial_threshold = 9;
1131     pv->block_threshold = 80;
1132     pv->block_width = 16;
1133     pv->block_height = 16;
1134
1135     pv->parity   = PARITY_DEFAULT;
1136
1137     pv->mcdeint_mode   = MCDEINT_MODE_DEFAULT;
1138     pv->mcdeint_qp     = MCDEINT_QP_DEFAULT;
1139
1140     if( settings )
1141     {
1142         sscanf( settings, "%d:%d:%d:%d:%d:%d:%d",
1143                 &pv->mode,
1144                 &pv->spatial_metric,
1145                 &pv->motion_threshold,
1146                 &pv->spatial_threshold,
1147                 &pv->block_threshold,
1148                 &pv->block_width,
1149                 &pv->block_height );
1150     }
1151
1152     pv->cpu_count = hb_get_cpu_count();
1153
1154
1155     if( pv->mode == 2 || pv->mode == 3 )
1156     {
1157         pv->mcdeint_mode = 0;
1158     }
1159
1160     /* Allocate yadif specific buffers */
1161     int i, j;
1162     for( i = 0; i < 3; i++ )
1163     {
1164         int is_chroma = !!i;
1165         int w = ((width   + 31) & (~31))>>is_chroma;
1166         int h = ((height+6+ 31) & (~31))>>is_chroma;
1167
1168         pv->ref_stride[i] = w;
1169
1170         for( j = 0; j < 3; j++ )
1171         {
1172             pv->ref[j][i] = malloc( w*h*sizeof(uint8_t) ) + 3*w;
1173         }
1174     }
1175
1176     /* Allocate a buffer to store a comb mask. */
1177     for( i = 0; i < 3; i++ )
1178     {
1179         int is_chroma = !!i;
1180         int w = ((pv->width[0]   + 31) & (~31))>>is_chroma;
1181         int h = ((pv->height[0]+6+ 31) & (~31))>>is_chroma;
1182
1183         pv->mask[i] = calloc( 1, w*h*sizeof(uint8_t) ) + 3*w;
1184     }
1185
1186      /*
1187       * Create yadif threads and locks.
1188       */
1189      pv->yadif_threads = malloc( sizeof( hb_thread_t* ) * pv->cpu_count );
1190      pv->yadif_begin_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
1191      pv->yadif_complete_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
1192      pv->yadif_arguments = malloc( sizeof( yadif_arguments_t ) * pv->cpu_count );
1193
1194      for( i = 0; i < pv->cpu_count; i++ )
1195      {
1196          yadif_thread_arg_t *thread_args;
1197
1198          thread_args = malloc( sizeof( yadif_thread_arg_t ) );
1199
1200          if( thread_args )
1201          {
1202              thread_args->pv = pv;
1203              thread_args->segment = i;
1204
1205              pv->yadif_begin_lock[i] = hb_lock_init();
1206              pv->yadif_complete_lock[i] = hb_lock_init();
1207
1208              /*
1209               * Important to start off with the threads locked waiting
1210               * on input.
1211               */
1212              hb_lock( pv->yadif_begin_lock[i] );
1213
1214              pv->yadif_arguments[i].stop = 0;
1215              pv->yadif_arguments[i].dst = NULL;
1216
1217              pv->yadif_threads[i] = hb_thread_init( "yadif_filter_segment",
1218                                                     yadif_decomb_filter_thread,
1219                                                     thread_args,
1220                                                     HB_NORMAL_PRIORITY );
1221          }
1222          else
1223          {
1224              hb_error( "yadif could not create threads" );
1225          }
1226     }
1227
1228     /*
1229      * Create decomb threads and locks.
1230      */
1231     pv->decomb_threads = malloc( sizeof( hb_thread_t* ) * pv->cpu_count );
1232     pv->decomb_begin_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
1233     pv->decomb_complete_lock = malloc( sizeof( hb_lock_t * ) * pv->cpu_count );
1234     pv->decomb_arguments = malloc( sizeof( decomb_arguments_t ) * pv->cpu_count );
1235
1236     for( i = 0; i < pv->cpu_count; i++ )
1237     {
1238         decomb_thread_arg_t *decomb_thread_args;
1239
1240         decomb_thread_args = malloc( sizeof( decomb_thread_arg_t ) );
1241
1242         if( decomb_thread_args )
1243         {
1244             decomb_thread_args->pv = pv;
1245             decomb_thread_args->segment = i;
1246
1247             pv->decomb_begin_lock[i] = hb_lock_init();
1248             pv->decomb_complete_lock[i] = hb_lock_init();
1249
1250             /*
1251              * Important to start off with the threads locked waiting
1252              * on input.
1253              */
1254             hb_lock( pv->decomb_begin_lock[i] );
1255
1256             pv->decomb_arguments[i].stop = 0;
1257
1258             pv->decomb_threads[i] = hb_thread_init( "decomb_filter_segment",
1259                                                    decomb_filter_thread,
1260                                                    decomb_thread_args,
1261                                                    HB_NORMAL_PRIORITY );
1262         }
1263         else
1264         {
1265             hb_error( "decomb could not create threads" );
1266         }
1267     }
1268
1269
1270
1271     /* Allocate mcdeint specific buffers */
1272     if( pv->mcdeint_mode >= 0 )
1273     {
1274         avcodec_init();
1275         avcodec_register_all();
1276
1277         AVCodec * enc = avcodec_find_encoder( CODEC_ID_SNOW );
1278
1279         int i;
1280         for (i = 0; i < 3; i++ )
1281         {
1282             AVCodecContext * avctx_enc;
1283
1284             avctx_enc = pv->mcdeint_avctx_enc = avcodec_alloc_context();
1285
1286             avctx_enc->width                    = width;
1287             avctx_enc->height                   = height;
1288             avctx_enc->time_base                = (AVRational){1,25};  // meaningless
1289             avctx_enc->gop_size                 = 300;
1290             avctx_enc->max_b_frames             = 0;
1291             avctx_enc->pix_fmt                  = PIX_FMT_YUV420P;
1292             avctx_enc->flags                    = CODEC_FLAG_QSCALE | CODEC_FLAG_LOW_DELAY;
1293             avctx_enc->strict_std_compliance    = FF_COMPLIANCE_EXPERIMENTAL;
1294             avctx_enc->global_quality           = 1;
1295             avctx_enc->flags2                   = CODEC_FLAG2_MEMC_ONLY;
1296             avctx_enc->me_cmp                   = FF_CMP_SAD; //SSE;
1297             avctx_enc->me_sub_cmp               = FF_CMP_SAD; //SSE;
1298             avctx_enc->mb_cmp                   = FF_CMP_SSE;
1299
1300             switch( pv->mcdeint_mode )
1301             {
1302                 case 3:
1303                     avctx_enc->refs = 3;
1304                 case 2:
1305                     avctx_enc->me_method = ME_UMH;
1306                 case 1:
1307                     avctx_enc->flags |= CODEC_FLAG_4MV;
1308                     avctx_enc->dia_size =2;
1309                 case 0:
1310                     avctx_enc->flags |= CODEC_FLAG_QPEL;
1311             }
1312
1313             avcodec_open(avctx_enc, enc);
1314         }
1315
1316         pv->mcdeint_frame       = avcodec_alloc_frame();
1317         pv->mcdeint_outbuf_size = width * height * 10;
1318         pv->mcdeint_outbuf      = malloc( pv->mcdeint_outbuf_size );
1319     }
1320
1321     return pv;
1322 }
1323
1324 void hb_decomb_close( hb_filter_private_t * pv )
1325 {
1326     if( !pv )
1327     {
1328         return;
1329     }
1330
1331     hb_log("decomb: yadif deinterlaced %i | blend deinterlaced %i | unfiltered %i | total %i", pv->yadif_deinterlaced_frames, pv->blend_deinterlaced_frames, pv->unfiltered_frames, pv->yadif_deinterlaced_frames + pv->blend_deinterlaced_frames + pv->unfiltered_frames);
1332
1333     /* Cleanup frame buffers */
1334     if( pv->buf_out[0] )
1335     {
1336         hb_buffer_close( &pv->buf_out[0] );
1337     }
1338     if( pv->buf_out[1] )
1339     {
1340         hb_buffer_close( &pv->buf_out[1] );
1341     }
1342     if (pv->buf_settings )
1343     {
1344         hb_buffer_close( &pv->buf_settings );
1345     }
1346
1347     /* Cleanup yadif specific buffers */
1348     int i;
1349     for( i = 0; i<3*3; i++ )
1350     {
1351         uint8_t **p = &pv->ref[i%3][i/3];
1352         if (*p)
1353         {
1354             free( *p - 3*pv->ref_stride[i/3] );
1355             *p = NULL;
1356         }
1357     }
1358
1359     /* Cleanup combing mask. */
1360     for( i = 0; i<3*3; i++ )
1361     {
1362         uint8_t **p = &pv->mask[i/3];
1363         if (*p)
1364         {
1365             free( *p - 3*pv->ref_stride[i/3] );
1366             *p = NULL;
1367         }
1368     }
1369
1370     for( i = 0; i < pv->cpu_count; i++)
1371     {
1372         /*
1373          * Tell each yadif thread to stop, and then cleanup.
1374          */
1375         pv->yadif_arguments[i].stop = 1;
1376         hb_unlock(  pv->yadif_begin_lock[i] );
1377
1378         hb_thread_close( &pv->yadif_threads[i] );
1379         hb_lock_close( &pv->yadif_begin_lock[i] );
1380         hb_lock_close( &pv->yadif_complete_lock[i] );
1381     }
1382
1383     /*
1384      * free memory for yadif structs
1385      */
1386     free( pv->yadif_threads );
1387     free( pv->yadif_begin_lock );
1388     free( pv->yadif_complete_lock );
1389     free( pv->yadif_arguments );
1390
1391     for( i = 0; i < pv->cpu_count; i++)
1392     {
1393         /*
1394          * Tell each decomb thread to stop, and then cleanup.
1395          */
1396         pv->decomb_arguments[i].stop = 1;
1397         hb_unlock(  pv->decomb_begin_lock[i] );
1398
1399         hb_thread_close( &pv->decomb_threads[i] );
1400         hb_lock_close( &pv->decomb_begin_lock[i] );
1401         hb_lock_close( &pv->decomb_complete_lock[i] );
1402     }
1403
1404     /*
1405      * free memory for decomb structs
1406      */
1407     free( pv->decomb_threads );
1408     free( pv->decomb_begin_lock );
1409     free( pv->decomb_complete_lock );
1410     free( pv->decomb_arguments );
1411
1412     /* Cleanup mcdeint specific buffers */
1413     if( pv->mcdeint_mode >= 0 )
1414     {
1415         if( pv->mcdeint_avctx_enc )
1416         {
1417             avcodec_close( pv->mcdeint_avctx_enc );
1418             av_freep( &pv->mcdeint_avctx_enc );
1419         }
1420         if( pv->mcdeint_outbuf )
1421         {
1422             free( pv->mcdeint_outbuf );
1423         }
1424     }
1425
1426     free( pv );
1427 }
1428
1429 int hb_decomb_work( const hb_buffer_t * cbuf_in,
1430                     hb_buffer_t ** buf_out,
1431                     int pix_fmt,
1432                     int width,
1433                     int height,
1434                     hb_filter_private_t * pv )
1435 {
1436     hb_buffer_t * buf_in = (hb_buffer_t *)cbuf_in;
1437
1438     if( !pv ||
1439         pix_fmt != pv->pix_fmt ||
1440         width   != pv->width[0] ||
1441         height  != pv->height[0] )
1442     {
1443         return FILTER_FAILED;
1444     }
1445
1446     avpicture_fill( &pv->pic_in, buf_in->data,
1447                     pix_fmt, width, height );
1448
1449     /* Determine if top-field first layout */
1450     int tff;
1451     if( pv->parity < 0 )
1452     {
1453         tff = !!(buf_in->flags & PIC_FLAG_TOP_FIELD_FIRST);
1454     }
1455     else
1456     {
1457         tff = (pv->parity & 1) ^ 1;
1458     }
1459
1460     /* Store current frame in yadif cache */
1461     store_ref( (const uint8_t**)pv->pic_in.data, pv );
1462
1463     /* If yadif is not ready, store another ref and return FILTER_DELAY */
1464     if( pv->yadif_ready == 0 )
1465     {
1466         store_ref( (const uint8_t**)pv->pic_in.data, pv );
1467
1468         hb_buffer_copy_settings( pv->buf_settings, buf_in );
1469
1470         /* don't let 'work_loop' send a chapter mark upstream */
1471         buf_in->new_chap  = 0;
1472
1473         pv->yadif_ready = 1;
1474
1475         return FILTER_DELAY;
1476     }
1477
1478     /* Perform yadif filtering */
1479     int frame;
1480     for( frame = 0; frame <= ( ( pv->mode == 2 || pv->mode == 3 )? 1 : 0 ) ; frame++ )
1481     {
1482         int parity = frame ^ tff ^ 1;
1483
1484         avpicture_fill( &pv->pic_out, pv->buf_out[!(frame^1)]->data,
1485                         pix_fmt, width, height );
1486
1487         yadif_filter( pv->pic_out.data, parity, tff, pv );
1488
1489         if( pv->mcdeint_mode >= 0 )
1490         {
1491             /* Perform mcdeint filtering */
1492             avpicture_fill( &pv->pic_in,  pv->buf_out[(frame^1)]->data,
1493                             pix_fmt, width, height );
1494
1495             mcdeint_filter( pv->pic_in.data, pv->pic_out.data, parity, pv );
1496         }
1497
1498         *buf_out = pv->buf_out[!(frame^1)];
1499     }
1500
1501     /* Copy buffered settings to output buffer settings */
1502     hb_buffer_copy_settings( *buf_out, pv->buf_settings );
1503
1504     /* Replace buffered settings with input buffer settings */
1505     hb_buffer_copy_settings( pv->buf_settings, buf_in );
1506
1507     /* don't let 'work_loop' send a chapter mark upstream */
1508     buf_in->new_chap  = 0;
1509
1510     return FILTER_OK;
1511 }