libhb/decssasub.c

   1 /*
   2    This file is part of the HandBrake source code.
   3    Homepage: <http://handbrake.fr/>.
   4    It may be used under the terms of the GNU General Public License. */
   5
   6 /*
   7  * Converts SSA subtitles to either:
   8  * (1) TEXTSUB format: UTF-8 subtitles with limited HTML-style markup (<b>, <i>, <u>), or
   9  * (2) PICTURESUB format, using libass.
  10  *
  11  * SSA format references:
  12  *   http://www.matroska.org/technical/specs/subtitles/ssa.html
  13  *   http://moodub.free.fr/video/ass-specs.doc
  14  *   vlc-1.0.4/modules/codec/subtitles/subsass.c:ParseSSAString
  15  *
  16  * libass references:
  17  *   libass-0.9.9/ass.h
  18  *   vlc-1.0.4/modules/codec/libass.c
  19  *
  20  * @author David Foster (davidfstr)
  21  */
  22
  23 #include <stdlib.h>
  24 #include <stdio.h>
  25 #include "hb.h"
  26
  27 #include <ass/ass.h>
  28
  29 struct hb_work_private_s
  30 {
  31     // If decoding to PICTURESUB format:
  32     ASS_Library *ssa;
  33     ASS_Renderer *renderer;
  34     ASS_Track *ssaTrack;
  35     int readOrder;
  36 };
  37
  38 typedef enum {
  39     BOLD        = 0x01,
  40     ITALIC      = 0x02,
  41     UNDERLINE   = 0x04
  42 } StyleSet;
  43
  44 // "<b></b>".len + "<i></i>".len + "<u></u>".len
  45 #define MAX_OVERHEAD_PER_OVERRIDE (7 * 3)
  46
  47 #define SSA_2_HB_TIME(hr,min,sec,centi) \
  48     ( 90L * ( hr    * 1000L * 60 * 60 +\
  49               min   * 1000L * 60 +\
  50               sec   * 1000L +\
  51               centi * 10L ) )
  52
  53 static StyleSet ssa_parse_style_override( uint8_t *pos, StyleSet prevStyles )
  54 {
  55     StyleSet nextStyles = prevStyles;
  56     for (;;)
  57     {
  58         // Skip over leading '{' or last '\\'
  59         pos++;
  60
  61         // Scan for next \code
  62         while ( *pos != '\\' && *pos != '}' && *pos != '\0' ) pos++;
  63         if ( *pos != '\\' )
  64         {
  65             // End of style override block
  66             break;
  67         }
  68
  69         // If next chars are \[biu][01], interpret it
  70         if ( strchr("biu", pos[1]) && strchr("01", pos[2]) )
  71         {
  72             StyleSet styleID =
  73                 pos[1] == 'b' ? BOLD :
  74                 pos[1] == 'i' ? ITALIC :
  75                 pos[1] == 'u' ? UNDERLINE : 0;
  76             int enabled = (pos[2] == '1');
  77
  78             if (enabled)
  79             {
  80                 nextStyles |= styleID;
  81             }
  82             else
  83             {
  84                 nextStyles &= ~styleID;
  85             }
  86         }
  87     }
  88     return nextStyles;
  89 }
  90
  91 static void ssa_append_html_tags_for_style_change(
  92     uint8_t **dst, StyleSet prevStyles, StyleSet nextStyles )
  93 {
  94     #define APPEND(str) { \
  95         char *src = str; \
  96         while (*src) { *(*dst)++ = *src++; } \
  97     }
  98
  99     // Reverse-order close all previous styles
 100     if (prevStyles & UNDERLINE) APPEND("</u>");
 101     if (prevStyles & ITALIC)    APPEND("</i>");
 102     if (prevStyles & BOLD)      APPEND("</b>");
 103
 104     // Forward-order open all next styles
 105     if (nextStyles & BOLD)      APPEND("<b>");
 106     if (nextStyles & ITALIC)    APPEND("<i>");
 107     if (nextStyles & UNDERLINE) APPEND("<u>");
 108
 109     #undef APPEND
 110 }
 111
 112 static hb_buffer_t *ssa_decode_line_to_utf8( uint8_t *in_data, int in_size, int in_sequence );
 113 static hb_buffer_t *ssa_decode_line_to_picture( hb_work_object_t * w, uint8_t *in_data, int in_size, int in_sequence );
 114
 115 /*
 116  * Decodes a single SSA packet to one or more TEXTSUB or PICTURESUB subtitle packets.
 117  *
 118  * SSA packet format:
 119  * ( Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text CR LF ) +
 120  *             1      2     3   4     5    6       7       8       9      10
 121  */
 122 static hb_buffer_t *ssa_decode_packet( hb_work_object_t * w, hb_buffer_t *in )
 123 {
 124     // Store NULL after the end of the buffer to make using string processing safe
 125     hb_buffer_realloc( in, in->size + 1 );
 126     in->data[in->size] = '\0';
 127
 128     hb_buffer_t *out_list = NULL;
 129     hb_buffer_t **nextPtr = &out_list;
 130
 131     const char *EOL = "\r\n";
 132     char *curLine, *curLine_parserData;
 133     for ( curLine = strtok_r( (char *) in->data, EOL, &curLine_parserData );
 134           curLine;
 135           curLine = strtok_r( NULL, EOL, &curLine_parserData ) )
 136     {
 137         // Skip empty lines and spaces between adjacent CR and LF
 138         if (curLine[0] == '\0')
 139             continue;
 140
 141         // Decode an individual SSA line
 142         hb_buffer_t *out;
 143         if ( w->subtitle->config.dest == PASSTHRUSUB ) {
 144             out = ssa_decode_line_to_utf8( (uint8_t *) curLine, strlen( curLine ), in->sequence );
 145             if ( out == NULL )
 146                 continue;
 147
 148             // We shouldn't be storing the extra NULL character,
 149             // but the MP4 muxer expects this, unfortunately.
 150             if ( out->size > 0 && out->data[out->size - 1] != '\0' ) {
 151                 // NOTE: out->size remains unchanged
 152                 hb_buffer_realloc( out, out->size + 1 );
 153                 out->data[out->size] = '\0';
 154             }
 155
 156             // If the input packet was non-empty, do not pass through
 157             // an empty output packet (even if the subtitle was empty),
 158             // as this would be interpreted as an end-of-stream
 159             if ( in->size > 0 && out->size == 0 ) {
 160                 hb_buffer_close(&out);
 161                 continue;
 162             }
 163         } else if ( w->subtitle->config.dest == RENDERSUB ) {
 164             out = ssa_decode_line_to_picture( w, (uint8_t *) curLine, strlen( curLine ), in->sequence );
 165             if ( out == NULL )
 166                 continue;
 167         }
 168
 169         // Append 'out' to 'out_list'
 170         *nextPtr = out;
 171         nextPtr = &out->next;
 172     }
 173
 174     // For point-to-point encoding, when the start time of the stream
 175     // may be offset, the timestamps of the subtitles must be offset as well.
 176     //
 177     // HACK: Here we are making the assumption that, under normal circumstances,
 178     //       the output display time of the first output packet is equal to the
 179     //       display time of the input packet.
 180     //
 181     //       During point-to-point encoding, the display time of the input
 182     //       packet will be offset to compensate.
 183     //
 184     //       Therefore we offset all of the output packets by a slip amount
 185     //       such that first output packet's display time aligns with the
 186     //       input packet's display time. This should give the correct time
 187     //       when point-to-point encoding is in effect.
 188     if (out_list && out_list->start > in->start)
 189     {
 190         int64_t slip = out_list->start - in->start;
 191         hb_buffer_t *out;
 192
 193         out = out_list;
 194         while (out)
 195         {
 196             out->start -= slip;
 197             out->stop -= slip;
 198             out = out->next;
 199         }
 200     }
 201
 202     return out_list;
 203 }
 204
 205 /*
 206  * Parses the start and stop time from the specified SSA packet.
 207  *
 208  * Returns true if parsing failed; false otherwise.
 209  */
 210 static int parse_timing_from_ssa_packet( char *in_data, int64_t *in_start, int64_t *in_stop )
 211 {
 212     /*
 213      * Parse Start and End fields for timing information
 214      */
 215     int start_hr, start_min, start_sec, start_centi;
 216     int   end_hr,   end_min,   end_sec,   end_centi;
 217     int numPartsRead = sscanf( (char *) in_data, "Dialogue: %*128[^,],"
 218         "%d:%d:%d.%d,"  // Start
 219         "%d:%d:%d.%d,", // End
 220         &start_hr, &start_min, &start_sec, &start_centi,
 221           &end_hr,   &end_min,   &end_sec,   &end_centi );
 222     if ( numPartsRead != 8 )
 223         return 1;
 224
 225     *in_start = SSA_2_HB_TIME(start_hr, start_min, start_sec, start_centi);
 226     *in_stop  = SSA_2_HB_TIME(  end_hr,   end_min,   end_sec,   end_centi);
 227
 228     return 0;
 229 }
 230
 231 static uint8_t *find_field( uint8_t *pos, uint8_t *end, int fieldNum )
 232 {
 233     int curFieldID = 1;
 234     while (pos < end)
 235     {
 236         if ( *pos++ == ',' )
 237         {
 238             curFieldID++;
 239             if ( curFieldID == fieldNum )
 240                 return pos;
 241         }
 242     }
 243     return NULL;
 244 }
 245
 246 /*
 247  * SSA line format:
 248  *   Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0'
 249  *             1      2     3   4     5    6       7       8       9      10
 250  */
 251 static hb_buffer_t *ssa_decode_line_to_utf8( uint8_t *in_data, int in_size, int in_sequence )
 252 {
 253     uint8_t *pos = in_data;
 254     uint8_t *end = in_data + in_size;
 255
 256     // Parse values for in->start and in->stop
 257     int64_t in_start, in_stop;
 258     if ( parse_timing_from_ssa_packet( (char *) in_data, &in_start, &in_stop ) )
 259         goto fail;
 260
 261     uint8_t *textFieldPos = find_field( pos, end, 10 );
 262     if ( textFieldPos == NULL )
 263         goto fail;
 264
 265     // Count the number of style overrides in the Text field
 266     int numStyleOverrides = 0;
 267     pos = textFieldPos;
 268     while ( pos < end )
 269     {
 270         if (*pos++ == '{')
 271         {
 272             numStyleOverrides++;
 273         }
 274     }
 275
 276     int maxOutputSize = (end - textFieldPos) + ((numStyleOverrides + 1) * MAX_OVERHEAD_PER_OVERRIDE);
 277     hb_buffer_t *out = hb_buffer_init( maxOutputSize );
 278     if ( out == NULL )
 279         return NULL;
 280
 281     /*
 282      * The Text field contains plain text marked up with:
 283      * (1) '\n' -> space
 284      * (2) '\N' -> newline
 285      * (3) curly-brace control codes like '{\k44}' -> HTML tags / strip
 286      *
 287      * Perform the above conversions and copy it to the output packet
 288      */
 289     StyleSet prevStyles = 0;
 290     uint8_t *dst = out->data;
 291     pos = textFieldPos;
 292     while ( pos < end )
 293     {
 294         if ( pos[0] == '\\' && pos[1] == 'n' )
 295         {
 296             *dst++ = ' ';
 297             pos += 2;
 298         }
 299         else if ( pos[0] == '\\' && pos[1] == 'N' )
 300         {
 301             *dst++ = '\n';
 302             pos += 2;
 303         }
 304         else if ( pos[0] == '{' )
 305         {
 306             // Parse SSA style overrides and append appropriate HTML style tags
 307             StyleSet nextStyles = ssa_parse_style_override( pos, prevStyles );
 308             ssa_append_html_tags_for_style_change( &dst, prevStyles, nextStyles );
 309             prevStyles = nextStyles;
 310
 311             // Skip past SSA control code
 312             while ( pos < end && *pos != '}' ) pos++;
 313             if    ( pos < end && *pos == '}' ) pos++;
 314         }
 315         else
 316         {
 317             // Copy raw character
 318             *dst++ = *pos++;
 319         }
 320     }
 321
 322     // Append closing HTML style tags
 323     ssa_append_html_tags_for_style_change( &dst, prevStyles, 0 );
 324
 325     // Trim output buffer to the actual amount of data written
 326     out->size = dst - out->data;
 327
 328     // Copy metadata from the input packet to the output packet
 329     out->start = in_start;
 330     out->stop = in_stop;
 331     out->sequence = in_sequence;
 332
 333     return out;
 334
 335 fail:
 336     hb_log( "decssasub: malformed SSA subtitle packet: %.*s\n", in_size, in_data );
 337     return NULL;
 338 }
 339
 340 /*
 341  * SSA line format:
 342  *   Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0'
 343  *             1      2     3   4     5    6       7       8       9      10
 344  *
 345  * MKV-SSA packet format:
 346  *   ReadOrder,Marked,          Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0'
 347  *   1         2                3     4    5       6       7       8      9
 348  */
 349 static hb_buffer_t *ssa_decode_line_to_picture( hb_work_object_t * w, uint8_t *in_data, int in_size, int in_sequence )
 350 {
 351     hb_work_private_t * pv = w->private_data;
 352
 353     // Parse values for in->start and in->stop
 354     int64_t in_start, in_stop;
 355     if ( parse_timing_from_ssa_packet( (char *) in_data, &in_start, &in_stop ) )
 356         goto fail;
 357
 358     // Convert the SSA packet to MKV-SSA format, which is what libass expects
 359     char *mkvIn;
 360     int mkvInSize;
 361     {
 362         char *layerField = malloc( in_size );
 363         int numPartsRead = sscanf( (char *) in_data, "Dialogue: %128[^,],", layerField );
 364         if ( numPartsRead != 1 )
 365             goto fail;
 366
 367         char *styleToTextFields = (char *) find_field( in_data, in_data + in_size, 4 );
 368         if ( styleToTextFields == NULL ) {
 369             free( layerField );
 370             goto fail;
 371         }
 372
 373         mkvIn = malloc( in_size + 1 );
 374         mkvIn[0] = '\0';
 375         sprintf(mkvIn, "%d", pv->readOrder++);    // ReadOrder: make this up
 376         strcat( mkvIn, "," );
 377         strcat( mkvIn, layerField );
 378         strcat( mkvIn, "," );
 379         strcat( mkvIn, (char *) styleToTextFields );
 380
 381         mkvInSize = strlen(mkvIn);
 382
 383         free( layerField );
 384     }
 385
 386     // Parse MKV-SSA packet
 387     ass_process_chunk( pv->ssaTrack, mkvIn, mkvInSize, in_start / 90, (in_stop - in_start) / 90 );
 388
 389     free( mkvIn );
 390
 391     // TODO: To support things like karaoke, it won't be sufficient to only generate
 392     //       new subtitle pictures when there are subtitle packets. Rather, pictures will
 393     //       need to be generated potentially continuously.
 394     //
 395     //       Until "karaoke support" is implemented, make an educated guess about the
 396     //       timepoint within the subtitle that should be rendered. I guess the midpoint.
 397     int64_t renderTime = ( in_start + in_stop ) / 2;
 398
 399     int changed;
 400     ASS_Image *frameList = ass_render_frame( pv->renderer, pv->ssaTrack, renderTime / 90, &changed );
 401     if ( !changed || !frameList )
 402         return NULL;
 403
 404     int numFrames = 0;
 405     ASS_Image *curFrame;
 406     for (curFrame = frameList; curFrame; curFrame = curFrame->next)
 407         numFrames++;
 408
 409     hb_buffer_t *outSubpictureList = NULL;
 410     hb_buffer_t **outSubpictureListTailPtr = &outSubpictureList;
 411
 412     // Generate a PICTURESUB packet from the frames
 413     ASS_Image *frame;
 414     for (frame = frameList; frame; frame = frame->next) {
 415         // Allocate pixmap where drawing will be done
 416         uint8_t *rgba = calloc(frame->w * frame->h * 4, 1);
 417
 418         unsigned r = (frame->color >> 24) & 0xff;
 419         unsigned g = (frame->color >> 16) & 0xff;
 420         unsigned b = (frame->color >>  8) & 0xff;
 421         unsigned a = (frame->color      ) & 0xff;
 422
 423         int x, y;
 424         for (y = 0; y < frame->h; y++) {
 425             for (x = 0; x < frame->w; x++) {
 426                 unsigned srcAlphaPrenormalized = frame->bitmap[y*frame->stride + x];
 427                 unsigned srcAlpha = (255 - a) * srcAlphaPrenormalized / 255;
 428
 429                 uint8_t *dst = &rgba[(y*frame->w + x) * 4];
 430                 unsigned oldDstAlpha = dst[3];
 431
 432                 if (oldDstAlpha == 0) {
 433                     // Optimized version
 434                     dst[0] = r;
 435                     dst[1] = g;
 436                     dst[2] = b;
 437                     dst[3] = srcAlpha;
 438                 } else {
 439                     dst[3] = 255 - ( 255 - dst[3] ) * ( 255 - srcAlpha ) / 255;
 440                     if (dst[3] != 0) {
 441                         dst[0] = ( dst[0] * oldDstAlpha * (255-srcAlpha) / 255 + r * srcAlpha ) / dst[3];
 442                         dst[1] = ( dst[1] * oldDstAlpha * (255-srcAlpha) / 255 + g * srcAlpha ) / dst[3];
 443                         dst[2] = ( dst[2] * oldDstAlpha * (255-srcAlpha) / 255 + b * srcAlpha ) / dst[3];
 444                     }
 445                 }
 446             }
 447         }
 448
 449         // Generate output subpicture (in PICTURESUB format)
 450         hb_buffer_t *out = hb_buffer_init(frame->w * frame->h * 4);
 451         out->x = frame->dst_x;
 452         out->y = frame->dst_y;
 453         out->width = frame->w;
 454         out->height = frame->h;
 455
 456         int i;
 457         int numPixels = frame->w * frame->h;
 458         for (i = 0; i < numPixels; i++) {
 459             uint8_t *srcRgba = &rgba[i * 4];
 460
 461             uint8_t *dstY = &out->data[(numPixels * 0) + i];
 462             uint8_t *dstA = &out->data[(numPixels * 1) + i];
 463             uint8_t *dstU = &out->data[(numPixels * 2) + i];
 464             uint8_t *dstV = &out->data[(numPixels * 3) + i];
 465
 466             int srcYuv = hb_rgb2yuv((srcRgba[0] << 16) | (srcRgba[1] << 8) | (srcRgba[2] << 0));
 467             int srcA = srcRgba[3];
 468
 469             *dstY = (srcYuv >> 16) & 0xff;
 470             *dstU = (srcYuv >> 8 ) & 0xff;
 471             *dstV = (srcYuv >> 0 ) & 0xff;
 472             *dstA = srcA / 16;  // HB's max alpha value is 16
 473         }
 474
 475         free(rgba);
 476
 477         *outSubpictureListTailPtr = out;
 478         outSubpictureListTailPtr = &out->next_subpicture;
 479     }
 480
 481     // NOTE: The subpicture list is actually considered a single packet by most other code
 482     hb_buffer_t *out = outSubpictureList;
 483
 484     // Copy metadata from the input packet to the output packet
 485     out->start = in_start;
 486     out->stop = in_stop;
 487     out->sequence = in_sequence;
 488
 489     return out;
 490
 491 fail:
 492     hb_log( "decssasub: malformed SSA subtitle packet: %.*s\n", in_size, in_data );
 493     return NULL;
 494 }
 495
 496 static void ssa_log(int level, const char *fmt, va_list args, void *data)
 497 {
 498     if ( level < 5 )      // same as default verbosity when no callback is set
 499     {
 500         char *msg;
 501         if ( vasprintf( &msg, fmt, args ) < 0 )
 502         {
 503             hb_log( "decssasub: could not report libass message\n" );
 504             return;
 505         }
 506         hb_log( "[ass] %s", msg );  // no need for extra '\n' because libass sends it
 507
 508         free( msg );
 509     }
 510 }
 511
 512 static int decssaInit( hb_work_object_t * w, hb_job_t * job )
 513 {
 514     hb_work_private_t * pv;
 515
 516     pv              = calloc( 1, sizeof( hb_work_private_t ) );
 517     w->private_data = pv;
 518
 519     if ( w->subtitle->config.dest == RENDERSUB ) {
 520         pv->ssa = ass_library_init();
 521         if ( !pv->ssa ) {
 522             hb_log( "decssasub: libass initialization failed\n" );
 523             return 1;
 524         }
 525
 526         // Redirect libass output to hb_log
 527         ass_set_message_cb( pv->ssa, ssa_log, NULL );
 528
 529         // Load embedded fonts
 530         hb_list_t * list_attachment = job->title->list_attachment;
 531         int i;
 532         for ( i = 0; i < hb_list_count(list_attachment); i++ )
 533         {
 534             hb_attachment_t * attachment = hb_list_item( list_attachment, i );
 535
 536             if ( attachment->type == FONT_TTF_ATTACH )
 537             {
 538                 ass_add_font(
 539                     pv->ssa,
 540                     attachment->name,
 541                     attachment->data,
 542                     attachment->size );
 543             }
 544         }
 545
 546         ass_set_extract_fonts( pv->ssa, 1 );
 547         ass_set_style_overrides( pv->ssa, NULL );
 548
 549         pv->renderer = ass_renderer_init( pv->ssa );
 550         if ( !pv->renderer ) {
 551             hb_log( "decssasub: renderer initialization failed\n" );
 552             return 1;
 553         }
 554
 555         ass_set_use_margins( pv->renderer, 0 );
 556         ass_set_hinting( pv->renderer, ASS_HINTING_LIGHT );     // VLC 1.0.4 uses this
 557         ass_set_font_scale( pv->renderer, 1.0 );
 558         ass_set_line_spacing( pv->renderer, 1.0 );
 559
 560         // Setup default font family
 561         //
 562         // SSA v4.00 requires that "Arial" be the default font
 563         const char *font = NULL;
 564         const char *family = "Arial";
 565         // NOTE: This can sometimes block for several *seconds*.
 566         //       It seems that process_fontdata() for some embedded fonts is slow.
 567         ass_set_fonts( pv->renderer, font, family, /*haveFontConfig=*/1, NULL, 1 );
 568
 569         // Setup track state
 570         pv->ssaTrack = ass_new_track( pv->ssa );
 571         if ( !pv->ssaTrack ) {
 572             hb_log( "decssasub: ssa track initialization failed\n" );
 573             return 1;
 574         }
 575
 576         // NOTE: The codec extradata is expected to be in MKV format
 577         ass_process_codec_private( pv->ssaTrack,
 578             (char *) w->subtitle->extradata, w->subtitle->extradata_size );
 579
 580         int originalWidth = job->title->width;
 581         int originalHeight = job->title->height;
 582         ass_set_frame_size( pv->renderer, originalWidth, originalHeight);
 583         ass_set_aspect_ratio( pv->renderer, /*dar=*/1.0, /*sar=*/1.0 );
 584     }
 585
 586     return 0;
 587 }
 588
 589 static int decssaWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 590                         hb_buffer_t ** buf_out )
 591 {
 592     hb_buffer_t * in = *buf_in;
 593     hb_buffer_t * out_list = NULL;
 594
 595     if ( in->size > 0 ) {
 596         out_list = ssa_decode_packet(w, in);
 597     } else {
 598         out_list = hb_buffer_init( 0 );
 599     }
 600
 601     // Dispose the input packet, as it is no longer needed
 602     hb_buffer_close(&in);
 603
 604     *buf_in = NULL;
 605     *buf_out = out_list;
 606     return HB_WORK_OK;
 607 }
 608
 609 static void decssaClose( hb_work_object_t * w )
 610 {
 611     hb_work_private_t * pv = w->private_data;
 612
 613     if ( pv->ssaTrack )
 614         ass_free_track( pv->ssaTrack );
 615     if ( pv->renderer )
 616         ass_renderer_done( pv->renderer );
 617     if ( pv->ssa )
 618         ass_library_done( pv->ssa );
 619
 620     free( w->private_data );
 621 }
 622
 623 hb_work_object_t hb_decssasub =
 624 {
 625     WORK_DECSSASUB,
 626     "SSA Subtitle Decoder",
 627     decssaInit,
 628     decssaWork,
 629     decssaClose
 630 };