libhb/decssasub.c

   1 /*
   2    This file is part of the HandBrake source code.
   3    Homepage: <http://handbrake.fr/>.
   4    It may be used under the terms of the GNU General Public License. */
   5
   6 /*
   7  * Converts SSA subtitles to either:
   8  * (1) TEXTSUB format: UTF-8 subtitles with limited HTML-style markup (<b>, <i>, <u>), or
   9  * (2) PICTURESUB format, using libass.
  10  *
  11  * SSA format references:
  12  *   http://www.matroska.org/technical/specs/subtitles/ssa.html
  13  *   http://moodub.free.fr/video/ass-specs.doc
  14  *   vlc-1.0.4/modules/codec/subtitles/subsass.c:ParseSSAString
  15  *
  16  * libass references:
  17  *   libass-0.9.9/ass.h
  18  *   vlc-1.0.4/modules/codec/libass.c
  19  *
  20  * @author David Foster (davidfstr)
  21  */
  22
  23 #include <stdlib.h>
  24 #include <stdio.h>
  25 #include "hb.h"
  26
  27 #include <ass/ass.h>
  28
/*
 * Per-instance decoder state.
 *
 * The libass handles are created in decssaInit() only when the subtitle
 * destination is RENDERSUB; otherwise they stay NULL (the struct is
 * allocated with calloc).
 */
struct hb_work_private_s
{
    // If decoding to PICTURESUB format:
    ASS_Library *ssa;       // libass library handle
    ASS_Renderer *renderer; // libass renderer created from 'ssa'
    ASS_Track *ssaTrack;    // libass track that receives every decoded line
    int readOrder;          // counter used to fabricate the MKV "ReadOrder" field
};
  37
  38 typedef enum {
  39     BOLD        = 0x01,
  40     ITALIC      = 0x02,
  41     UNDERLINE   = 0x04
  42 } StyleSet;
  43
  44 // "<b></b>".len + "<i></i>".len + "<u></u>".len
  45 #define MAX_OVERHEAD_PER_OVERRIDE (7 * 3)
  46
  47 #define SSA_2_HB_TIME(hr,min,sec,centi) \
  48     ( 90L * ( hr    * 1000L * 60 * 60 +\
  49               min   * 1000L * 60 +\
  50               sec   * 1000L +\
  51               centi * 10L ) )
  52
  53 #define SSA_VERBOSE_PACKETS 0
  54
  55 static StyleSet ssa_parse_style_override( uint8_t *pos, StyleSet prevStyles )
  56 {
  57     StyleSet nextStyles = prevStyles;
  58     for (;;)
  59     {
  60         // Skip over leading '{' or last '\\'
  61         pos++;
  62
  63         // Scan for next \code
  64         while ( *pos != '\\' && *pos != '}' && *pos != '\0' ) pos++;
  65         if ( *pos != '\\' )
  66         {
  67             // End of style override block
  68             break;
  69         }
  70
  71         // If next chars are \[biu][01], interpret it
  72         if ( strchr("biu", pos[1]) && strchr("01", pos[2]) )
  73         {
  74             StyleSet styleID =
  75                 pos[1] == 'b' ? BOLD :
  76                 pos[1] == 'i' ? ITALIC :
  77                 pos[1] == 'u' ? UNDERLINE : 0;
  78             int enabled = (pos[2] == '1');
  79
  80             if (enabled)
  81             {
  82                 nextStyles |= styleID;
  83             }
  84             else
  85             {
  86                 nextStyles &= ~styleID;
  87             }
  88         }
  89     }
  90     return nextStyles;
  91 }
  92
  93 static void ssa_append_html_tags_for_style_change(
  94     uint8_t **dst, StyleSet prevStyles, StyleSet nextStyles )
  95 {
  96     #define APPEND(str) { \
  97         char *src = str; \
  98         while (*src) { *(*dst)++ = *src++; } \
  99     }
 100
 101     // Reverse-order close all previous styles
 102     if (prevStyles & UNDERLINE) APPEND("</u>");
 103     if (prevStyles & ITALIC)    APPEND("</i>");
 104     if (prevStyles & BOLD)      APPEND("</b>");
 105
 106     // Forward-order open all next styles
 107     if (nextStyles & BOLD)      APPEND("<b>");
 108     if (nextStyles & ITALIC)    APPEND("<i>");
 109     if (nextStyles & UNDERLINE) APPEND("<u>");
 110
 111     #undef APPEND
 112 }
 113
 114 static hb_buffer_t *ssa_decode_line_to_utf8( uint8_t *in_data, int in_size, int in_sequence );
 115 static hb_buffer_t *ssa_decode_line_to_picture( hb_work_object_t * w, uint8_t *in_data, int in_size, int in_sequence );
 116
 117 /*
 118  * Decodes a single SSA packet to one or more TEXTSUB or PICTURESUB subtitle packets.
 119  *
 120  * SSA packet format:
 121  * ( Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text CR LF ) +
 122  *             1      2     3   4     5    6       7       8       9      10
 123  */
 124 static hb_buffer_t *ssa_decode_packet( hb_work_object_t * w, hb_buffer_t *in )
 125 {
 126     // Store NULL after the end of the buffer to make using string processing safe
 127     hb_buffer_realloc( in, in->size + 1 );
 128     in->data[in->size] = '\0';
 129
 130     hb_buffer_t *out_list = NULL;
 131     hb_buffer_t **nextPtr = &out_list;
 132
 133     const char *EOL = "\r\n";
 134     char *curLine, *curLine_parserData;
 135     for ( curLine = strtok_r( (char *) in->data, EOL, &curLine_parserData );
 136           curLine;
 137           curLine = strtok_r( NULL, EOL, &curLine_parserData ) )
 138     {
 139         // Skip empty lines and spaces between adjacent CR and LF
 140         if (curLine[0] == '\0')
 141             continue;
 142
 143         // Decode an individual SSA line
 144         hb_buffer_t *out;
 145         if ( w->subtitle->config.dest == PASSTHRUSUB ) {
 146             out = ssa_decode_line_to_utf8( (uint8_t *) curLine, strlen( curLine ), in->sequence );
 147             if ( out == NULL )
 148                 continue;
 149
 150             // We shouldn't be storing the extra NULL character,
 151             // but the MP4 muxer expects this, unfortunately.
 152             if ( out->size > 0 && out->data[out->size - 1] != '\0' ) {
 153                 // NOTE: out->size remains unchanged
 154                 hb_buffer_realloc( out, out->size + 1 );
 155                 out->data[out->size] = '\0';
 156             }
 157
 158             // If the input packet was non-empty, do not pass through
 159             // an empty output packet (even if the subtitle was empty),
 160             // as this would be interpreted as an end-of-stream
 161             if ( in->size > 0 && out->size == 0 ) {
 162                 hb_buffer_close(&out);
 163                 continue;
 164             }
 165         } else if ( w->subtitle->config.dest == RENDERSUB ) {
 166             out = ssa_decode_line_to_picture( w, (uint8_t *) curLine, strlen( curLine ), in->sequence );
 167             if ( out == NULL )
 168                 continue;
 169         }
 170
 171         // Append 'out' to 'out_list'
 172         *nextPtr = out;
 173         nextPtr = &out->next;
 174     }
 175
 176     // For point-to-point encoding, when the start time of the stream
 177     // may be offset, the timestamps of the subtitles must be offset as well.
 178     //
 179     // HACK: Here we are making the assumption that, under normal circumstances,
 180     //       the output display time of the first output packet is equal to the
 181     //       display time of the input packet.
 182     //
 183     //       During point-to-point encoding, the display time of the input
 184     //       packet will be offset to compensate.
 185     //
 186     //       Therefore we offset all of the output packets by a slip amount
 187     //       such that first output packet's display time aligns with the
 188     //       input packet's display time. This should give the correct time
 189     //       when point-to-point encoding is in effect.
 190     if (out_list && out_list->start > in->start)
 191     {
 192         int64_t slip = out_list->start - in->start;
 193         hb_buffer_t *out;
 194
 195         out = out_list;
 196         while (out)
 197         {
 198             out->start -= slip;
 199             out->stop -= slip;
 200             out = out->next;
 201         }
 202     }
 203
 204     return out_list;
 205 }
 206
 207 /*
 208  * Parses the start and stop time from the specified SSA packet.
 209  *
 210  * Returns true if parsing failed; false otherwise.
 211  */
 212 static int parse_timing_from_ssa_packet( char *in_data, int64_t *in_start, int64_t *in_stop )
 213 {
 214     /*
 215      * Parse Start and End fields for timing information
 216      */
 217     int start_hr, start_min, start_sec, start_centi;
 218     int   end_hr,   end_min,   end_sec,   end_centi;
 219     int numPartsRead = sscanf( (char *) in_data, "Dialogue: %*128[^,],"
 220         "%d:%d:%d.%d,"  // Start
 221         "%d:%d:%d.%d,", // End
 222         &start_hr, &start_min, &start_sec, &start_centi,
 223           &end_hr,   &end_min,   &end_sec,   &end_centi );
 224     if ( numPartsRead != 8 )
 225         return 1;
 226
 227     *in_start = SSA_2_HB_TIME(start_hr, start_min, start_sec, start_centi);
 228     *in_stop  = SSA_2_HB_TIME(  end_hr,   end_min,   end_sec,   end_centi);
 229
 230     return 0;
 231 }
 232
 233 static uint8_t *find_field( uint8_t *pos, uint8_t *end, int fieldNum )
 234 {
 235     int curFieldID = 1;
 236     while (pos < end)
 237     {
 238         if ( *pos++ == ',' )
 239         {
 240             curFieldID++;
 241             if ( curFieldID == fieldNum )
 242                 return pos;
 243         }
 244     }
 245     return NULL;
 246 }
 247
 248 /*
 249  * SSA line format:
 250  *   Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0'
 251  *             1      2     3   4     5    6       7       8       9      10
 252  */
 253 static hb_buffer_t *ssa_decode_line_to_utf8( uint8_t *in_data, int in_size, int in_sequence )
 254 {
 255     uint8_t *pos = in_data;
 256     uint8_t *end = in_data + in_size;
 257
 258     // Parse values for in->start and in->stop
 259     int64_t in_start, in_stop;
 260     if ( parse_timing_from_ssa_packet( (char *) in_data, &in_start, &in_stop ) )
 261         goto fail;
 262
 263     uint8_t *textFieldPos = find_field( pos, end, 10 );
 264     if ( textFieldPos == NULL )
 265         goto fail;
 266
 267     // Count the number of style overrides in the Text field
 268     int numStyleOverrides = 0;
 269     pos = textFieldPos;
 270     while ( pos < end )
 271     {
 272         if (*pos++ == '{')
 273         {
 274             numStyleOverrides++;
 275         }
 276     }
 277
 278     int maxOutputSize = (end - textFieldPos) + ((numStyleOverrides + 1) * MAX_OVERHEAD_PER_OVERRIDE);
 279     hb_buffer_t *out = hb_buffer_init( maxOutputSize );
 280     if ( out == NULL )
 281         return NULL;
 282
 283     /*
 284      * The Text field contains plain text marked up with:
 285      * (1) '\n' -> space
 286      * (2) '\N' -> newline
 287      * (3) curly-brace control codes like '{\k44}' -> HTML tags / strip
 288      *
 289      * Perform the above conversions and copy it to the output packet
 290      */
 291     StyleSet prevStyles = 0;
 292     uint8_t *dst = out->data;
 293     pos = textFieldPos;
 294     while ( pos < end )
 295     {
 296         if ( pos[0] == '\\' && pos[1] == 'n' )
 297         {
 298             *dst++ = ' ';
 299             pos += 2;
 300         }
 301         else if ( pos[0] == '\\' && pos[1] == 'N' )
 302         {
 303             *dst++ = '\n';
 304             pos += 2;
 305         }
 306         else if ( pos[0] == '{' )
 307         {
 308             // Parse SSA style overrides and append appropriate HTML style tags
 309             StyleSet nextStyles = ssa_parse_style_override( pos, prevStyles );
 310             ssa_append_html_tags_for_style_change( &dst, prevStyles, nextStyles );
 311             prevStyles = nextStyles;
 312
 313             // Skip past SSA control code
 314             while ( pos < end && *pos != '}' ) pos++;
 315             if    ( pos < end && *pos == '}' ) pos++;
 316         }
 317         else
 318         {
 319             // Copy raw character
 320             *dst++ = *pos++;
 321         }
 322     }
 323
 324     // Append closing HTML style tags
 325     ssa_append_html_tags_for_style_change( &dst, prevStyles, 0 );
 326
 327     // Trim output buffer to the actual amount of data written
 328     out->size = dst - out->data;
 329
 330     // Copy metadata from the input packet to the output packet
 331     out->start = in_start;
 332     out->stop = in_stop;
 333     out->sequence = in_sequence;
 334
 335     return out;
 336
 337 fail:
 338     hb_log( "decssasub: malformed SSA subtitle packet: %.*s\n", in_size, in_data );
 339     return NULL;
 340 }
 341
 342 /*
 343  * SSA line format:
 344  *   Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0'
 345  *             1      2     3   4     5    6       7       8       9      10
 346  *
 347  * MKV-SSA packet format:
 348  *   ReadOrder,Marked,          Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0'
 349  *   1         2                3     4    5       6       7       8      9
 350  */
 351 static hb_buffer_t *ssa_decode_line_to_picture( hb_work_object_t * w, uint8_t *in_data, int in_size, int in_sequence )
 352 {
 353     hb_work_private_t * pv = w->private_data;
 354
 355     // Parse values for in->start and in->stop
 356     int64_t in_start, in_stop;
 357     if ( parse_timing_from_ssa_packet( (char *) in_data, &in_start, &in_stop ) )
 358         goto fail;
 359
 360     // Convert the SSA packet to MKV-SSA format, which is what libass expects
 361     char *mkvIn;
 362     int mkvInSize;
 363     {
 364         char *layerField = malloc( in_size );
 365         int numPartsRead = sscanf( (char *) in_data, "Dialogue: %128[^,],", layerField );
 366         if ( numPartsRead != 1 )
 367             goto fail;
 368
 369         char *styleToTextFields = (char *) find_field( in_data, in_data + in_size, 4 );
 370         if ( styleToTextFields == NULL ) {
 371             free( layerField );
 372             goto fail;
 373         }
 374
 375         mkvIn = malloc( in_size + 1 );
 376         mkvIn[0] = '\0';
 377         sprintf(mkvIn, "%d", pv->readOrder++);    // ReadOrder: make this up
 378         strcat( mkvIn, "," );
 379         strcat( mkvIn, layerField );
 380         strcat( mkvIn, "," );
 381         strcat( mkvIn, (char *) styleToTextFields );
 382
 383         mkvInSize = strlen(mkvIn);
 384
 385         free( layerField );
 386     }
 387
 388     // Parse MKV-SSA packet
 389     ass_process_chunk( pv->ssaTrack, mkvIn, mkvInSize, in_start / 90, (in_stop - in_start) / 90 );
 390
 391     free( mkvIn );
 392
 393     // TODO: To support things like karaoke, it won't be sufficient to only generate
 394     //       new subtitle pictures when there are subtitle packets. Rather, pictures will
 395     //       need to be generated potentially continuously.
 396     //
 397     //       Until "karaoke support" is implemented, make an educated guess about the
 398     //       timepoint within the subtitle that should be rendered. I guess the midpoint.
 399     int64_t renderTime = ( in_start + in_stop ) / 2;
 400
 401     int changed;
 402     ASS_Image *frameList = ass_render_frame( pv->renderer, pv->ssaTrack, renderTime / 90, &changed );
 403     if ( !changed || !frameList )
 404         return NULL;
 405
 406     int numFrames = 0;
 407     ASS_Image *curFrame;
 408     for (curFrame = frameList; curFrame; curFrame = curFrame->next)
 409         numFrames++;
 410
 411     hb_buffer_t *outSubpictureList = NULL;
 412     hb_buffer_t **outSubpictureListTailPtr = &outSubpictureList;
 413
 414     // Generate a PICTURESUB packet from the frames
 415     ASS_Image *frame;
 416     for (frame = frameList; frame; frame = frame->next) {
 417         // Allocate pixmap where drawing will be done
 418         uint8_t *rgba = calloc(frame->w * frame->h * 4, 1);
 419
 420         unsigned r = (frame->color >> 24) & 0xff;
 421         unsigned g = (frame->color >> 16) & 0xff;
 422         unsigned b = (frame->color >>  8) & 0xff;
 423         unsigned a = (frame->color      ) & 0xff;
 424
 425         int x, y;
 426         for (y = 0; y < frame->h; y++) {
 427             for (x = 0; x < frame->w; x++) {
 428                 unsigned srcAlphaPrenormalized = frame->bitmap[y*frame->stride + x];
 429                 unsigned srcAlpha = (255 - a) * srcAlphaPrenormalized / 255;
 430
 431                 uint8_t *dst = &rgba[(y*frame->w + x) * 4];
 432                 unsigned oldDstAlpha = dst[3];
 433
 434                 if (oldDstAlpha == 0) {
 435                     // Optimized version
 436                     dst[0] = r;
 437                     dst[1] = g;
 438                     dst[2] = b;
 439                     dst[3] = srcAlpha;
 440                 } else {
 441                     dst[3] = 255 - ( 255 - dst[3] ) * ( 255 - srcAlpha ) / 255;
 442                     if (dst[3] != 0) {
 443                         dst[0] = ( dst[0] * oldDstAlpha * (255-srcAlpha) / 255 + r * srcAlpha ) / dst[3];
 444                         dst[1] = ( dst[1] * oldDstAlpha * (255-srcAlpha) / 255 + g * srcAlpha ) / dst[3];
 445                         dst[2] = ( dst[2] * oldDstAlpha * (255-srcAlpha) / 255 + b * srcAlpha ) / dst[3];
 446                     }
 447                 }
 448             }
 449         }
 450
 451         // Generate output subpicture (in PICTURESUB format)
 452         hb_buffer_t *out = hb_buffer_init(frame->w * frame->h * 4);
 453         out->x = frame->dst_x;
 454         out->y = frame->dst_y;
 455         out->width = frame->w;
 456         out->height = frame->h;
 457
 458         int i;
 459         int numPixels = frame->w * frame->h;
 460         for (i = 0; i < numPixels; i++) {
 461             uint8_t *srcRgba = &rgba[i * 4];
 462
 463             uint8_t *dstY = &out->data[(numPixels * 0) + i];
 464             uint8_t *dstA = &out->data[(numPixels * 1) + i];
 465             uint8_t *dstU = &out->data[(numPixels * 2) + i];
 466             uint8_t *dstV = &out->data[(numPixels * 3) + i];
 467
 468             int srcYuv = hb_rgb2yuv((srcRgba[0] << 16) | (srcRgba[1] << 8) | (srcRgba[2] << 0));
 469             int srcA = srcRgba[3];
 470
 471             *dstY = (srcYuv >> 16) & 0xff;
 472             *dstU = (srcYuv >> 8 ) & 0xff;
 473             *dstV = (srcYuv >> 0 ) & 0xff;
 474             *dstA = srcA / 16;  // HB's max alpha value is 16
 475         }
 476
 477         free(rgba);
 478
 479         *outSubpictureListTailPtr = out;
 480         outSubpictureListTailPtr = &out->next_subpicture;
 481     }
 482
 483     // NOTE: The subpicture list is actually considered a single packet by most other code
 484     hb_buffer_t *out = outSubpictureList;
 485
 486     // Copy metadata from the input packet to the output packet
 487     out->start = in_start;
 488     out->stop = in_stop;
 489     out->sequence = in_sequence;
 490
 491     return out;
 492
 493 fail:
 494     hb_log( "decssasub: malformed SSA subtitle packet: %.*s\n", in_size, in_data );
 495     return NULL;
 496 }
 497
 498 static void ssa_log(int level, const char *fmt, va_list args, void *data)
 499 {
 500     if ( level < 5 )      // same as default verbosity when no callback is set
 501     {
 502         char *msg;
 503         if ( vasprintf( &msg, fmt, args ) < 0 )
 504         {
 505             hb_log( "decssasub: could not report libass message\n" );
 506             return;
 507         }
 508         hb_log( "[ass] %s", msg );  // no need for extra '\n' because libass sends it
 509
 510         free( msg );
 511     }
 512 }
 513
 514 static int decssaInit( hb_work_object_t * w, hb_job_t * job )
 515 {
 516     hb_work_private_t * pv;
 517
 518     pv              = calloc( 1, sizeof( hb_work_private_t ) );
 519     w->private_data = pv;
 520
 521     if ( w->subtitle->config.dest == RENDERSUB ) {
 522         pv->ssa = ass_library_init();
 523         if ( !pv->ssa ) {
 524             hb_log( "decssasub: libass initialization failed\n" );
 525             return 1;
 526         }
 527
 528         // Redirect libass output to hb_log
 529         ass_set_message_cb( pv->ssa, ssa_log, NULL );
 530
 531         // Load embedded fonts
 532         hb_list_t * list_attachment = job->title->list_attachment;
 533         int i;
 534         for ( i = 0; i < hb_list_count(list_attachment); i++ )
 535         {
 536             hb_attachment_t * attachment = hb_list_item( list_attachment, i );
 537
 538             if ( attachment->type == FONT_TTF_ATTACH )
 539             {
 540                 ass_add_font(
 541                     pv->ssa,
 542                     attachment->name,
 543                     attachment->data,
 544                     attachment->size );
 545             }
 546         }
 547
 548         ass_set_extract_fonts( pv->ssa, 1 );
 549         ass_set_style_overrides( pv->ssa, NULL );
 550
 551         pv->renderer = ass_renderer_init( pv->ssa );
 552         if ( !pv->renderer ) {
 553             hb_log( "decssasub: renderer initialization failed\n" );
 554             return 1;
 555         }
 556
 557         ass_set_use_margins( pv->renderer, 0 );
 558         ass_set_hinting( pv->renderer, ASS_HINTING_LIGHT );     // VLC 1.0.4 uses this
 559         ass_set_font_scale( pv->renderer, 1.0 );
 560         ass_set_line_spacing( pv->renderer, 1.0 );
 561
 562         // Setup default font family
 563         //
 564         // SSA v4.00 requires that "Arial" be the default font
 565         const char *font = NULL;
 566         const char *family = "Arial";
 567         // NOTE: This can sometimes block for several *seconds*.
 568         //       It seems that process_fontdata() for some embedded fonts is slow.
 569         ass_set_fonts( pv->renderer, font, family, /*haveFontConfig=*/1, NULL, 1 );
 570
 571         // Setup track state
 572         pv->ssaTrack = ass_new_track( pv->ssa );
 573         if ( !pv->ssaTrack ) {
 574             hb_log( "decssasub: ssa track initialization failed\n" );
 575             return 1;
 576         }
 577
 578         // NOTE: The codec extradata is expected to be in MKV format
 579         ass_process_codec_private( pv->ssaTrack,
 580             (char *) w->subtitle->extradata, w->subtitle->extradata_size );
 581
 582         int originalWidth = job->title->width;
 583         int originalHeight = job->title->height;
 584         ass_set_frame_size( pv->renderer, originalWidth, originalHeight);
 585         ass_set_aspect_ratio( pv->renderer, /*dar=*/1.0, /*sar=*/1.0 );
 586     }
 587
 588     return 0;
 589 }
 590
 591 static int decssaWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 592                         hb_buffer_t ** buf_out )
 593 {
 594     hb_buffer_t * in = *buf_in;
 595     hb_buffer_t * out_list = NULL;
 596
 597 #if SSA_VERBOSE_PACKETS
 598     printf("\nPACKET(%"PRId64",%"PRId64"): %.*s\n", in->start/90, in->stop/90, in->size, in->data);
 599 #endif
 600
 601     if ( in->size > 0 ) {
 602         out_list = ssa_decode_packet(w, in);
 603     } else {
 604         out_list = hb_buffer_init( 0 );
 605     }
 606
 607     // Dispose the input packet, as it is no longer needed
 608     hb_buffer_close(&in);
 609
 610     *buf_in = NULL;
 611     *buf_out = out_list;
 612     return HB_WORK_OK;
 613 }
 614
 615 static void decssaClose( hb_work_object_t * w )
 616 {
 617     hb_work_private_t * pv = w->private_data;
 618
 619     if ( pv->ssaTrack )
 620         ass_free_track( pv->ssaTrack );
 621     if ( pv->renderer )
 622         ass_renderer_done( pv->renderer );
 623     if ( pv->ssa )
 624         ass_library_done( pv->ssa );
 625
 626     free( w->private_data );
 627 }
 628
/*
 * Work object describing the SSA subtitle decoder.
 *
 * The initializer is positional; the field order is defined by
 * hb_work_object_t, which is declared outside this file.
 */
hb_work_object_t hb_decssasub =
{
    WORK_DECSSASUB,
    "SSA Subtitle Decoder",
    decssaInit,     // defined above: sets up decoder state and libass
    decssaWork,     // defined above: decodes one input packet
    decssaClose     // defined above: releases decoder state
};