2 This file is part of the HandBrake source code.
3 Homepage: <http://handbrake.fr/>.
4 It may be used under the terms of the GNU General Public License. */
7 * Converts SSA subtitles to UTF-8 subtitles with limited HTML-style markup (<b>, <i>, <u>).
9 * SSA format references:
10 * http://www.matroska.org/technical/specs/subtitles/ssa.html
11 * http://moodub.free.fr/video/ass-specs.doc
12 * vlc-1.0.4/modules/codec/subtitles/subsass.c:ParseSSAString
14 * @author David Foster (davidfstr)
// Worst-case number of markup bytes attributed to one style override when
// sizing the output buffer: one open/close pair (7 bytes) for each of the
// three supported tags.
27 // "<b></b>".len + "<i></i>".len + "<u></u>".len
28 #define MAX_OVERHEAD_PER_OVERRIDE (7 * 3)
// Converts an SSA timestamp (hours, minutes, seconds, centiseconds) into a
// tick count. The visible term scales hours to milliseconds and the whole
// sum is multiplied by 90, i.e. 90 ticks per millisecond.
// NOTE(review): the remaining continuation lines of this macro are not
// visible in this listing.
// NOTE(review): arguments are expanded without parentheses and the
// arithmetic is carried out in `long`; confirm callers only pass simple
// operands and that the result cannot overflow where long is 32 bits.
30 #define SSA_2_HB_TIME(hr,min,sec,centi) \
31 ( 90L * ( hr * 1000L * 60 * 60 +\
// Reads one SSA style-override block and computes the style set that is in
// effect after it.
//
//   pos        - start of the block; per the first step below this is the
//                leading '{'. The scan stops at '}' or '\0', so the text is
//                expected to be NUL-terminated.
//   prevStyles - styles active before the block; the result starts from these.
//
// Only the codes \b, \i and \u followed by '0' or '1' are interpreted; any
// other code in the block is passed over. The caller uses the returned value
// as the new active style set.
// NOTE(review): the embedded line numbers skip, so some statements (loop
// header, pointer advance, return, brace-only lines) are not visible here.
36 static StyleSet ssa_parse_style_override( uint8_t *pos, StyleSet prevStyles )
// Start from the caller's styles; bits change only for recognised codes.
38 StyleSet nextStyles = prevStyles;
41 // Skip over leading '{' or last '\\'
44 // Scan for next \code
45 while ( *pos != '\\' && *pos != '}' && *pos != '\0' ) pos++;
48 // End of style override block
52 // If next chars are \[biu][01], interpret it
// NOTE(review): strchr() also matches the terminating NUL of its search
// string, so this test passes when pos[1] is '\0', after which pos[2] is
// read from beyond the terminator -- confirm that byte is always readable.
53 if ( strchr("biu", pos[1]) && strchr("01", pos[2]) )
// Map the code letter to its style bit (0 if none matches).
56 pos[1] == 'b' ? BOLD :
57 pos[1] == 'i' ? ITALIC :
58 pos[1] == 'u' ? UNDERLINE : 0;
// '1' turns the style on; '0' turns it off.
59 int enabled = (pos[2] == '1');
// Set the bit (presumably the `enabled` branch -- the condition line is not
// visible here).
63 nextStyles |= styleID;
// Clear the bit (presumably the `!enabled` branch).
67 nextStyles &= ~styleID;
// Writes the HTML-style tags needed to move from one style set to another.
//
//   dst        - address of the caller's write cursor; each appended byte
//                advances *dst.
//   prevStyles - styles currently open; a closing tag is written for each.
//   nextStyles - styles that should be open afterwards; an opening tag is
//                written for each.
//
// Every style in prevStyles is closed and every style in nextStyles is
// opened, even when a style appears in both sets.
// NOTE(review): no bounds check is visible; the caller appears responsible
// for sizing the destination (see MAX_OVERHEAD_PER_OVERRIDE) -- confirm.
74 static void ssa_append_html_tags_for_style_change(
75 uint8_t **dst, StyleSet prevStyles, StyleSet nextStyles )
// APPEND copies a string literal to *dst. The visible copy loop stops at the
// source terminator, so the terminator itself is not copied.
// NOTE(review): the other lines of this macro are not visible here.
77 #define APPEND(str) { \
79 while (*src) { *(*dst)++ = *src++; } \
82 // Reverse-order close all previous styles
83 if (prevStyles & UNDERLINE) APPEND("</u>");
84 if (prevStyles & ITALIC) APPEND("</i>");
85 if (prevStyles & BOLD) APPEND("</b>");
87 // Forward-order open all next styles
88 if (nextStyles & BOLD) APPEND("<b>");
89 if (nextStyles & ITALIC) APPEND("<i>");
90 if (nextStyles & UNDERLINE) APPEND("<u>");
// Forward declaration: ssa_decode_to_utf8() calls this per-line decoder
// before its definition appears further down.
95 static hb_buffer_t *ssa_decode_to_utf8_line( uint8_t *in_data, int in_size );
// Decodes an input packet holding one or more CR/LF-separated SSA lines.
// Each line is passed to ssa_decode_to_utf8_line() and the resulting buffers
// are chained through their `next` pointers into a list.
//
//   in - input packet. It is modified: the buffer is grown by one byte,
//        NUL-terminated, and then tokenised in place by strtok_r().
//
// NOTE(review): the embedded line numbers skip, so some statements (loop
// condition, continue/return statements, brace-only lines) are not visible.
// Expected input layout:
99 * ( Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text CR LF ) +
100 * 1 2 3 4 5 6 7 8 9 10
102 static hb_buffer_t *ssa_decode_to_utf8( hb_buffer_t *in )
104 // Store NUL after the end of the buffer to make using string processing safe
105 hb_buffer_realloc( in, in->size + 1 );
106 in->data[in->size] = '\0';
// out_list is the head of the result list; nextPtr tracks where the next
// decoded buffer is linked in.
108 hb_buffer_t *out_list = NULL;
109 hb_buffer_t **nextPtr = &out_list;
// Either CR or LF ends a line.
111 const char *EOL = "\r\n";
112 char *curLine, *curLine_parserData;
113 for ( curLine = strtok_r( (char *) in->data, EOL, &curLine_parserData );
115 curLine = strtok_r( NULL, EOL, &curLine_parserData ) )
117 // Skip empty lines and spaces between adjacent CR and LF
118 if (curLine[0] == '\0')
121 // Decode an individual SSA line
// NOTE(review): `out` is dereferenced below with no visible NULL check,
// while the line decoder has a malformed-packet path -- confirm it can never
// return NULL here.
122 hb_buffer_t *out = ssa_decode_to_utf8_line( (uint8_t*)curLine, strlen( curLine ) );
124 // We shouldn't be storing the extra NUL character,
125 // but the MP4 muxer expects this, unfortunately.
126 if ( out->size > 0 && out->data[out->size - 1] != '\0' ) {
127 // NOTE: out->size remains unchanged
128 hb_buffer_realloc( out, out->size + 1 );
129 out->data[out->size] = '\0';
132 // If the input packet was non-empty, do not pass through
133 // an empty output packet (even if the subtitle was empty),
134 // as this would be interpreted as an end-of-stream
135 if ( in->size > 0 && out->size == 0 ) {
136 hb_buffer_close(&out);
140 // Append 'out' to 'out_list'
// Advance the link slot to the new tail's `next` field.
142 nextPtr = &out->next;
// Decodes a single NUL-terminated SSA "Dialogue" line into an output buffer
// holding the subtitle text with HTML-style <b>/<i>/<u> markup.
//
//   in_data - start of the line.
//   in_size - length of the line in bytes (the caller passes strlen()).
//
// Steps: parse the Start/End timestamps, locate the Text field (field 10),
// size the output buffer for the worst case, convert the text, then record
// the output size and timing. A malformed line is logged.
// NOTE(review): the embedded line numbers skip, so some statements (loops,
// gotos/returns, brace-only lines) are not visible here; in particular the
// value returned on the malformed path is not shown.
// Expected input layout:
150 * Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0'
151 * 1 2 3 4 5 6 7 8 9 10
153 static hb_buffer_t *ssa_decode_to_utf8_line( uint8_t *in_data, int in_size )
// pos is the read cursor; end is one past the last input byte.
155 uint8_t *pos = in_data;
156 uint8_t *end = in_data + in_size;
159 * Parse Start and End fields for timing information
161 int start_hr, start_min, start_sec, start_centi;
162 int end_hr, end_min, end_sec, end_centi;
// Skip the first field (up to 128 non-comma characters), then read the two
// h:m:s.cs timestamps.
163 int numPartsRead = sscanf( (char *) in_data, "%*128[^,],"
164 "%d:%d:%d.%d," // Start
165 "%d:%d:%d.%d,", // End
166 &start_hr, &start_min, &start_sec, &start_centi,
167 &end_hr, &end_min, &end_sec, &end_centi );
// All eight numeric parts must have been converted.
168 if ( numPartsRead != 8 )
171 int64_t in_start = SSA_2_HB_TIME(start_hr, start_min, start_sec, start_centi);
172 int64_t in_stop = SSA_2_HB_TIME( end_hr, end_min, end_sec, end_centi);
175 * Advance 'pos' to the beginning of the Text field
183 if ( curFieldID == 10 ) // Text
// The Text field was never reached.
187 if ( curFieldID != 10 )
189 uint8_t *textFieldPos = pos;
191 // Count the number of style overrides in the Text field
192 int numStyleOverrides = 0;
// Upper bound on output size: the raw text length plus worst-case tag bytes
// for every override, plus one more allowance (presumably for the closing
// tags appended at the end).
201 int maxOutputSize = (end - textFieldPos) + ((numStyleOverrides + 1) * MAX_OVERHEAD_PER_OVERRIDE);
202 hb_buffer_t *out = hb_buffer_init( maxOutputSize );
207 * The Text field contains plain text marked up with:
209 * (2) '\N' -> newline
210 * (3) curly-brace control codes like '{\k44}' -> empty (strip them)
212 * Perform the above conversions and copy it to the output packet
// prevStyles tracks the currently open styles; dst is the write cursor.
214 StyleSet prevStyles = 0;
215 uint8_t *dst = out->data;
219 if ( pos[0] == '\\' && pos[1] == 'n' )
224 else if ( pos[0] == '\\' && pos[1] == 'N' )
229 else if ( pos[0] == '{' )
231 // Parse SSA style overrides and append appropriate HTML style tags
232 StyleSet nextStyles = ssa_parse_style_override( pos, prevStyles );
233 ssa_append_html_tags_for_style_change( &dst, prevStyles, nextStyles );
234 prevStyles = nextStyles;
236 // Skip past SSA control code
// Stops at `end` if the block has no closing '}'.
237 while ( pos < end && *pos != '}' ) pos++;
238 if ( pos < end && *pos == '}' ) pos++;
242 // Copy raw character
247 // Append closing HTML style tags
248 ssa_append_html_tags_for_style_change( &dst, prevStyles, 0 );
250 // Trim output buffer to the actual amount of data written
251 out->size = dst - out->data;
253 // Copy metadata from the input packet to the output packet
254 out->start = in_start;
// Malformed-line path: log the offending line (length-limited by in_size).
260 hb_log( "decssasub: malformed SSA subtitle packet: %.*s\n", in_size, in_data );
// Init hook of the SSA subtitle decoder work object.
// NOTE(review): only the signature is visible in this listing; the body and
// return value are not shown.
264 static int decssaInit( hb_work_object_t * w, hb_job_t * job )
// Work hook of the SSA subtitle decoder: turns one input packet into UTF-8
// subtitle buffer(s).
//
//   w       - work object (not used in the visible lines).
//   buf_in  - *buf_in is the packet to decode; it is closed before returning.
//   buf_out - output slot.
//
// NOTE(review): the statements that store out_list into *buf_out and produce
// the return value are not visible in this listing.
269 static int decssaWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
270 hb_buffer_t ** buf_out )
272 hb_buffer_t * in = *buf_in;
273 hb_buffer_t * out_list = NULL;
// Non-empty packet: decode it into a list of output buffers.
275 if ( in->size > 0 ) {
276 out_list = ssa_decode_to_utf8(in);
// Otherwise produce a zero-size buffer (presumably the empty-packet branch;
// the `else` line is not visible here).
278 out_list = hb_buffer_init( 0 );
281 // Dispose the input packet, as it is no longer needed
282 hb_buffer_close(&in);
// Close hook of the SSA subtitle decoder work object.
// NOTE(review): only the signature is visible in this listing; the body is
// not shown.
289 static void decssaClose( hb_work_object_t * w )
// Work-object descriptor for the SSA subtitle decoder.
// NOTE(review): only the display name is visible here; the remaining
// initializer fields (presumably the decssaInit/decssaWork/decssaClose
// hooks) are not shown -- confirm.
294 hb_work_object_t hb_decssasub =
297 "SSA Subtitle Decoder",