2 This file is part of the HandBrake source code.
3 Homepage: <http://handbrake.fr/>.
4 It may be used under the terms of the GNU General Public License. */
7 * Converts TX3G subtitles to UTF-8 subtitles with limited HTML-style markup (<b>, <i>, <u>).
9 * TX3G == MPEG 4, Part 17 (ISO/IEC 14496-17) == 3GPP Timed Text (26.245)
10 * A full reference to the format can be found here:
11 * http://www.3gpp.org/ftp/Specs/html-info/26245.htm
13 * @author David Foster (davidfstr)
// Sizing constants for the worst-case output buffer computed in
// tx3g_decode_to_utf8: each style record is budgeted one open tag and one
// close tag for each of the NUM_FACE_STYLE_FLAGS face styles.
26 #define NUM_FACE_STYLE_FLAGS 3
27 #define MAX_OPEN_TAG_SIZE 3 // "<b>"
28 #define MAX_CLOSE_TAG_SIZE 4 // "</b>"
31 uint16_t startChar; // NOTE: indices in terms of *character* (not: byte) positions
34 uint8_t faceStyleFlags; // FaceStyleFlag
36 uint32_t textColorRGBA;
// Big-endian readers that consume bytes from the cursor `pos`, which must be a
// modifiable `uint8_t *` in the calling scope.
//
// NOTE: None of these macros check for buffer overflow
//
// Each macro expands to a single expression (a call to one of the helpers
// below), so it behaves as one statement even as the body of an unbraced `if`.
// Bytes are widened to an unsigned type before shifting, so a top byte >= 0x80
// is never shifted into the sign bit of an `int`.

// Returns the byte at *cursor and advances the cursor by 1.
static inline uint8_t tx3g_read_u8( uint8_t **cursor )
{
    uint8_t value = (*cursor)[0];
    *cursor += 1;
    return value;
}

// Returns the big-endian 16-bit value at *cursor and advances the cursor by 2.
static inline uint16_t tx3g_read_u16( uint8_t **cursor )
{
    const uint8_t *p = *cursor;
    *cursor += 2;
    return (uint16_t) ( ( (uint16_t) p[0] << 8 ) | p[1] );
}

// Returns the big-endian 32-bit value at *cursor and advances the cursor by 4.
static inline uint32_t tx3g_read_u32( uint8_t **cursor )
{
    const uint8_t *p = *cursor;
    *cursor += 4;
    return ( (uint32_t) p[0] << 24 ) |
           ( (uint32_t) p[1] << 16 ) |
           ( (uint32_t) p[2] <<  8 ) |
           ( (uint32_t) p[3] <<  0 );
}

// Returns the current cursor (start of an n-byte array) and advances it by n.
static inline uint8_t *tx3g_read_array( uint8_t **cursor, uint32_t n )
{
    uint8_t *start = *cursor;
    *cursor += n;
    return start;
}

#define READ_U8()       tx3g_read_u8( &pos )
#define READ_U16()      tx3g_read_u16( &pos )
#define READ_U32()      tx3g_read_u32( &pos )
#define READ_ARRAY(n)   tx3g_read_array( &pos, (n) )
// Output helpers that append to the cursor `dst` (a byte pointer in the calling
// scope) and advance it past what was written. None of them check that the
// destination has room.
//
// Each body is wrapped in do/while(0) so that `MACRO(x);` is exactly one
// statement, which keeps unbraced if/else chains well-formed, and the argument
// is parenthesised so any expression can be passed.
#define WRITE_CHAR(c)       do { dst[0] = (c); dst += 1; } while (0)
#define WRITE_START_TAG(c)  do { dst[0] = '<'; dst[1] = (c); dst[2] = '>'; dst += 3; } while (0)
#define WRITE_END_TAG(c)    do { dst[0] = '<'; dst[1] = '/'; dst[2] = (c); dst[3] = '>'; dst += 4; } while (0)
// Packs the first four bytes of `str` into a big-endian 32-bit box type code,
// e.g. FOURCC("styl") == 0x7374796C.
//
// Each byte goes through uint8_t before widening: where plain `char` is
// signed, a byte >= 0x80 would otherwise sign-extend and set the bits that
// belong to the bytes above it.
#define FOURCC(str)    ((((uint32_t) (uint8_t) (str)[0]) << 24) | \
                        (((uint32_t) (uint8_t) (str)[1]) << 16) | \
                        (((uint32_t) (uint8_t) (str)[2]) <<  8) | \
                        (((uint32_t) (uint8_t) (str)[3]) <<  0))
// True when `c` is a UTF-8 continuation byte (bit pattern 10xxxxxx), i.e. a
// non-first byte of a multi-byte character. The argument is parenthesised so
// that lower-precedence expressions such as `a | b` are masked as a whole.
#define IS_10xxxxxx(c) (((c) & 0xC0) == 0x80)
// Converts the TX3G text sample held in in->data into a newly initialised
// buffer of UTF-8 text, emitting <b>, <i> and <u> tags where style records
// start. The start and stop values of the input packet are copied to the
// output buffer.
//
// NOTE(review): only part of this function is visible in this listing, so the
// notes below describe the visible statements only.
55 static hb_buffer_t *tx3g_decode_to_utf8( hb_buffer_t *in )
// pos is the read cursor; end is one past the last byte of the packet.
57 uint8_t *pos = in->data;
58 uint8_t *end = in->data + in->size;
60 uint16_t numStyleRecords = 0;
66 * Parse the packet as a TX3G TextSample.
68 * Look for a single StyleBox ('styl') and read all contained StyleRecords.
69 * Ignore all other box types.
71 * NOTE: Buffer overflows on read are not checked.
// The sample begins with a 16-bit big-endian byte count followed by the text.
73 uint16_t textLength = READ_U16();
74 uint8_t *text = READ_ARRAY(textLength);
// One zero-filled flag byte per text byte. Entry N accumulates the face-style
// bits of records whose startChar (startStyle) or endChar (endStyle) is N.
// NOTE(review): no NULL check on either calloc result is visible here.
75 startStyle = calloc( textLength, 1 );
76 endStyle = calloc( textLength, 1 );
79 * Read TextSampleModifierBox
81 uint32_t size = READ_U32();
// NOTE(review): pos - end is negative whenever pos is still before end, and
// size is unsigned, so the stored value wraps to a very large number. The
// count of bytes remaining up to end is end - pos. TODO confirm and fix.
83 size = pos - end; // extends to end of packet
86 hb_log( "dectx3gsub: TextSampleModifierBox has unsupported large size" );
// The box type is a four-character code compared against FOURCC constants.
89 uint32_t type = READ_U32();
90 if ( type == FOURCC("uuid") ) {
91 hb_log( "dectx3gsub: TextSampleModifierBox has unsupported extended type" );
95 if ( type == FOURCC("styl") ) {
96 // Found a StyleBox. Parse the contained StyleRecords
// A non-zero numStyleRecords means a StyleBox was already consumed.
98 if ( numStyleRecords != 0 ) {
99 hb_log( "dectx3gsub: found additional StyleBoxes on subtitle; skipping" );
104 numStyleRecords = READ_U16();
// Each record is read field by field in wire order; only startChar, endChar
// and faceStyleFlags are used by the visible code below.
107 for (i=0; i<numStyleRecords; i++) {
108 StyleRecord curRecord;
109 curRecord.startChar = READ_U16();
110 curRecord.endChar = READ_U16();
111 curRecord.fontID = READ_U16();
112 curRecord.faceStyleFlags = READ_U8();
113 curRecord.fontSize = READ_U8();
114 curRecord.textColorRGBA = READ_U32();
// NOTE(review): startChar and endChar come straight from the packet and index
// arrays of textLength bytes; no range check is visible. Any value greater
// than or equal to textLength writes outside the allocation. TODO confirm.
116 startStyle[curRecord.startChar] |= curRecord.faceStyleFlags;
117 endStyle[curRecord.endChar] |= curRecord.faceStyleFlags;
120 // Found some other kind of TextSampleModifierBox. Skip it.
126 * Copy text to output buffer, and add HTML markup for the style records
// Worst case: every record contributes an open and a close tag for each of
// the NUM_FACE_STYLE_FLAGS face styles, on top of the text bytes themselves.
128 int maxOutputSize = textLength + (numStyleRecords * NUM_FACE_STYLE_FLAGS * (MAX_OPEN_TAG_SIZE + MAX_CLOSE_TAG_SIZE));
129 hb_buffer_t *out = hb_buffer_init( maxOutputSize );
130 uint8_t *dst = out->data;
// pos and end are reused here to walk the text bytes captured earlier.
132 for ( pos = text, end = text + textLength; pos < end; pos++ ) {
133 if (IS_10xxxxxx(*pos)) {
134 // Is a non-first byte of a multi-byte UTF-8 character
136 continue; // ...without incrementing 'charIndex'
// Style flags are looked up by character index, not by byte offset.
139 uint8_t plusStyles = startStyle[charIndex];
140 uint8_t minusStyles = endStyle[charIndex];
// Ending styles are tested in the order underline, italic, bold, the reverse
// of the order in which starting styles are opened below.
142 if (minusStyles & UNDERLINE)
144 if (minusStyles & ITALIC)
146 if (minusStyles & BOLD)
149 if (plusStyles & BOLD)
150 WRITE_START_TAG('b');
151 if (plusStyles & ITALIC)
152 WRITE_START_TAG('i');
153 if (plusStyles & UNDERLINE)
154 WRITE_START_TAG('u');
160 // Trim output buffer to the actual amount of data written
161 out->size = dst - out->data;
163 // Copy metadata from the input packet to the output packet
164 out->start = in->start;
165 out->stop = in->stop;
179 #undef WRITE_START_TAG
182 static int dectx3gInit( hb_work_object_t * w, hb_job_t * job )
// Work callback of the TX3G subtitle decoder: takes the packet in *buf_in,
// decodes it with tx3g_decode_to_utf8 when it carries data, and closes the
// input packet afterwards.
//
// NOTE(review): only part of this function is visible in this listing (the
// return statements and the hand-off to *buf_out are not shown), so the notes
// below describe the visible statements only.
187 static int dectx3gWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
188 hb_buffer_t ** buf_out )
190 hb_buffer_t * in = *buf_in;
191 hb_buffer_t * out = NULL;
193 // Warn if the subtitle's duration has not been passed through by the demuxer,
194 // which will prevent the subtitle from displaying at all
195 if ( in->stop == 0 ) {
196 hb_log( "dectx3gsub: subtitle packet lacks duration" );
// NOTE(review): the decoder begins with a 2-byte length read, so a packet of
// exactly 1 byte passes this guard and is then read past its end. TODO confirm.
199 if ( in->size > 0 ) {
200 out = tx3g_decode_to_utf8(in);
// A zero-length buffer is created when the packet was not decoded.
202 out = hb_buffer_init( 0 );
205 // We shouldn't be storing the extra NUL character,
206 // but the MP4 muxer expects this, unfortunately.
207 if ( out->size > 0 && out->data[out->size - 1] != '\0' ) {
208 // NOTE: out->size remains unchanged
// The terminator is written one byte past the reported size; this presumably
// relies on hb_buffer_realloc providing at least size + 1 bytes of storage.
209 hb_buffer_realloc( out, out->size + 1 );
210 out->data[out->size] = '\0';
213 // If the input packet was non-empty, do not pass through
214 // an empty output packet (even if the subtitle was empty),
215 // as this would be interpreted as an end-of-stream
216 if ( in->size > 0 && out->size == 0 ) {
217 hb_buffer_close(&out);
220 // Dispose the input packet, as it is no longer needed
221 hb_buffer_close(&in);
228 static void dectx3gClose( hb_work_object_t * w )
233 hb_work_object_t hb_dectx3gsub =
236 "TX3G Subtitle Decoder",