OSDN Git Service

x264 bump to r1339-82b80ef
[handbrake-jp/handbrake-jp-git.git] / libhb / decsrtsub.c
1 /* 
2    This file is part of the HandBrake source code.
3    Homepage: <http://handbrake.fr/>.
4    It may be used under the terms of the GNU General Public License. */
5
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <string.h>
9 #include <iconv.h>
10 #include <errno.h>
11 #include "hb.h"
12
/*
 * Start and end time of one SRT cue, in milliseconds, as produced by
 * read_time_from_string().
 */
struct start_and_end {
    unsigned long start;    /* cue start, milliseconds */
    unsigned long end;      /* cue end, milliseconds */
};
16
/*
 * Parser states used by srt_read() while walking the SRT file line by line.
 */
enum
{
    /* Reading the text lines of the current entry */
    k_state_inEntry             = 0,
    /* A blank line was seen; the next line may be the next entry's number */
    k_state_potential_new_entry = 1,
    /* An entry number was accepted; the next line is the timing line */
    k_state_timecode            = 2,
};
23
/*
 * One subtitle entry while it is being assembled by srt_read().
 * All times are in milliseconds, as parsed from the SRT timing line.
 */
typedef struct srt_entry_s {
    long offset;        /* entry start minus the end of the previous timing line */
    long duration;      /* stop - start */
    long start;         /* entry start time */
    long stop;          /* entry end time */
    char text[1024];    /* accumulated, NUL-terminated entry text */
    int  pos;           /* number of bytes currently used in text[] */
} srt_entry_t;
30
31 /*
32  * Store all context in the work private struct,
33  */
34 struct hb_work_private_s
35 {
36     hb_job_t * job;
37     FILE     * file;
38     char       buf[1024];
39     int        pos;
40     int        end;
41     char       utf8_buf[2048];
42     int        utf8_pos;
43     int        utf8_end;
44     unsigned long current_time;
45     unsigned long number_of_entries;
46     unsigned long current_state;
47     srt_entry_t current_entry;
48     iconv_t *iconv_context;
49     hb_subtitle_t *subtitle;
50     uint64_t start_time;              // In HB time
51     uint64_t stop_time;               // In HB time
52 };
53
54 static struct start_and_end read_time_from_string( const char* timeString ) 
55 {
56     // for ex. 00:00:15,248 --> 00:00:16,545
57     
58     long houres1, minutes1, seconds1, milliseconds1,
59         houres2, minutes2, seconds2, milliseconds2;
60     
61     sscanf(timeString, "%ld:%ld:%ld,%ld --> %ld:%ld:%ld,%ld\n", &houres1, &minutes1, &seconds1, &milliseconds1,
62            &houres2, &minutes2, &seconds2, &milliseconds2);
63     
64     struct start_and_end result = {
65         milliseconds1 + seconds1*1000 + minutes1*60*1000 + houres1*60*60*1000,
66         milliseconds2 + seconds2*1000 + minutes2*60*1000 + houres2*60*60*1000};
67     return result;
68 }
69
70 static int utf8_fill( hb_work_private_t * pv )
71 {
72     int bytes, conversion = 0;
73     size_t out_size;
74
75     /* Align utf8 data to beginning of the buffer so that we can
76      * fill the buffer to its maximum */
77     memmove( pv->utf8_buf, pv->utf8_buf + pv->utf8_pos, pv->utf8_end - pv->utf8_pos );
78     pv->utf8_end -= pv->utf8_pos;
79     pv->utf8_pos = 0;
80     out_size = 2048 - pv->utf8_end;
81     while( out_size )
82     {
83         char *p, *q;
84         size_t in_size, retval;
85
86         if( pv->end == pv->pos )
87         {
88             bytes = fread( pv->buf, 1, 1024, pv->file );
89             pv->pos = 0;
90             pv->end = bytes;
91             if( bytes == 0 )
92             {
93                 if( conversion )
94                     return 1;
95                 else
96                     return 0;
97             }
98         }
99
100         p = pv->buf + pv->pos;
101         q = pv->utf8_buf + pv->utf8_end;
102         in_size = pv->end - pv->pos;
103
104         retval = iconv( pv->iconv_context, &p, &in_size, &q, &out_size);
105         if( q != pv->utf8_buf + pv->utf8_pos )
106             conversion = 1;
107
108         pv->utf8_end = q - pv->utf8_buf;
109         pv->pos = p - pv->buf;
110
111         if( ( retval == -1 ) && ( errno == EINVAL ) )
112         {
113             /* Incomplete multibyte sequence, read more data */
114             memmove( pv->buf, p, pv->end - pv->pos );
115             pv->end -= pv->pos;
116             pv->pos = 0;
117             bytes = fread( pv->buf + pv->end, 1, 1024 - pv->end, pv->file );
118             if( bytes == 0 )
119             {
120                 if( !conversion )
121                     return 0;
122                 else
123                     return 1;
124             }
125             pv->end += bytes;
126         } else if ( ( retval == -1 ) && ( errno == EILSEQ ) )
127         {
128             hb_error( "Invalid byte for codeset in input, discard byte" );
129             /* Try the next byte of the input */
130             pv->pos++;
131         } else if ( ( retval == -1 ) && ( errno == E2BIG ) )
132         {
133             /* buffer full */
134             return conversion;
135         }
136     }
137     return 1;
138 }
139
140 static int get_line( hb_work_private_t * pv, char *buf, int size )
141 {
142     int i;
143     char c;
144
145     /* Find newline in converted UTF-8 buffer */
146     for( i = 0; i < size - 1; i++ )
147     {
148         if( pv->utf8_pos >= pv->utf8_end )
149         {
150             if( !utf8_fill( pv ) )
151             {
152                 if( i )
153                     return 1;
154                 else
155                     return 0;
156             }
157         }
158         c = pv->utf8_buf[pv->utf8_pos++];
159         if( c == '\n' )
160         {
161             buf[i] = '\n';
162             buf[i+1] = '\0';
163             return 1;
164         }
165         buf[i] = c;
166     }
167     buf[0] = '\0';
168     return 1;
169 }
170
171 /*
172  * Read the SRT file and put the entries into the subtitle fifo for all to read
173  */
174 static hb_buffer_t *srt_read( hb_work_private_t *pv )
175 {
176     char line_buffer[1024];
177
178     if( !pv->file )
179     {
180         return NULL;
181     }
182     
183     while( get_line( pv, line_buffer, sizeof( line_buffer ) ) ) 
184     {
185         switch (pv->current_state)
186         {
187         case k_state_timecode:
188         {
189             struct start_and_end timing = read_time_from_string( line_buffer );
190             pv->current_entry.duration = timing.end - timing.start;
191             pv->current_entry.offset = timing.start - pv->current_time;
192             
193             pv->current_time = timing.end;
194
195             pv->current_entry.start = timing.start;
196             pv->current_entry.stop = timing.end;
197
198             pv->current_state = k_state_inEntry;
199             continue;                           
200         }
201         
202         case k_state_inEntry:
203         {
204             char *q;
205             int  size, len;
206
207             // If the current line is empty, we assume this is the
208             //  seperation betwene two entries. In case we are wrong,
209             //  the mistake is corrected in the next state.
210             if (strcmp(line_buffer, "\n") == 0 || strcmp(line_buffer, "\r\n") == 0) {
211                 pv->current_state = k_state_potential_new_entry;
212                 continue;
213             }
214             
215             q = pv->current_entry.text + pv->current_entry.pos;
216             len = strlen( line_buffer );
217             size = MIN(1024 - pv->current_entry.pos - 1, len );
218             memcpy(q, line_buffer, size);
219             pv->current_entry.pos += size;
220             pv->current_entry.text[pv->current_entry.pos] = '\0';
221             break;                              
222         }
223         
224         case k_state_potential_new_entry:
225         {
226             const char endpoint[] = "\0";
227             const unsigned long potential_entry_number = strtol(line_buffer, (char**)&endpoint, 10);
228             hb_buffer_t *buffer = NULL;
229             /*
230              * Is this really new next entry begin?
231              */
232             if (potential_entry_number == pv->number_of_entries + 1) 
233             {
234                 /*
235                  * We found the next entry - or a really rare error condition
236                  */
237                 if( *pv->current_entry.text )
238                 {
239                     long length;
240                     char *p, *q;
241                     int  line = 1;
242                     uint64_t start_time = ( pv->current_entry.start + 
243                                             pv->subtitle->config.offset ) * 90;
244                     uint64_t stop_time = ( pv->current_entry.stop + 
245                                            pv->subtitle->config.offset ) * 90;
246
247                     if( !( start_time > pv->start_time && stop_time < pv->stop_time ) )
248                     {
249                         hb_deep_log( 3, "Discarding SRT at time start %"PRId64", stop %"PRId64, start_time, stop_time);
250                         memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
251                         ++(pv->number_of_entries);
252                         pv->current_state = k_state_timecode;
253                         continue;
254                     }
255
256                     length = strlen( pv->current_entry.text );
257
258                     for( q = p = pv->current_entry.text; *p; p++)
259                     {
260                         if( *p == '\n' )
261                         {
262                             if ( line == 1 )
263                             {
264                                 *q = *p;
265                                 line = 2;
266                             }
267                             else
268                             {
269                                 *q = ' ';
270                             }
271                             q++;
272                         }
273                         else if( *p != '\r' )
274                         {
275                             *q = *p;
276                             q++;
277                         }
278                         else
279                         {
280                             length--;
281                         }
282                     }
283                     *q = '\0';
284
285                     buffer = hb_buffer_init( length + 1 );
286
287                     if( buffer )
288                     {
289                         buffer->start = start_time - pv->start_time;
290                         buffer->stop = stop_time - pv->start_time;
291
292                         memcpy( buffer->data, pv->current_entry.text, length + 1 );
293                     }
294                 }
295                 memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
296                 ++(pv->number_of_entries);
297                 pv->current_state = k_state_timecode;
298                 if( buffer )
299                 {
300                     return buffer;
301                 }
302                 continue;
303             } 
304             else 
305             {
306                 /*
307                  * Well.. looks like we are in the wrong mode.. lets add the
308                  * newline we misinterpreted...
309                  */
310                 strncat(pv->current_entry.text, " ", 1024);
311                 pv->current_state = k_state_inEntry;
312             }
313             
314             break;
315         }
316         }
317     }
318
319     hb_buffer_t *buffer = NULL;
320     if( *pv->current_entry.text )
321     {
322         long length;
323         char *p, *q;
324         int  line = 1;
325         uint64_t start_time = ( pv->current_entry.start + 
326                                 pv->subtitle->config.offset ) * 90;
327         uint64_t stop_time = ( pv->current_entry.stop + 
328                                pv->subtitle->config.offset ) * 90;
329
330         if( !( start_time > pv->start_time && stop_time < pv->stop_time ) )
331         {
332             hb_deep_log( 3, "Discarding SRT at time start %"PRId64", stop %"PRId64, start_time, stop_time);
333             memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
334             return NULL;
335         }
336
337         length = strlen( pv->current_entry.text );
338
339         for( q = p = pv->current_entry.text; *p; p++)
340         {
341             if( *p == '\n' )
342             {
343                 if ( line == 1 )
344                 {
345                     *q = *p;
346                     line = 2;
347                 }
348                 else
349                 {
350                     *q = ' ';
351                 }
352                 q++;
353             }
354             else if( *p != '\r' )
355             {
356                 *q = *p;
357                 q++;
358             }
359             else
360             {
361                 length--;
362             }
363         }
364         *q = '\0';
365
366         buffer = hb_buffer_init( length + 1 );
367
368         if( buffer )
369         {
370             buffer->start = start_time - pv->start_time;
371             buffer->stop = stop_time - pv->start_time;
372
373             memcpy( buffer->data, pv->current_entry.text, length + 1 );
374         }
375     }
376     memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
377     if( buffer )
378     {
379         return buffer;
380     }
381     
382     return NULL;
383 }
384
385 static int decsrtInit( hb_work_object_t * w, hb_job_t * job )
386 {
387     int retval = 1;
388     hb_work_private_t * pv;
389     hb_buffer_t *buffer;
390     int i;
391     hb_chapter_t * chapter;
392     hb_title_t *title = job->title;
393
394     pv = calloc( 1, sizeof( hb_work_private_t ) );
395     if( pv )
396     {
397         w->private_data = pv;
398
399         pv->job = job;
400
401         buffer = hb_buffer_init( 0 );
402         hb_fifo_push( w->fifo_in, buffer);
403         
404         pv->file = fopen( w->subtitle->config.src_filename, "r" );
405         
406         pv->current_state = k_state_potential_new_entry;
407         pv->number_of_entries = 0;
408         pv->current_time = 0;
409         pv->subtitle = w->subtitle;
410
411         /*
412          * Figure out the start and stop times from teh chapters being
413          * encoded - drop subtitle not in this range.
414          */
415         pv->start_time = 0;
416         for( i = 1; i < job->chapter_start; ++i )
417         {
418             chapter = hb_list_item( title->list_chapter, i - 1 );
419             if( chapter )
420             {
421                 pv->start_time += chapter->duration;
422             } else {
423                 hb_error( "Could not locate chapter %d for SRT start time", i );
424                 retval = 0;
425             }
426         }
427         pv->stop_time = pv->start_time;
428         for( i = job->chapter_start; i <= job->chapter_end; ++i )
429         {
430             chapter = hb_list_item( title->list_chapter, i - 1 );
431             if( chapter )
432             {
433                 pv->stop_time += chapter->duration;
434             } else {
435                 hb_error( "Could not locate chapter %d for SRT start time", i );
436                 retval = 0;
437             }
438         }
439
440         hb_deep_log( 3, "SRT Start time %"PRId64", stop time %"PRId64, pv->start_time, pv->stop_time);
441
442         pv->iconv_context = iconv_open( "utf-8", pv->subtitle->config.src_codeset );
443
444
445         if( pv->iconv_context == (iconv_t) -1 )
446         {
447             hb_error("Could not open the iconv library with those file formats\n");
448
449         } else {
450             memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
451             
452             pv->file = fopen( w->subtitle->config.src_filename, "r" );
453             
454             if( !pv->file )
455             {
456                 hb_error("Could not open the SRT subtitle file '%s'\n", 
457                          w->subtitle->config.src_filename);
458             } else {
459                 retval = 0;
460             }
461         }
462     } 
463
464     return retval;
465 }
466
467 static int decsrtWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
468                        hb_buffer_t ** buf_out )
469 {
470     hb_work_private_t * pv = w->private_data;
471     hb_buffer_t * in = *buf_in;
472     hb_buffer_t * out = NULL;
473
474     out = srt_read( pv );
475
476     if( out )
477     {
478         /*
479          * Keep a buffer in our input fifo so that we get run.
480          */
481         hb_fifo_push( w->fifo_in, in);
482         *buf_in = NULL;
483         *buf_out = out;
484     } else {
485         *buf_out = NULL;
486         return HB_WORK_OK;
487     }
488
489     return HB_WORK_OK;  
490 }
491
492 static void decsrtClose( hb_work_object_t * w )
493 {
494     hb_work_private_t * pv = w->private_data;
495     fclose( pv->file );
496     iconv_close(pv->iconv_context);
497     free( w->private_data );
498 }
499
500 hb_work_object_t hb_decsrtsub =
501 {
502     WORK_DECSRTSUB,
503     "SRT Subtitle Decoder",
504     decsrtInit,
505     decsrtWork,
506     decsrtClose
507 };