X-Git-Url: http://git.osdn.jp/view?a=blobdiff_plain;f=libhb%2Fdecsrtsub.c;h=15015aae21ddd7c001f58cb23df0dd0c4a3e316f;hb=033e32de9c380f54c7d1362a3979da205ebc3a29;hp=05fdb7646769baeed028ffb3b4f5189aeff88bd0;hpb=3548036ec5601f2a5033ca7e256ec4fe088482b8;p=handbrake-jp%2Fhandbrake-jp-git.git diff --git a/libhb/decsrtsub.c b/libhb/decsrtsub.c index 05fdb764..15015aae 100644 --- a/libhb/decsrtsub.c +++ b/libhb/decsrtsub.c @@ -25,6 +25,7 @@ typedef struct srt_entry_s { long offset, duration; long start, stop; char text[1024]; + int pos; } srt_entry_t; /* @@ -32,8 +33,14 @@ typedef struct srt_entry_s { */ struct hb_work_private_s { - hb_job_t *job; - FILE *file; + hb_job_t * job; + FILE * file; + char buf[1024]; + int pos; + int end; + char utf8_buf[2048]; + int utf8_pos; + int utf8_end; unsigned long current_time; unsigned long number_of_entries; unsigned long current_state; @@ -60,12 +67,112 @@ static struct start_and_end read_time_from_string( const char* timeString ) return result; } +static int utf8_fill( hb_work_private_t * pv ) +{ + int bytes, conversion = 0; + size_t out_size; + + /* Align utf8 data to beginning of the buffer so that we can + * fill the buffer to its maximum */ + memmove( pv->utf8_buf, pv->utf8_buf + pv->utf8_pos, pv->utf8_end - pv->utf8_pos ); + pv->utf8_end -= pv->utf8_pos; + pv->utf8_pos = 0; + out_size = 2048 - pv->utf8_end; + while( out_size ) + { + char *p, *q; + size_t in_size, retval; + + if( pv->end == pv->pos ) + { + bytes = fread( pv->buf, 1, 1024, pv->file ); + pv->pos = 0; + pv->end = bytes; + if( bytes == 0 ) + { + if( conversion ) + return 1; + else + return 0; + } + } + + p = pv->buf + pv->pos; + q = pv->utf8_buf + pv->utf8_end; + in_size = pv->end - pv->pos; + + retval = iconv( pv->iconv_context, &p, &in_size, &q, &out_size); + if( q != pv->utf8_buf + pv->utf8_pos ) + conversion = 1; + + pv->utf8_end = q - pv->utf8_buf; + pv->pos = p - pv->buf; + + if( ( retval == -1 ) && ( errno == EINVAL ) ) + { + /* Incomplete multibyte sequence, read more data */ + memmove( pv->buf, p, pv->end - pv->pos ); + pv->end -= pv->pos; + pv->pos = 0; + bytes = fread( pv->buf + pv->end, 1, 1024 - pv->end, pv->file ); + if( bytes == 0 ) + { + if( !conversion ) + return 0; + else + return 1; + } + pv->end += bytes; + } else if ( ( retval == -1 ) && ( errno == EILSEQ ) ) + { + hb_error( "Invalid byte for codeset in input, discard byte" ); + /* Try the next byte of the input */ + pv->pos++; + } else if ( ( retval == -1 ) && ( errno == E2BIG ) ) + { + /* buffer full */ + return conversion; + } + } + return 1; +} + +static int get_line( hb_work_private_t * pv, char *buf, int size ) +{ + int i; + char c; + + /* Find newline in converted UTF-8 buffer */ + for( i = 0; i < size - 1; i++ ) + { + if( pv->utf8_pos >= pv->utf8_end ) + { + if( !utf8_fill( pv ) ) + { + if( i ) + return 1; + else + return 0; + } + } + c = pv->utf8_buf[pv->utf8_pos++]; + if( c == '\n' ) + { + buf[i] = '\n'; + buf[i+1] = '\0'; + return 1; + } + buf[i] = c; + } + buf[0] = '\0'; + return 1; +} + /* * Read the SRT file and put the entries into the subtitle fifo for all to read */ static hb_buffer_t *srt_read( hb_work_private_t *pv ) { - char line_buffer[1024]; if( !pv->file ) @@ -73,7 +180,7 @@ static hb_buffer_t *srt_read( hb_work_private_t *pv ) return NULL; } - while( fgets( line_buffer, sizeof( line_buffer ), pv->file ) ) + while( get_line( pv, line_buffer, sizeof( line_buffer ) ) ) { switch (pv->current_state) { @@ -94,10 +201,8 @@ static hb_buffer_t *srt_read( hb_work_private_t *pv ) case k_state_inEntry: { - char *p, *q; - size_t in_size; - size_t out_size; - size_t retval; + char *q; + int size, len; // If the current line is empty, we assume this is the // seperation betwene two entries. In case we are wrong, @@ -107,28 +212,12 @@ static hb_buffer_t *srt_read( hb_work_private_t *pv ) continue; } - - for( q = pv->current_entry.text; (q < pv->current_entry.text+1024) && *q; q++); - - p = line_buffer; - - in_size = strlen(line_buffer); - out_size = (pv->current_entry.text+1024) - q; - - retval = iconv( pv->iconv_context, &p, &in_size, &q, &out_size); - *q = '\0'; - - if( ( retval == -1 ) && ( errno == EINVAL ) ) - { - hb_error( "Invalid shift sequence" ); - } else if ( ( retval == -1 ) && ( errno == EILSEQ ) ) - { - hb_error( "Invalid byte for codeset in input, %"PRId64" bytes discarded", (int64_t)in_size); - } else if ( ( retval == -1 ) && ( errno == E2BIG ) ) - { - hb_error( "Not enough space in output buffer"); - } - + q = pv->current_entry.text + pv->current_entry.pos; + len = strlen( line_buffer ); + size = MIN(1024 - pv->current_entry.pos - 1, len ); + memcpy(q, line_buffer, size); + pv->current_entry.pos += size; + pv->current_entry.text[pv->current_entry.pos] = '\0'; break; } @@ -140,14 +229,16 @@ static hb_buffer_t *srt_read( hb_work_private_t *pv ) /* * Is this really new next entry begin? */ - if (potential_entry_number == pv->number_of_entries + 1) { + if (potential_entry_number == pv->number_of_entries + 1) + { /* * We found the next entry - or a really rare error condition */ if( *pv->current_entry.text ) { long length; - char *p; + char *p, *q; + int line = 1; uint64_t start_time = ( pv->current_entry.start + pv->subtitle->config.offset ) * 90; uint64_t stop_time = ( pv->current_entry.stop + @@ -164,13 +255,32 @@ static hb_buffer_t *srt_read( hb_work_private_t *pv ) length = strlen( pv->current_entry.text ); - for( p = pv->current_entry.text; *p; p++) + for( q = p = pv->current_entry.text; *p; p++) { - if( *p == '\n' || *p == '\r' ) + if( *p == '\n' ) + { + if ( line == 1 ) + { + *q = *p; + line = 2; + } + else + { + *q = ' '; + } + q++; + } + else if( *p != '\r' ) { - *p = ' '; + *q = *p; + q++; + } + else + { + length--; } } + *q = '\0'; buffer = hb_buffer_init( length + 1 ); @@ -190,7 +300,9 @@ static hb_buffer_t *srt_read( hb_work_private_t *pv ) return buffer; } continue; - } else { + } + else + { /* * Well.. looks like we are in the wrong mode.. lets add the * newline we misinterpreted... @@ -203,6 +315,69 @@ static hb_buffer_t *srt_read( hb_work_private_t *pv ) } } } + + hb_buffer_t *buffer = NULL; + if( *pv->current_entry.text ) + { + long length; + char *p, *q; + int line = 1; + uint64_t start_time = ( pv->current_entry.start + + pv->subtitle->config.offset ) * 90; + uint64_t stop_time = ( pv->current_entry.stop + + pv->subtitle->config.offset ) * 90; + + if( !( start_time > pv->start_time && stop_time < pv->stop_time ) ) + { + hb_deep_log( 3, "Discarding SRT at time start %"PRId64", stop %"PRId64, start_time, stop_time); + memset( &pv->current_entry, 0, sizeof( srt_entry_t ) ); + return NULL; + } + + length = strlen( pv->current_entry.text ); + + for( q = p = pv->current_entry.text; *p; p++) + { + if( *p == '\n' ) + { + if ( line == 1 ) + { + *q = *p; + line = 2; + } + else + { + *q = ' '; + } + q++; + } + else if( *p != '\r' ) + { + *q = *p; + q++; + } + else + { + length--; + } + } + *q = '\0'; + + buffer = hb_buffer_init( length + 1 ); + + if( buffer ) + { + buffer->start = start_time - pv->start_time; + buffer->stop = stop_time - pv->start_time; + + memcpy( buffer->data, pv->current_entry.text, length + 1 ); + } + } + memset( &pv->current_entry, 0, sizeof( srt_entry_t ) ); + if( buffer ) + { + return buffer; + } return NULL; } @@ -249,19 +424,22 @@ static int decsrtInit( hb_work_object_t * w, hb_job_t * job ) retval = 0; } } - chapter = hb_list_item( title->list_chapter, i - 1 ); - - if( chapter ) + pv->stop_time = pv->start_time; + for( i = job->chapter_start; i <= job->chapter_end; ++i ) { - pv->stop_time = pv->start_time + chapter->duration; - } else { - hb_error( "Could not locate chapter %d for SRT stop time", i ); - retval = 0; + chapter = hb_list_item( title->list_chapter, i - 1 ); + if( chapter ) + { + pv->stop_time += chapter->duration; + } else { + hb_error( "Could not locate chapter %d for SRT start time", i ); + retval = 0; + } } hb_deep_log( 3, "SRT Start time %"PRId64", stop time %"PRId64, pv->start_time, pv->stop_time); - pv->iconv_context = iconv_open( "utf8", pv->subtitle->config.src_codeset ); + pv->iconv_context = iconv_open( "utf-8", pv->subtitle->config.src_codeset ); if( pv->iconv_context == (iconv_t) -1 )