enum
{
k_state_inEntry,
+ k_state_inEntry_or_new, /* set when a blank line arrives while an entry number was expected; the next line is re-dispatched as a possible entry number if it is a bare integer, otherwise (when not resyncing) as entry text */
k_state_potential_new_entry,
k_state_timecode,
};
long offset, duration;
long start, stop;
char text[1024];
+ int pos;
} srt_entry_t;
/*
*/
struct hb_work_private_s
{
- hb_job_t *job;
- FILE *file;
+ hb_job_t * job;
+ FILE * file; /* input stream; utf8_fill() reads it in raw chunks */
+ char buf[1024]; /* raw bytes read from file that have not been converted yet */
+ int pos; /* index in buf of the next byte to hand to iconv */
+ int end; /* number of valid bytes currently held in buf */
+ char utf8_buf[2048]; /* iconv output: converted text waiting to be split into lines */
+ int utf8_pos; /* index in utf8_buf of the next byte get_line() will consume */
+ int utf8_end; /* index one past the last converted byte in utf8_buf */
unsigned long current_time;
unsigned long number_of_entries;
+ unsigned long last_entry_number; /* number parsed from the most recently accepted entry header line */
unsigned long current_state;
srt_entry_t current_entry;
iconv_t *iconv_context;
uint64_t stop_time; // In HB time
};
-static struct start_and_end read_time_from_string( const char* timeString )
+static int /* Parse one SRT timecode line ("HH:MM:SS,mmm --> HH:MM:SS,mmm").
+ * On success stores the start and end times, both expressed in
+ * milliseconds, into *result and returns 1. Returns 0, leaving *result
+ * untouched, when the line does not yield all eight numeric fields. */
+read_time_from_string( const char* timeString, struct start_and_end *result )
{
// for ex. 00:00:15,248 --> 00:00:16,545
long houres1, minutes1, seconds1, milliseconds1,
- houres2, minutes2, seconds2, milliseconds2;
+ houres2, minutes2, seconds2, milliseconds2;
+ int scanned; /* number of fields sscanf managed to convert */
- sscanf(timeString, "%ld:%ld:%ld,%ld --> %ld:%ld:%ld,%ld\n", &houres1, &minutes1, &seconds1, &milliseconds1,
- &houres2, &minutes2, &seconds2, &milliseconds2);
-
- struct start_and_end result = {
- milliseconds1 + seconds1*1000 + minutes1*60*1000 + houres1*60*60*1000,
- milliseconds2 + seconds2*1000 + minutes2*60*1000 + houres2*60*60*1000};
- return result;
+ scanned = sscanf(timeString, "%ld:%ld:%ld,%ld --> %ld:%ld:%ld,%ld\n",
+ &houres1, &minutes1, &seconds1, &milliseconds1,
+ &houres2, &minutes2, &seconds2, &milliseconds2);
+ if (scanned != 8) /* anything short of 4 + 4 fields is not a timecode line */
+ {
+ return 0;
+ }
+ result->start = /* hours, minutes and seconds folded into milliseconds */
+ milliseconds1 + seconds1*1000 + minutes1*60*1000 + houres1*60*60*1000;
+ result->end =
+ milliseconds2 + seconds2*1000 + minutes2*60*1000 + houres2*60*60*1000;
+ return 1;
+}
+/*
+ * utf8_fill: top up pv->utf8_buf with text converted by iconv from pv->file.
+ *
+ * Bytes of pv->utf8_buf that have not been consumed yet are first moved to
+ * the start of the buffer. Raw input is then read into pv->buf in chunks of
+ * at most 1024 bytes and run through pv->iconv_context until the output
+ * buffer has no room left or no more input can be read.
+ *
+ * Returns 1 when the output buffer was filled completely, or when an iconv
+ * call in this invocation left the output buffer non-empty. Returns 0 when
+ * the input ran dry before that happened.
+ */
+static int utf8_fill( hb_work_private_t * pv )
+{
+ int bytes, conversion = 0;
+ size_t out_size;
+
+ /* Align utf8 data to beginning of the buffer so that we can
+ * fill the buffer to its maximum */
+ memmove( pv->utf8_buf, pv->utf8_buf + pv->utf8_pos, pv->utf8_end - pv->utf8_pos );
+ pv->utf8_end -= pv->utf8_pos;
+ pv->utf8_pos = 0;
+ out_size = 2048 - pv->utf8_end; /* free space left in utf8_buf */
+ while( out_size )
+ {
+ char *p, *q;
+ size_t in_size, retval;
+ /* Raw input buffer fully consumed: read the next chunk from the file */
+ if( pv->end == pv->pos )
+ {
+ bytes = fread( pv->buf, 1, 1024, pv->file );
+ pv->pos = 0;
+ pv->end = bytes;
+ if( bytes == 0 ) /* nothing more could be read */
+ {
+ if( conversion )
+ return 1;
+ else
+ return 0;
+ }
+ }
+ /* Convert the pending raw bytes, appending after the data already in utf8_buf */
+ p = pv->buf + pv->pos;
+ q = pv->utf8_buf + pv->utf8_end;
+ in_size = pv->end - pv->pos;
+ /* iconv advances p and q and decrements in_size and out_size as it goes */
+ retval = iconv( pv->iconv_context, &p, &in_size, &q, &out_size);
+ if( q != pv->utf8_buf + pv->utf8_pos ) /* utf8_pos is 0 here, so: output buffer is non-empty */
+ conversion = 1;
+ /* Record how far both buffers have progressed */
+ pv->utf8_end = q - pv->utf8_buf;
+ pv->pos = p - pv->buf;
+ /* Decide how to continue based on why iconv stopped */
+ if( ( retval == -1 ) && ( errno == EINVAL ) )
+ {
+ /* Incomplete multibyte sequence, read more data */
+ memmove( pv->buf, p, pv->end - pv->pos ); /* keep the unconverted tail at the front of buf */
+ pv->end -= pv->pos;
+ pv->pos = 0;
+ bytes = fread( pv->buf + pv->end, 1, 1024 - pv->end, pv->file );
+ if( bytes == 0 ) /* input ended in the middle of a character */
+ {
+ if( !conversion )
+ return 0;
+ else
+ return 1;
+ }
+ pv->end += bytes;
+ } else if ( ( retval == -1 ) && ( errno == EILSEQ ) )
+ {
+ hb_error( "Invalid byte for codeset in input, discard byte" );
+ /* Try the next byte of the input */
+ pv->pos++;
+ } else if ( ( retval == -1 ) && ( errno == E2BIG ) )
+ {
+ /* buffer full */
+ return conversion;
+ }
+ }
+ return 1; /* out_size reached 0: utf8_buf is completely full */
+}
+
+/*
+ * get_line: copy the next line of converted text into buf.
+ *
+ * Bytes are taken from pv->utf8_buf, which is refilled through utf8_fill()
+ * whenever it runs empty. A line ends at '\n'; the newline is kept in buf
+ * and followed by a terminating NUL.
+ *
+ * pv   - decoder state holding the converted-text buffer
+ * buf  - destination for the line, always NUL-terminated on return
+ * size - capacity of buf in bytes
+ *
+ * Returns 1 when buf holds a line (possibly a final line that has no
+ * trailing newline) and 0 when the input is exhausted and nothing was
+ * copied. If no newline is found within size - 1 bytes, the bytes consumed
+ * so far are dropped, buf is set to the empty string and 1 is returned.
+ */
+static int get_line( hb_work_private_t * pv, char *buf, int size )
+{
+    int i;
+    char c;
+
+    /* Find newline in converted UTF-8 buffer */
+    for( i = 0; i < size - 1; i++ )
+    {
+        if( pv->utf8_pos >= pv->utf8_end )
+        {
+            if( !utf8_fill( pv ) )
+            {
+                /* End of input. Terminate whatever was collected: the
+                 * caller reuses its line buffer and hands it to
+                 * strlen/strtol/sscanf, so without this a last line
+                 * lacking a newline would be followed by stale bytes
+                 * from an earlier, longer line. */
+                buf[i] = '\0';
+                return i != 0;
+            }
+        }
+        c = pv->utf8_buf[pv->utf8_pos++];
+        if( c == '\n' )
+        {
+            buf[i] = '\n';
+            buf[i+1] = '\0';
+            return 1;
+        }
+        buf[i] = c;
+    }
+    /* Line did not fit in buf: discard it and report an empty line */
+    buf[0] = '\0';
+    return 1;
}
/*
*/
static hb_buffer_t *srt_read( hb_work_private_t *pv )
{
-
char line_buffer[1024];
+ int reprocess = 0, resync = 0;
if( !pv->file )
{
return NULL;
}
- while( fgets( line_buffer, sizeof( line_buffer ), pv->file ) )
+ while( reprocess || get_line( pv, line_buffer, sizeof( line_buffer ) ) )
{
+ reprocess = 0;
switch (pv->current_state)
{
case k_state_timecode:
{
- struct start_and_end timing = read_time_from_string( line_buffer );
+ struct start_and_end timing;
+ int result;
+
+ result = read_time_from_string( line_buffer, &timing );
+ if (!result)
+ {
+ resync = 1;
+ pv->current_state = k_state_potential_new_entry;
+ continue;
+ }
pv->current_entry.duration = timing.end - timing.start;
pv->current_entry.offset = timing.start - pv->current_time;
pv->current_entry.stop = timing.end;
pv->current_state = k_state_inEntry;
- continue;
+ continue;
+ }
+
+ case k_state_inEntry_or_new:
+ {
+ char *endpoint;
+ long entry_number;
+ /*
+ * Is this really new next entry begin?
+ */
+ entry_number = strtol(line_buffer, &endpoint, 10);
+ if (endpoint == line_buffer ||
+ (endpoint && *endpoint != '\n' && *endpoint != '\r'))
+ {
+ /*
+ * Doesn't resemble an entry number
+ * must still be in an entry
+ */
+ if (!resync)
+ {
+ reprocess = 1;
+ pv->current_state = k_state_inEntry;
+ }
+ continue;
+ }
+ reprocess = 1;
+ pv->current_state = k_state_potential_new_entry;
+ break;
}
-
+
case k_state_inEntry:
{
- char *p, *q;
- size_t in_size;
- size_t out_size;
- size_t retval;
+ char *q;
+ int size, len;
// If the current line is empty, we assume this is the
// seperation betwene two entries. In case we are wrong,
continue;
}
-
- for( q = pv->current_entry.text; (q < pv->current_entry.text+1024) && *q; q++);
-
- p = line_buffer;
-
- in_size = strlen(line_buffer);
- out_size = (pv->current_entry.text+1024) - q;
-
- retval = iconv( pv->iconv_context, &p, &in_size, &q, &out_size);
- *q = '\0';
-
- if( ( retval == -1 ) && ( errno == EINVAL ) )
- {
- hb_error( "Invalid shift sequence" );
- } else if ( ( retval == -1 ) && ( errno == EILSEQ ) )
- {
- hb_error( "Invalid byte for codeset in input, %"PRId64" bytes discarded", (int64_t)in_size);
- } else if ( ( retval == -1 ) && ( errno == E2BIG ) )
- {
- hb_error( "Not enough space in output buffer");
- }
-
- break;
+ q = pv->current_entry.text + pv->current_entry.pos;
+ len = strlen( line_buffer );
+ size = MIN(1024 - pv->current_entry.pos - 1, len );
+ memcpy(q, line_buffer, size);
+ pv->current_entry.pos += size;
+ pv->current_entry.text[pv->current_entry.pos] = '\0';
+ break;
}
-
+
case k_state_potential_new_entry:
{
- const char endpoint[] = "\0";
- const unsigned long potential_entry_number = strtol(line_buffer, (char**)&endpoint, 10);
+ char *endpoint;
+ long entry_number;
hb_buffer_t *buffer = NULL;
/*
* Is this really new next entry begin?
*/
- if (potential_entry_number == pv->number_of_entries + 1) {
+ entry_number = strtol(line_buffer, &endpoint, 10);
+ if (!resync && (*line_buffer == '\n' || *line_buffer == '\r'))
+ {
/*
- * We found the next entry - or a really rare error condition
+ * Well.. looks like we are in the wrong mode.. lets add the
+ * newline we misinterpreted...
*/
- if( *pv->current_entry.text )
+ strncat(pv->current_entry.text, " ", 1024);
+ pv->current_state = k_state_inEntry_or_new;
+ continue;
+ }
+ if (endpoint == line_buffer ||
+ (endpoint && *endpoint != '\n' && *endpoint != '\r'))
+ {
+ /*
+ * Well.. looks like we are in the wrong mode.. lets add the
+ * line we misinterpreted...
+ */
+ if (!resync)
{
- long length;
- char *p;
- uint64_t start_time = ( pv->current_entry.start +
- pv->subtitle->config.offset ) * 90;
- uint64_t stop_time = ( pv->current_entry.stop +
- pv->subtitle->config.offset ) * 90;
-
- if( !( start_time > pv->start_time && stop_time < pv->stop_time ) )
- {
- hb_deep_log( 3, "Discarding SRT at time start %lld, stop %lld", start_time, stop_time);
- memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
- ++(pv->number_of_entries);
- pv->current_state = k_state_timecode;
- continue;
- }
+ reprocess = 1;
+ pv->current_state = k_state_inEntry;
+ }
+ continue;
+ }
+ /*
+ * We found the next entry - or a really rare error condition
+ */
+ pv->last_entry_number = entry_number;
+ resync = 0;
+ if( *pv->current_entry.text )
+ {
+ long length;
+ char *p, *q;
+ int line = 1;
+ uint64_t start_time = ( pv->current_entry.start +
+ pv->subtitle->config.offset ) * 90;
+ uint64_t stop_time = ( pv->current_entry.stop +
+ pv->subtitle->config.offset ) * 90;
+
+ if( !( start_time > pv->start_time && stop_time < pv->stop_time ) )
+ {
+ hb_deep_log( 3, "Discarding SRT at time start %"PRId64", stop %"PRId64, start_time, stop_time);
+ memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
+ ++(pv->number_of_entries);
+ pv->current_state = k_state_timecode;
+ continue;
+ }
- length = strlen( pv->current_entry.text );
+ length = strlen( pv->current_entry.text );
- for( p = pv->current_entry.text; *p; p++)
+ for( q = p = pv->current_entry.text; *p; p++)
+ {
+ if( *p == '\n' )
{
- if( *p == '\n' || *p == '\r' )
+ if ( line == 1 )
+ {
+ *q = *p;
+ line = 2;
+ }
+ else
{
- *p = ' ';
+ *q = ' ';
}
+ q++;
}
-
- buffer = hb_buffer_init( length + 1 );
-
- if( buffer )
+ else if( *p != '\r' )
{
- buffer->start = start_time - pv->start_time;
- buffer->stop = stop_time - pv->start_time;
-
- memcpy( buffer->data, pv->current_entry.text, length + 1 );
+ *q = *p;
+ q++;
+ }
+ else
+ {
+ length--;
}
}
- memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
- ++(pv->number_of_entries);
- pv->current_state = k_state_timecode;
+ *q = '\0';
+
+ buffer = hb_buffer_init( length + 1 );
+
if( buffer )
{
- return buffer;
+ buffer->start = start_time - pv->start_time;
+ buffer->stop = stop_time - pv->start_time;
+
+ memcpy( buffer->data, pv->current_entry.text, length + 1 );
}
- continue;
- } else {
- /*
- * Well.. looks like we are in the wrong mode.. lets add the
- * newline we misinterpreted...
- */
- strncat(pv->current_entry.text, " ", 1024);
- pv->current_state = k_state_inEntry;
}
-
- break;
+ memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
+ ++(pv->number_of_entries);
+ pv->current_state = k_state_timecode;
+ if( buffer )
+ {
+ return buffer;
+ }
+ continue;
+ }
+ }
+ }
+
+ hb_buffer_t *buffer = NULL;
+ if( *pv->current_entry.text )
+ {
+ long length;
+ char *p, *q;
+ int line = 1;
+ uint64_t start_time = ( pv->current_entry.start +
+ pv->subtitle->config.offset ) * 90;
+ uint64_t stop_time = ( pv->current_entry.stop +
+ pv->subtitle->config.offset ) * 90;
+
+ if( !( start_time > pv->start_time && stop_time < pv->stop_time ) )
+ {
+ hb_deep_log( 3, "Discarding SRT at time start %"PRId64", stop %"PRId64, start_time, stop_time);
+ memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
+ return NULL;
+ }
+
+ length = strlen( pv->current_entry.text );
+
+ for( q = p = pv->current_entry.text; *p; p++)
+ {
+ if( *p == '\n' )
+ {
+ if ( line == 1 )
+ {
+ *q = *p;
+ line = 2;
+ }
+ else
+ {
+ *q = ' ';
+ }
+ q++;
+ }
+ else if( *p != '\r' )
+ {
+ *q = *p;
+ q++;
+ }
+ else
+ {
+ length--;
+ }
}
+ *q = '\0';
+
+ buffer = hb_buffer_init( length + 1 );
+
+ if( buffer )
+ {
+ buffer->start = start_time - pv->start_time;
+ buffer->stop = stop_time - pv->start_time;
+
+ memcpy( buffer->data, pv->current_entry.text, length + 1 );
}
}
+ memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
+ if( buffer )
+ {
+ return buffer;
+ }
return NULL;
}
pv->current_state = k_state_potential_new_entry;
pv->number_of_entries = 0;
+ pv->last_entry_number = 0;
pv->current_time = 0;
pv->subtitle = w->subtitle;
retval = 0;
}
}
- chapter = hb_list_item( title->list_chapter, i - 1 );
-
- if( chapter )
+ pv->stop_time = pv->start_time;
+ for( i = job->chapter_start; i <= job->chapter_end; ++i )
{
- pv->stop_time = pv->start_time + chapter->duration;
- } else {
- hb_error( "Could not locate chapter %d for SRT stop time", i );
- retval = 0;
+ chapter = hb_list_item( title->list_chapter, i - 1 );
+ if( chapter )
+ {
+ pv->stop_time += chapter->duration;
+ } else {
+ hb_error( "Could not locate chapter %d for SRT start time", i );
+ retval = 0;
+ }
}
- hb_deep_log( 3, "SRT Start time %lld, stop time %lld", pv->start_time, pv->stop_time);
+ hb_deep_log( 3, "SRT Start time %"PRId64", stop time %"PRId64, pv->start_time, pv->stop_time);
- pv->iconv_context = iconv_open( "utf8", pv->subtitle->config.src_codeset );
+ pv->iconv_context = iconv_open( "utf-8", pv->subtitle->config.src_codeset );
if( pv->iconv_context == (iconv_t) -1 )
*buf_in = NULL;
*buf_out = out;
} else {
- printf("\nSRT Done\n");
*buf_out = NULL;
return HB_WORK_OK;
}