- /* The first video packet defines 'time zero' so discard
- data until we get a video packet with a PTS & DTS */
- if ( buf->id == r->title->video_id && buf->start != -1 &&
- buf->renderOffset != -1 )
+ // The first data packet with a PTS & DTS from an audio or video
+ // stream that we're decoding defines 'time zero'. Discard packets
+ // until we get one.
+ if ( buf->start != -1 && buf->renderOffset != -1 &&
+ ( buf->id == r->title->video_id || is_audio( r, buf->id ) ) )