// mythsub.c: Extract closed caption data from MythTV NuppelVideo
// files and save to JacoSub format.
// Copyright (C) 2004 Aaron L. Frerichs
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// The starting point for this program was mythframes.c, taken from
// Michael J. Pedersen's distribution
// of mythmkmovie-1.1.3
//
// This program only works with US Closed Caption data, and as far as I
// know, only with non-live broadcasts, as I have not tested using
// the scrolling data that appears in live broadcasts

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// The following four structs are taken from libs/libmythtv/format.h
// They are read straight from disk with fread(), so their field order
// and types must not be changed.
typedef struct rtfileheader
{
    char finfo[12];
    char version[5];
    int width;
    int height;
    int desiredwidth;
    int desiredheight;
    char pimode;
    double aspect;
    double fps;
    int videoblocks;
    int audioblocks;
    int textsblocks;
    int keyframedist;
} rtfileheader;

typedef struct rtframeheader
{
    char frametype;
    char comptype;
    char keyframe;
    char filters;
    int timecode;
    int packetlength;
} rtframeheader;

typedef struct seektable_entry
{
    long long file_offset;
    int keyframe_number;
} seektable_entry;

typedef struct ccsubtitle
{
    unsigned char row;
    unsigned char rowcount;
    unsigned char resumedirect;
    unsigned char resumetext;
    unsigned char clr;
    unsigned char len;
} ccsubtitle;

/* Size of the caption text buffers (a text packet must be smaller than this) */
#define MAXTXTBUFFER 256
/* Number of caption line slots buffered at any one time */
#define OUTLINES 25

/* Some debugging functions */

/* Print every field of a caption packet header to stdout. Always returns 0. */
int printccsubtitle(ccsubtitle *cc)
{
    printf("row: %d\t rc: %d\t rd: %d\t rt: %d\t clr: %d\t len: %d\t\n",
           cc->row,
           cc->rowcount,
           cc->resumedirect,
           cc->resumetext,
           cc->clr,
           cc->len );
    return 0;
}

/* Print the fields of a frame header to stdout. Always returns 0. */
int printframeheader(rtframeheader *current)
{
    printf("\nrtframeheader\n");
    printf("\tframetype    = %c\n", current->frametype);
    printf("\tcomptype     = %c\n", current->comptype);
    printf("\tkeyframe     = %d\n", current->keyframe);
    printf("\ttimecode     = %d\n", current->timecode);
    printf("\tpacketlength = %d\n", current->packetlength);
    return 0;
}

/* Print the fields of the file header to stdout. Always returns 0. */
int printrtfileheader(rtfileheader *info)
{
    printf("rtfileheader\n");
    /* finfo/version come raw from the file and may lack a terminating
     * NUL, so bound the read by the size of each array. */
    printf("\tfinfo = %.12s\n", info->finfo);
    printf("\tversion = %.5s\n", info->version);
    printf("\twidth = %d\n", info->width);
    printf("\theight = %d\n", info->height);
    printf("\tdesiredwidth = %d\n", info->desiredwidth);
    printf("\tdesiredheight = %d\n", info->desiredheight);
    printf("\tpimode = %c\n", info->pimode);
    printf("\taspect = %f\n", info->aspect);
    printf("\tfps = %f\n", info->fps);
    printf("\tvideoblocks = %d\n", info->videoblocks);
    printf("\taudioblocks = %d\n", info->audioblocks);
    printf("\ttextsblocks = %d\n", info->textsblocks);
    printf("\tkeyframedist = %d\n", info->keyframedist);
    return 0;
}

/* Return true (-1) if txt consists only of spaces, tabs and newlines
 * (an empty string counts as all whitespace), 0 otherwise. */
int isallwhitespace( const char *txt )
{
    size_t i = strlen(txt);

    while (i--)
    {
        if ( txt[i] != ' ' && txt[i] != '\t' && txt[i] != '\n' )
            return 0;
    }
    return -1;
}

/* Write one buffered caption line to out as
 *     @<start> @<end> VB<line> <text>
 * unless the text is blank, then reset the slot (times and text) so it
 * can be reused.  text must point to a MAXTXTBUFFER-sized buffer. */
static void flushline(FILE *out, int line, int *start, int *end, char *text)
{
    if ( !isallwhitespace(text) )
        fprintf( out, "@%d @%d VB%d %s\n", *start, *end, line, text);
    *start = 0;
    *end = 0;
    memset( text, 0, MAXTXTBUFFER );
}

/* Usage: mythsub <nuvfile> <outfile>
 *
 * Reads the NuppelVideo file frame by frame, collects the text ('T')
 * packets into per-row line buffers and writes each line, with its
 * start and end time in milliseconds, to <outfile>.  Refuses to
 * overwrite an existing output file.  Returns 0 on success; exits with
 * status 1 on usage or file errors. */
int main (int argc, char **argv)
{
    rtfileheader info;
    rtframeheader current;
    FILE *nuv;
    FILE *out;

    if (argc != 3)
    {
        printf ("Usage: %s <nuvfile> <outfile>\n", argv[0]);
        exit(1);
    }

    /* The input is binary data; "rb" matters on platforms that
     * translate line endings in text mode. */
    nuv = fopen(argv[1], "rb");
    if (nuv == NULL)
    {
        printf ("ERROR! Unable to read file!\n");
        exit(1);
    }

    /* A short read leaves info partly uninitialised, so treat it the
     * same way as a header that fails validation. */
    if (fread(&info, sizeof(info), 1, nuv) != 1 || info.keyframedist < 1)
    {
        printf("ERROR! Corrupted file! Bad Header!\n");
        fclose(nuv);
        exit(1);
    }
    if (strncmp(info.finfo, "MythTVVideo",11) != 0)
    {
        printf("ERROR! Corrupted file! Wrong Format!\n");
        fclose(nuv);
        exit(1);
    }
    if (strncmp(info.version, "0.07",4) != 0)
    {
        printf("ERROR! Corrupted file! Wrong Version!\n");
        fclose(nuv);
        exit(1);
    }

    /* Do not overwrite an existing file. */
    out = fopen(argv[2], "r");
    if (out != NULL)
    {
        printf("ERROR! %s exists! Will not overwrite!\n", argv[2]);
        fclose(out);
        fclose(nuv);
        exit(1);
    }

    out = fopen(argv[2], "w");
    if (out == NULL)
    {
        printf ("ERROR! Unable to open %s for writing.\n", argv[2]);
        fclose(nuv);
        exit(1);
    }

    fprintf( out, "#TIMERES 1000\n");

    ccsubtitle cc;
    memset( &cc, 0, sizeof(cc) );
    char txtbuffer[MAXTXTBUFFER];
    memset( txtbuffer, 0, MAXTXTBUFFER );
    char outbuffer[OUTLINES][MAXTXTBUFFER];
    memset( outbuffer, 0, OUTLINES * MAXTXTBUFFER);
    int timestart[OUTLINES];
    int timeend[OUTLINES];
    memset( timestart, 0, OUTLINES * sizeof(int));
    memset( timeend, 0, OUTLINES * sizeof(int));
    int timecodeprev = 0;
    int timecodeoffset = 0;
    int i;

    for (;;)
    {
        /* Stop at end of file or on a truncated frame header instead of
         * processing stale header contents a second time. */
        if (fread(&current, sizeof(rtframeheader), 1, nuv) != 1)
            break;

        /* packetlength is used as a forward seek distance for every
         * frame type except 'R'; a negative value would seek backwards
         * and could loop forever. */
        if ( current.frametype != 'R' && current.packetlength < 0 )
        {
            printf( "ERROR! Corrupted frame header! Stopping.\n" );
            break;
        }

        /* checking the timecode to detect cutpoints
         *
         * Assuming that cutpoints are >= 1 second */
        if ( current.frametype == 'A' ||
             current.frametype == 'V' ||
             current.frametype == 'T' )
        {
            /* Assuming that no timecode will truly be over 12 hours */
            if ( current.timecode > (12*60*60*1000) )
                current.timecode = 0;

            if ( current.timecode > (timecodeprev + 1000) )
            {
                timecodeoffset = timecodeoffset + (current.timecode - timecodeprev);
                printf( "Cut-detected at time %dms, new offset %dms\n",
                        timecodeprev, timecodeoffset );
            }
            timecodeprev = current.timecode;
        }

        if ( current.frametype == 'T' )
        {
            if ( current.packetlength >= MAXTXTBUFFER )
            {
                printf( "Packet size error: %d > %d\n",
                        current.packetlength, MAXTXTBUFFER );
                /* Just skip this packet and try to continue */
                if ( fseek(nuv, current.packetlength, SEEK_CUR) != 0 )
                    break;
            }
            else if ( current.packetlength < (int)sizeof(ccsubtitle) )
            {
                /* Too short to hold the caption header: subtracting
                 * sizeof(ccsubtitle) would wrap to a huge unsigned
                 * length and overrun txtbuffer. */
                printf( "Packet size error: %d < %d\n",
                        current.packetlength, (int)sizeof(ccsubtitle) );
                if ( fseek(nuv, current.packetlength, SEEK_CUR) != 0 )
                    break;
            }
            else
            {
                size_t textlen = (size_t)current.packetlength - sizeof(ccsubtitle);
                /* Caption time with the detected cuts removed */
                int now = current.timecode - timecodeoffset;

                /* read the next packet */
                if ( fread( &cc, sizeof(ccsubtitle), 1, nuv) != 1 )
                    break;
                memset( txtbuffer, 0, MAXTXTBUFFER );
                /* textlen < MAXTXTBUFFER, so the buffer stays NUL terminated */
                if ( textlen > 0 && fread( txtbuffer, textlen, 1, nuv) != 1 )
                    break;

                /* New text without a clear: the lines currently held
                 * end now and are written out. */
                if( cc.clr == 0 && cc.row > 0 )
                {
                    for(i=0;i<OUTLINES;i++)
                    {
                        if( timestart[i] > 0 && outbuffer[i][0] != 0 && textlen > 0 )
                        {
                            timeend[i] = now;
                            flushline( out, i, &timestart[i], &timeend[i], outbuffer[i] );
                        }
                    }
                }

                /* Clear request: write out everything still held.  The
                 * end time is the time of the clear; previously the
                 * never-assigned timeend (always 0) was printed. */
                if( cc.clr == 1 )
                {
                    for(i=0;i<OUTLINES;i++)
                    {
                        if( timestart[i] > 0 && outbuffer[i][0] != 0 )
                        {
                            timeend[i] = now;
                            flushline( out, i, &timestart[i], &timeend[i], outbuffer[i] );
                        }
                    }
                }

                if( textlen > 0 )
                {
                    /* split the text on newlines; each piece goes to
                     * the next row slot, starting at cc.row */
                    char *pch;
                    pch = strtok( txtbuffer, "\n" );
                    i = cc.row;
                    while( pch != NULL )
                    {
                        /* cc.row comes from the file (0..255) and must
                         * not index past the OUTLINES slots */
                        if ( i >= OUTLINES )
                        {
                            printf( "Row error: %d >= %d, text dropped\n",
                                    i, OUTLINES );
                            break;
                        }
                        snprintf( outbuffer[i], MAXTXTBUFFER, "%s", pch );
                        timestart[i] = now;
                        i++;
                        pch = strtok(NULL,"\n");
                    }
                }
            }
        }
        else if ( current.frametype != 'R' )
        {
            /* Not a text packet: skip its payload ('R' frames are not skipped over) */
            if ( fseek(nuv, current.packetlength, SEEK_CUR) != 0 )
                break;
        }

        /* A 'Q' frame ends the scan */
        if ( current.frametype == 'Q' )
            break;
    }

    /* catch any uncleared lines so that they output to the screen;
     * they end at the last timecode seen */
    for(i=0;i<OUTLINES;i++)
    {
        if( timestart[i] > 0 && outbuffer[i][0] != 0 )
        {
            timeend[i] = timecodeprev - timecodeoffset;
            flushline( out, i, &timestart[i], &timeend[i], outbuffer[i] );
        }
    }

    fclose(nuv);
    /* fclose flushes buffered output, so a failure here means the
     * subtitle file is incomplete */
    if ( fclose(out) != 0 )
    {
        printf ("ERROR! Unable to finish writing %s.\n", argv[2]);
        return(1);
    }
    return(0);
}