schopfer@cui.unige.ch (SCHOPFER Olivier) (04/17/89)
Word2mif Version 1.4. Filter to convert word processor files from Microsoft Word 4.0 to Framemaker "MIF" (Maker Interchange Format). Calling sequence: un1sun1% word2mif input_file word2mif.data > output_file.mif word2mif.data is a data file containing the character translation information. If there is a style sheet, it is expected to be in the same directory as the input_file. Cut here ______________________________ # This is a shell archive. Remove anything before this line, then # unpack it by saving it in a file and typing "sh file". (Files # unpacked will be owned by you and have default permissions.) # # This archive contains: # word2mif.c word2mif.data word4.h echo x - word2mif.c cat > "word2mif.c" << '//E*O*F word2mif.c//' /* @(#) word2mif.c 1.4 8/25/88 Release 1.4 Olivier Schopfer, University of Geneva, Switzerland schopfer@cui.uucp schopfer@cgeuge51.BITNET Filter to convert word processor files from Microsoft Word 4.0 to Framemaker "MIF" (Maker Interchange Format). Calling sequence: un1sun1% word2mif input_file word2mif.data > output_file.mif word2mif.data is a data file containing the character translation information. If there is a style sheet, it is expected to be in the same directory as the input_file. 
*/

#include <ctype.h>
#include <stdio.h>
#include <sys/file.h>
#include "word4.h"

#define STRING_LENGTH 80        /* Max characters buffered in current_string before a flush */

/*
 * File-scope state shared by all the functions of the converter.
 * Names prefixed with "s_" hold the same kind of data for the style sheet.
 */
struct Header header,           /* File header */
              s_header;         /* Style sheet header */
char *text_array;               /* The text itself */
Uint text_array_size;           /* # of text bytes */
struct Page *CharPage,
            *ParPage,
            *SecTablePage,
            *SecPage;           /* Pointers to format info pages */
Uint CharPageNb,
     ParPageNb,
     FntbPageNb,                /* Number of pages */
     SecTablePageNb,
     SecPageNb;

/* Style sheet data */
struct Page *s_CharPage,
            *s_ParPage,
            *s_SecTablePage,
            *s_SecPage;         /* Pointers to format info pages */
Uint s_CharPageNb,
     s_ParPageNb,
     s_FntbPageNb,              /* Number of pages */
     s_SecTablePageNb,
     s_SecPageNb,
/* End of style sheet data */
     last_text_byte;            /* Offset of last real text byte (Except footnotes) */

/* Cursor into the format descriptors (FODs) of a run of formatting pages */
struct FOD_ptr                  /* Internal structure for FOD's pointers */
{
  Uint i_page,                  /* Page number of FOD */
       i_FOD,                   /* FOD number inside current page */
       fcLimit;                 /* Byte after last one concerned with this FOD */
  char *PROP;                   /* Pointer to CHP or PAP */
  char cch;                     /* Nb of defined bytes in CHP or PAP */
};

struct FOD_ptr ch_FOD,          /* Current character FOD */
               prev_ch_FOD,
               pa_FOD,          /* Current paragraph FOD */
               prev_pa_FOD;

/* One entry per style code: its two-letter key and the font of the paragraph style */
struct STYLE_KEY
{
  Uchar KEY[2];                 /* Two letters style code */
  char *PROP;                   /* Pointer to pgf's font */
  char cch;
};
struct STYLE_KEY *style_key;    /* Pointer to array of keys */

struct STYLE_PTR
{
  char *PROP;                   /* Pointer to CHP */
  char cch;                     /* Nb of defined bytes in CHP */
};
struct STYLE_PTR styles[30];    /* Pointers to styled CHP's */

struct FormatPROP *FPROP;
struct PROPerty_of_CHaracter *Chp,          /* Pointer to Char Property */
                             Current_CHP,   /* Current char. settings */
                             s_Current_CHP, /* Current char. settings (styled) */
                             Default_CHP,   /* Default values */
                             Temp_CHP;      /* Temporary storage */
struct PAragraphProperty *PAP,              /* Pointer to Paragraph Property */
                         Current_PAP,       /* Current parag.
settings */ Default_PAP, /* Default values */ Temp_PAP; /* Temporary storage */ struct TaBDescriptor *TBD; struct FootNoteTaBle *FNTB; struct FootNoteDescriptor *FND; struct SEctionProperty *SEP; struct SEctionTaBle *SETB; struct SEctionDescriptor *SED; char new_paragraph, /* Boolean, to tell if we are at the beginning of */ new_page, /* a Paragraph (i.e. the last byte was the end */ /* of one) */ processing_notes, /* Flag to tell if processing main text or notes */ styled_flag, /* Has a style sheet been read? */ styled_paragraph, /* Is current paragraph styled? */ current_string[81]; /* String for output */ Uint string_ptr; /* Index into array */ Uint Current_footnote = 1; /* Number of current footnote */ /* Conversion table for characters */ char *DataFileName; /* Data filename */ Uchar char_def[256]; /* Char translation table */ /* Miscellaneous conversion functions */ short Short(var) /* PC 16 bits to 68000 16 bits conversion */ PCshort var; { Ushort res; res = 0; res = (Ushort) ((var.byte[1]) << 8) | ((Ushort) var.byte[0]) ; return(res); } unsigned int Int(var) /* PC 32 bits to 68000 32 bits conversion */ PCint var; { unsigned int res; res = ((Uint)(Ushort)Short(var.word[1]) <<16 ) | ((Uint)(Ushort)Short(var.word[0])) ; return(res); } unsigned long Long(var) /* 64 bits conversion */ PClong var; { unsigned int res; res = ((Ulong)Int(var.lword[0]) << 32) | ((Ulong)Int(var.lword[1])); return(res); } /* ********************************************************************** Functions to handle formating properties ======================================== */ /* Set paragraph properties */ void Set_PAP(destPAP, srcPAP, defPAP, bytes_defined) struct PAragraphProperty *destPAP, /* Destination */ *srcPAP, /* Source PAP */ *defPAP; /* Default PAP, for undef. 
bytes */ Uchar bytes_defined; /* Number of defined bytes in srcPAP */ { char *dest_ptr, *src_ptr, *def_ptr; /* Pointer to corresponding char arrays */ int i; dest_ptr = (char *) destPAP; src_ptr = (char *) srcPAP; def_ptr = (char *) defPAP; /* Copy defined bytes from srcPAP, and others from defPAP */ for (i=0; i<sizeof(struct PAragraphProperty); i++) if (i < bytes_defined) dest_ptr[i] = src_ptr[i]; else dest_ptr[i] = def_ptr[i]; return; } /* Set character properties */ void Set_CHP(destCHP, srcCHP, defCHP, bytes_defined) struct PROPerty_of_CHaracter *destCHP, /* Destination */ *srcCHP, /* Source CHP */ *defCHP; /* Default CHP, for undef. bytes */ Uchar bytes_defined; /* Number of defined bytes in srcCHP */ { char *dest_ptr, *src_ptr, *def_ptr; /* Pointer to corresponding char arrays */ int i; dest_ptr = (char *) destCHP; src_ptr = (char *) srcCHP; def_ptr = (char *) defCHP; /* Copy defined bytes from srcCHP, and others from defCHP */ for (i=0; i<sizeof(struct PROPerty_of_CHaracter); i++) if (i < bytes_defined) dest_ptr[i] = src_ptr[i]; else dest_ptr[i] = def_ptr[i]; /* If character is styled, copy styled char data instead of standard data */ if (destCHP->word00.fStyled && styled_flag) Set_CHP(destCHP, styles[destCHP->word00.stc].PROP, defCHP, styles[destCHP->word00.stc].cch); /* Recursive call */ return; } /* ************************************************************* Function to set FOD pointer arguments to descr prev_descr gets the old values */ void set_FOD(page, descr, prev_descr) struct Page *page; /* Formatting information page (CharPage or ParPage) */ struct FOD_ptr *descr, /* pointer to current format descriptor to be set */ *prev_descr; /* old value of descr */ { struct FOrmatDescriptor *FOD; prev_descr->i_page = descr->i_page; prev_descr->i_FOD = descr->i_FOD; prev_descr->fcLimit = descr->fcLimit; prev_descr->PROP = descr->PROP; prev_descr->cch = descr->cch; FOD = (struct FOrmatDescriptor *) &page[descr->i_page].FODs[descr->i_FOD * sizeof(struct 
FOrmatDescriptor)];

  descr->fcLimit = Int(FOD->fcLim);
  if (((Ushort)Short(FOD->bfprop)) != 0xFFFF)   /* Property defined? */
  {
    /* bfprop is a byte offset into FODs[]: the byte found there is the
       length (cch), the property bytes start right after it */
    FPROP = (struct FormatPROP *) &page[descr->i_page].FODs[(Ushort)Short(FOD->bfprop)];
    descr->cch = FPROP->cch;
    descr->PROP = (char *) &page[descr->i_page].FODs[((Ushort)Short(FOD->bfprop)+1)];
  }
  else
  {
    /* No property attached: callers fall back on the default values */
    descr->cch = 0;
    descr->PROP = (char *) NULL;
  }
  return;
}

/* Find FOD associated with offset, into specified formatting page,
   set descr to point the Format descriptor,
   prev_descr the last descriptor
   Returns 0 if format didn't change, 1 if it did

   A descr->fcLimit of 127 is the "nothing read yet" marker set by
   init_defaults() (text starts at file offset 128). */
int find_FOD(page, offset, descr, prev_descr, pageNb)
struct Page *page;          /* Pointer to formatting info page */
Uint offset;                /* current offset into file */
struct FOD_ptr *descr,      /* Descriptor of current FOD */
               *prev_descr; /* Last descriptor */
Uint pageNb;                /* Number of available pages */
{
  if (descr->fcLimit == 127)
    set_FOD(page, descr, prev_descr);   /* Force reading of 1st FOD */

  if (offset == descr->fcLimit)     /* Right at beginning of new section */
  {
    if (page[descr->i_page].cfod <= (descr->i_FOD + 1))
    {
      if (descr->i_page+1 >= pageNb)    /* There is no more FOD */
      {
        descr->PROP=(char *) NULL;
        descr->cch =0;
        descr->fcLimit = offset+1;
        return(0);
      }
      descr->i_page++;
      descr->i_FOD = 0;     /* Beginning of a new page */
    }
    else
      descr->i_FOD++;       /* Skip to next FOD in the same page */
    set_FOD(page, descr, prev_descr);
    return(1);
  }
  else  /* offset <> fcLim */
  {
    if (prev_descr->fcLimit <= offset && offset < descr->fcLimit)
    {
      if (prev_descr->fcLimit == 127)   /* First section */
      {
        prev_descr->fcLimit = 0;
        return(1);
      }
      else
        return(0);  /* prev_lim <= offset < lim */
    }

    if (offset < descr->fcLimit)
    {   /* offset < prev_lim < lim */
      /* The offset lies before the current descriptor: restart the
         search from the first FOD of the first page */
      descr->i_page=0;
      descr->i_FOD=0;
      descr->fcLimit=0;
      descr->cch = 0;
      descr->PROP = (char *) NULL;
      set_FOD(page, descr, prev_descr);
    }

    /* Walk forward one FOD at a time */
    while ((prev_descr->fcLimit) < offset)
    {
      if (page[descr->i_page].cfod <= (descr->i_FOD + 1))
      {
        if (descr->i_page+1 >= pageNb)  /* There is no more FOD */
        {
          descr->PROP=(char
*) NULL;
          descr->cch =0;
          descr->fcLimit = offset+1;
          return(0);
        }
        descr->i_page++;
        descr->i_FOD = 0;   /* Beginning of a new page */
      }
      else
        descr->i_FOD++;     /* Skip to next FOD in the same page */
      set_FOD(page, descr, prev_descr);
    }
    return(1);
  }
}

/* Initialisation
   **************

   Resets the default and current paragraph/character properties, the two
   FOD cursors and the output string index. */
void init_defaults()
{
  char *ptr;
  int n;

  /* Default paragraph */
  ptr = (char *) &Default_PAP;
  for (n=0; n<sizeof(struct PAragraphProperty); n++)
    ptr[n]=0;               /* Every default value are zeroes */
  /* ...except these raw bytes.  Byte 10 is the low-order byte of dyaLine
     according to the offsets noted in word4.h (240 = standard spacing).
     NOTE(review): the meaning of bytes 0 and 2 depends on how the compiler
     lays out the bitfields -- confirm. */
  ptr[0]=61;
  ptr[2]=30;
  ptr[10]=240;
  Set_PAP(&Current_PAP, &Default_PAP, NULL, sizeof(struct PAragraphProperty));

  /* Default character */
  ptr = (char *) &Default_CHP;
  for (n=0; n<sizeof(struct PROPerty_of_CHaracter); n++)
    ptr[n]=0;               /* Everything is zero except... */
  ptr[2]=24;
  /* NOTE(review): output_CHP2() prints font code 6 as "Courier" and 24 as
     "Times", which does not match the comment below -- confirm which one
     is intended. */
  Default_CHP.word00.ftc = 6;   /* Default font is Times */
  Set_CHP(&Current_CHP, &Default_CHP, NULL, sizeof(struct PROPerty_of_CHaracter));

  /* Initialise the formatting pointers */
  ch_FOD.i_page=0;
  ch_FOD.i_FOD=0;
  ch_FOD.cch=0;
  ch_FOD.PROP=(char *) NULL;
  ch_FOD.fcLimit=127;       /* Force reading of FOD */
  prev_ch_FOD.fcLimit=0;

  pa_FOD.i_page=0;
  pa_FOD.i_FOD=0;
  pa_FOD.cch=0;
  pa_FOD.PROP=(char *) NULL;
  pa_FOD.fcLimit=127;       /* Force reading of FOD */
  prev_pa_FOD.fcLimit=0;

  string_ptr = 0;           /* Init string pointer */
}

/* Output character data: prints one MIF font statement for each property
   of sourceCHP that differs from currentCHP */
void output_CHP2(sourceCHP, currentCHP, explicit_flag)
struct PROPerty_of_CHaracter *sourceCHP, *currentCHP;
char explicit_flag;     /* If set, every value is printed */
{
  if (explicit_flag || (sourceCHP->word00.ftc != currentCHP->word00.ftc))
  {
    printf(" <FFamily ");
    /* Map the Word font code to a font family name */
    switch(sourceCHP->word00.ftc)
    {
      case 0:
      case 6: puts("Courier>"); break;
      case 8: puts("Helvetica>"); break;
      case 9: puts("AvantGarde>"); break;
      case 10: puts("HelveticaNarrow>"); break;
      case 16: puts("Bookman>"); break;
      case 24: puts("Times>"); break;
      case 25: puts("NewCenturySchlbk>"); break;
      case 26: puts("Palatino>"); break;
      case 50: puts("Hebrew>"); break;
      case 56: puts("Symbol>"); break;
      case 57: puts("LineDraw>"); break;
      case
58: puts("SuperGreek>"); break; case 59: puts("SSuperGreek>"); break; default: printf("Courier> # Font %u unknown, using default\n", sourceCHP->word00.ftc); break; } } if (explicit_flag || (sourceCHP->word01.hps != currentCHP->word01.hps)) printf (" <FSize %u>\n", sourceCHP->word01.hps / 2); if (explicit_flag || (sourceCHP->hpsPos != currentCHP->hpsPos)) printf (" <FDY %d>\n", -(((char)sourceCHP->hpsPos) / (char)2)); if (explicit_flag || (sourceCHP->word00.fItalic != currentCHP->word00.fItalic)) { printf(" <FItalic "); if (sourceCHP->word00.fItalic) puts("Yes>"); else puts("No>"); } if (explicit_flag || (sourceCHP->word00.fBold != currentCHP->word00.fBold)) { printf(" <FBold "); if (sourceCHP->word00.fBold) puts("Yes>"); else puts("No>"); } if (explicit_flag || (sourceCHP->word01.fUline != currentCHP->word01.fUline) || (sourceCHP->word01.fDline != currentCHP->word01.fDline)) { printf(" <FUnderline "); if ((sourceCHP->word01.fUline) || (sourceCHP->word01.fDline)) puts("Yes>"); else puts("No>"); } if (explicit_flag || (sourceCHP->word01.fStrike != currentCHP->word01.fStrike)) { printf(" <FStrike "); if (sourceCHP->word01.fStrike) puts("Yes>"); else puts("No>"); } if (sourceCHP->word01.fHidden != currentCHP->word01.fHidden) { printf(" <Marker\n <MarkerType 1>\n <MText `Hidden text'>\n"); if (sourceCHP->word01.fHidden) printf(" <MStyle Start>\n"); else printf(" <MStyle End>\n"); } } /* Ouput Character properties */ void output_CHP(sourceCHP, currentCHP, explicit_flag) struct PROPerty_of_CHaracter *sourceCHP, *currentCHP; char explicit_flag; /* If set, every value is printed */ { int i, flag; char *s_ptr, *c_ptr; /* is there any difference? 
*/ flag = 0; s_ptr = (char *) sourceCHP; c_ptr = (char *) currentCHP; for (i=0; i<sizeof(struct PROPerty_of_CHaracter); i++) if (s_ptr[i] != c_ptr[i]) flag++; if (!(explicit_flag) && flag == 0) return; /* Character format not changed */ if (sourceCHP->word00.fStyled) /* Should only appear if translation bad */ { printf("# Styled Font nb %u\n", sourceCHP->word00.stc); return; } puts("<Font"); output_CHP2(sourceCHP, currentCHP, explicit_flag); puts("> # End of Font"); } /* Ouput paragraph property data */ void output_PAP2(sourcePAP, currentPAP, explicit_flag) struct PAragraphProperty *sourcePAP, /* Source of changes */ *currentPAP; /* Current values */ char explicit_flag; /* If set, every value is printed */ { int i,k,n; if (explicit_flag || sourcePAP->word0.jc != currentPAP->word0.jc) { printf(" <PgfAlignment "); switch(sourcePAP->word0.jc) { case 0: puts("Left>"); break; case 1: puts("Center>"); break; case 2: puts("Right>"); break; case 3: puts("LeftRight>"); break; } } if (explicit_flag || sourcePAP->word0.fKeep != currentPAP->word0.fKeep) { printf(" <PgfSplit "); if (sourcePAP->word0.fKeep) puts("Yes>"); else puts("No>"); } if (explicit_flag || sourcePAP->word0.fKFollow != currentPAP->word0.fKFollow) { printf(" <PgfWithNext "); if (sourcePAP->word0.fKFollow) puts("Yes>"); else puts("No>"); } if (explicit_flag || Short(sourcePAP->dxaRight) != Short(currentPAP->dxaRight)) printf(" <PgfRIndent %dpt>\n", (Short(sourcePAP->dxaRight)/20) ); if (explicit_flag || (Short(sourcePAP->dxaLeft) != Short(currentPAP->dxaLeft)) || (Short(sourcePAP->dxaLeft1) != Short(currentPAP->dxaLeft1))) { printf(" <PgfLIndent %upt>\n", (Short(sourcePAP->dxaLeft)/20) ); printf(" <PgfFIndent %upt>\n", (Short(sourcePAP->dxaLeft)/20) + (((int)Short(sourcePAP->dxaLeft1))/20) ); } if (explicit_flag || Short(sourcePAP->dyaLine) != Short(currentPAP->dyaLine)) { i = ((short) Short(sourcePAP->dyaLine))/20 - (Current_CHP.word01.hps/2); printf(" <PgfLeading "); if (i > 0) printf("%upt>\n",i); else 
printf("0>\n"); } if (explicit_flag || Short(sourcePAP->dyaBefore) != Short(currentPAP->dyaBefore)) printf(" <PgfSpBefore %upt>\n", (Short(sourcePAP->dyaBefore)/20) ); if (explicit_flag || Short(sourcePAP->dyaAfter) != Short(currentPAP->dyaAfter)) printf(" <PgfSpAfter %upt>\n", (Short(sourcePAP->dyaAfter)/20) ); /* Process Tabs */ TBD = (struct TaBDescriptor *) &sourcePAP->rgTBDs[0]; n=0; for (i=0; i<20 ; i++) /* Count the tabs */ if (Short(TBD[i].dxa)) n++; if (n>0) printf(" <PgfNumTabs %u>\n", n); n=0; for (i=0; i<20; i++) { if (Short(TBD[i].dxa)) { printf(" <TabStop\n <TSX %u>\n <TSType", (Ushort)Short(TBD[i].dxa)/20); switch(TBD[i].word2.jcTab) { case 1: puts(" Center>"); break; case 2: puts(" Right>"); break; case 3: puts(" Decimal>"); break; default: puts(" Left>"); break; } switch(TBD[i].word2.tlc) /* TAB leader code */ { case 0: k=' '; break; case 1: k='.'; break; case 2: k='-'; break; case 3: k='_'; break; } printf(" <TSLeader %u>\n > # End TAB\n", k); } } return; } /* Output new paragraph properties (only those which changed) */ void output_PAP(sourcePAP, currentPAP, explicit_flag) struct PAragraphProperty *sourcePAP, /* Source of changes */ *currentPAP; /* Current values */ char explicit_flag; /* If set, every value is printed */ { int i, flag; char *s_ptr, *c_ptr, k; /* is there any difference? 
*/ flag = 0; s_ptr = (char *) sourcePAP; c_ptr = (char *) currentPAP; for (i=0; i<sizeof(struct PAragraphProperty); i++) if (s_ptr[i] != c_ptr[i]) flag++; if ((!(explicit_flag)) && (flag == 0) && (new_page == 0)) return; /* Paragraph format not changed */ styled_paragraph=0; if (sourcePAP->word0.fStyled) /* Style code */ { if (styled_flag) { printf(" <PgfTag `Para%c%c'> # Styled paragraph\n", style_key[sourcePAP->word0.stc].KEY[0], style_key[sourcePAP->word0.stc].KEY[1]); styled_paragraph=1; return; } else { printf(" <PgfTag `Para%u'> # Styled paragraph\n",sourcePAP->word0.stc); return; } } puts("<Pgf"); /* Begin of Pgf */ if (new_page==2) /* Reset flag */ { printf(" <PgfColumnTop No>\n"); new_page=0; } if (new_page==1) { printf(" <PgfColumnTop Yes>\n"); new_page++; /* Flag will be reset next time */ } output_PAP2(sourcePAP, currentPAP, explicit_flag); puts("> # End of Pgf"); } /* ******************************************************************* */ /* String processing */ void flush_string() /* Ouput current string as it is */ { if (string_ptr>0) { current_string[string_ptr]=0; /* String must be zero terminated */ printf("<String `%s'>\n", current_string); } string_ptr=0; return; } Uint add2string(carac) /* Add character to current string */ Uchar carac; { if (string_ptr >= STRING_LENGTH) flush_string(); current_string[string_ptr]=carac; string_ptr++; return(string_ptr); } /* ******************************************************************* */ /* Text processing */ Uint handle_byte(offset) /* Handle byte of text, convert it and output to stdout. 
Returns the offset of the next byte to be treated */
Uint offset;    /* Current byte is at this offset in the file
                   (not in the array: text_array[offset-128] ) */
{
  Uint ch_ptr, k;
  char *ptr;

  ch_ptr = offset-128;      /* Index of the byte inside text_array */

  /* Check paragraph information */
  if (find_FOD(ParPage, offset, &pa_FOD, &prev_pa_FOD, ParPageNb) || new_paragraph)
  {
    /* Handle Pgf info */
    if (!new_paragraph)     /* Simulate new paragraph even if there was no CR-LF */
    {
      flush_string();
      printf(" > # End ParaLine\n> # End Para\n");
      new_paragraph++;
    }
    printf("<Para\n");
    {
      /* Keep the previous settings in Temp_PAP so that only the
         differences are printed */
      Set_PAP(&Temp_PAP, &Current_PAP, NULL, sizeof(struct PAragraphProperty));
      PAP = (struct PAragraphProperty *) pa_FOD.PROP;
      Set_PAP(&Current_PAP, PAP, &Default_PAP, pa_FOD.cch);
      output_PAP(&Current_PAP, &Temp_PAP, 0);   /* Output new properties */
      /* Every paragraph starts again from the default character settings */
      Set_CHP(&Current_CHP, &Default_CHP, NULL, sizeof(struct PROPerty_of_CHaracter));
    }
    puts(" <ParaLine");
  }

  /* Check character information */
  if ( find_FOD(CharPage, offset, &ch_FOD, &prev_ch_FOD, CharPageNb) || new_paragraph)
  {
    flush_string();     /* The pending string was collected with the old font */
    Set_CHP(&Temp_CHP, &Current_CHP, NULL, sizeof(struct PROPerty_of_CHaracter));
    Chp = (struct PROPerty_of_CHaracter *) ch_FOD.PROP;
    if (ch_FOD.cch==0 && styled_paragraph)      /* Use default pgf font? */
    {
      Set_CHP(&Current_CHP, style_key[Current_PAP.word0.stc].PROP, &Default_CHP,
              style_key[Current_PAP.word0.stc].cch);
    }
    else
    {   /* Use direct formatting */
      Set_CHP(&Current_CHP, Chp, &Default_CHP, ch_FOD.cch);
    }
    output_CHP(&Current_CHP, &Temp_CHP, 0);     /* Output new properties */
  }
  new_paragraph=0;

  /* Special characters?
*/ #define ch (Uchar)text_array[ch_ptr] #define ch2 (Uchar)text_array[ch_ptr+1] if (ch==RETURN && ch2==LF) /* End of paragraph */ { flush_string(); puts(" > # End ParaLine"); puts("> # End Para"); new_paragraph=1; return(offset+2); } if (ch==NL) /* Hard line break */ { flush_string(); puts("<Char HardReturn>"); puts(" > # End ParaLine"); puts(" <ParaLine "); return(offset+1); } if (ch==FF) /* Form Feed */ { flush_string(); puts(" > # End ParaLine"); puts("> # End Para"); puts("# New Page"); new_paragraph=1; /* Real form feed */ new_page=1; return(offset+1); } /* Handle special characters */ if (Current_CHP.word01.fSpecial) { if (ch==FOOTNOTE || ch ==5) /* This is a footnote */ { flush_string(); printf("<String `%u'> # Footnote ref\n", Current_footnote); if (! processing_notes) printf("<AFrame %u> # Footnote text Frame\n", (Current_footnote)*2 ); Current_footnote++; return(offset+1); } flush_string(); printf("# Special char %u ignored\n", ch); return(offset+1); /* Other special characters are ignored %%% */ } if (char_def[ch] == 0) /* Ignorer char 0 */ return(offset+1); if (char_def[ch]<' ' || char_def[ch]>=0x7f /* Non standard ASCII */ || char_def[ch] == 0x27 || char_def[ch] == TAB || char_def[ch] == '\\' || char_def[ch] == '>' || char_def[ch] == 0x60 ) { if (string_ptr+7>=STRING_LENGTH) flush_string(); ptr=(char *) ¤t_string[string_ptr]; sprintf(ptr, "\\x%x ",(Uint) char_def[ch]); string_ptr += strlen(ptr); } else /* Standard ASCII */ { if (islower(char_def[ch]) && Current_CHP.word01.csm) /* Convert to upper case */ add2string((Uchar)toupper(char_def[ch])); else add2string(char_def[ch]); } return(offset+1); } /* Process text */ void process_text(start_offset, end_offset, text_ID) Uint start_offset, /* Offset into text_array of the text portion to handle */ end_offset, /* Offset of byte following end of portion */ text_ID; /* ID of textrect */ { Uint i; /* Initialisation */ /* ch_FOD.i_page=0; ch_FOD.i_FOD=0; ch_FOD.fcLimit=127; pa_FOD.i_page=0; pa_FOD.i_FOD=0; 
pa_FOD.fcLimit=127; */ puts("<TextFlow"); if (text_ID != 0) printf(" <TextRectID %u>\n", text_ID); Set_PAP(&Current_PAP, &Default_PAP, NULL, sizeof(struct PAragraphProperty)); new_paragraph=1; /* Go through text */ for (i=start_offset+128; i < end_offset+128;) i=handle_byte(i); flush_string(); if (! new_paragraph) { printf(" > # End ParaLine\n > # End Para\n"); new_paragraph++; } puts("> # End of TextFlow"); } /* ******************************************************************* */ /* READ STYLE SHEET */ int read_style(StyleFileName) char *StyleFileName; { int FileIn, /* Input file descriptor */ i,n; char *ptr; struct st_line *s_line; /* pointer to current line */ FileIn = open(StyleFileName,O_RDONLY); if (FileIn == -1) { printf("# Impossible de lire la feuille de style : %s\n",StyleFileName); return(0); } if (read(FileIn,&s_header,sizeof(s_header)) != sizeof(s_header)) { puts("# Impossible de lire l'entete (Style)"); return(0); } if ((Ushort)Short(header.Wident) != 0137061) { puts("# Not a Word 4.0 style sheet"); return(0); } text_array_size = Int(s_header.fcMac) - 128; last_text_byte = text_array_size; printf("# Style sheet text size:%u bytes\n", text_array_size); text_array = (char *) malloc(text_array_size); /* Allocation de memoire */ if (text_array == NULL) { puts("Erreur d'allocation"); exit(1); } if (read(FileIn, text_array, text_array_size) != text_array_size) { puts("Erreur lors de la lecture du texte"); exit(1); } puts("# Style sheet text read "); /* Lecture des pages de formatage */ /* Caracteres */ #define s_pnChar ((short)((Int(s_header.fcMac) + 127) / 128)) /* Page number of beginning of Char info */ s_CharPageNb = (Ushort)Short(s_header.pnPara) - s_pnChar; s_CharPage = (struct Page *) malloc(s_CharPageNb*128); if (s_CharPage == NULL) { puts("# (style) Erreur d'allocation, caracteres"); return(0); } lseek(FileIn, (s_pnChar*128), L_SET); /* Go to beginning of pages */ if (read(FileIn, s_CharPage, (s_CharPageNb*128)) != (s_CharPageNb*128)) { puts("# 
(style) Erreur lors de la lecture du format de caractere"); return(0); } printf("# (style) Char table read, %u pages\n",s_CharPageNb); /* Paragraphes */ s_ParPageNb = (Ushort)Short(s_header.pnFntb) - (Ushort)Short(s_header.pnPara); s_ParPage = (struct Page *) malloc(s_ParPageNb*128); if (s_ParPage == NULL) { puts("# (style) Erreur d'allocation, paragraphes"); return(0); } lseek(FileIn, ((Ushort)Short(s_header.pnPara)*128), L_SET); /* Go to beginning of pages */ if (read(FileIn, s_ParPage, (s_ParPageNb*128)) != (s_ParPageNb*128)) { puts("# (style) Erreur lors de la lecture du format de paragraphe"); return(0); } printf("# Para table read, %u pages\n",s_ParPageNb); close(FileIn); /* Initialisation des styles de caracteres */ for (n=0; n<31; n++); { styles[n].PROP = (char *) &Default_CHP; styles[n].cch = sizeof(struct PROPerty_of_CHaracter); } style_key = (struct STYLE_KEY *) malloc(sizeof(struct STYLE_KEY)*128); if (style_key == NULL) { puts("# Erreur d'allocation"); return(0); } for (n=0; n<128; n++) { style_key[n].KEY[0]=' '; style_key[n].KEY[1]=' '; } s_line = (struct st_line *) text_array; ch_FOD.fcLimit = 127; pa_FOD.fcLimit = 127; /* Impression du catalogue des paragraphes */ puts("<Catalog"); for (i=0; sizeof(struct st_line)*i < last_text_byte; i++) { printf("# Processing style sheet line %u\n",i); /* Copy 2 char key */ strncpy(style_key[s_line[i].stc].KEY, s_line[i].KEY, 2); if (s_line[i].stc<30) /* Character style is in 0..30 */ { printf("# Storing character style %u\n", s_line[i].stc); if (find_FOD(s_CharPage, (i*32)+128, &ch_FOD, &prev_ch_FOD, s_CharPageNb)) { styles[s_line[i].stc].PROP = (char *) ch_FOD.PROP; styles[s_line[i].stc].cch = ch_FOD.cch; } } else { if (s_line[i].stc<105) /* Paragraph style is in 31..104 */ { find_FOD(s_ParPage, (i*32)+128, &pa_FOD, &prev_pa_FOD, s_ParPageNb); Set_PAP(&Current_PAP, pa_FOD.PROP, &Default_PAP, pa_FOD.cch); find_FOD(s_CharPage, (i*32)+128, &ch_FOD, &prev_ch_FOD, s_CharPageNb); /* Store pgf's character format */ 
style_key[s_line[i].stc].PROP = (char *) ch_FOD.PROP; style_key[s_line[i].stc].cch = ch_FOD.cch; Set_CHP(&Current_CHP, ch_FOD.PROP, &Default_CHP, ch_FOD.cch); printf("<Pgf\n <PgfTag `Para%c%c'> # Styled paragraph\n", s_line[i].KEY[0], s_line[i].KEY[1]); output_PAP2(&Current_PAP, &Default_PAP, 1); printf("<Font\n"); output_CHP2(&Current_CHP, &Default_CHP, 1); printf("> # End of Font\n> # End of Pgf\n"); } } } puts("> # End Catalog"); init_defaults(); free(text_array); return(1); } /* ******************************************************************* */ /* MAIN PROGRAM ============ */ main(argc,argv) int argc; char *argv[]; { int FileIn, /* Input file descriptor */ i,n,a,b; char *ptr; FILE *DataFile; char input_path[90]; /* storage for input file path */ DataFileName = (char *) argv[2]; for (n=0; n<256; n++) char_def[n]=n; DataFile = fopen(DataFileName,"r"); if (DataFile != NULL) { while (fscanf(DataFile,"<%u %u>\n",&b,&a) == 2) char_def[b]=a; close(DataFile); } init_defaults(); /* Initialise some constants */ FileIn = open(argv[1],O_RDONLY); if (FileIn == -1) { puts("Impossible d'ouvrir le fichier d'entree"); exit(1); } if (read(FileIn,&header,sizeof(header)) != sizeof(header)) { puts("Impossible de lire l'entete"); exit(1); } if ((Ushort)Short(header.Wident) != 0137061) { puts("Not a Word 4.0 file"); exit(1); } /* Impression de l'entete */ puts("<MIFFile 1.01> # Generated by word2mif, University of Geneva"); puts("include(mif_read.m4)"); puts("#"); printf("# File Name:%s\n", argv[1]); puts("<Units Ucm>"); /* Lecture et analyse de la feuille de style */ styled_flag = 0; i=0; for (n=0; n<80 && argv[1][n]; n++) /* Isolate the path of input file */ if (argv[1][n]=='/') i=n+1; /* End of path is at last slash */ if (i) strncpy(input_path, argv[1], i); /* Copy input path */ input_path[i]=0; /* End of string */ i=0; /* Ignore the PC tree path */ for (n=0; n<33 && header.szSsht[n]; n++) { if (isupper(header.szSsht[n])) /* map filename to lower case */ header.szSsht[n]= 
tolower(header.szSsht[n]); if (header.szSsht[n]=='\\') /* Filename starts after last backslash */ i=n+1; } if (n) { strncat(input_path, &header.szSsht[i],90); printf("# Style sheeet name:%s i=%u\n", input_path, i); styled_flag = read_style(input_path); } /* Lecture du texte proprement dit */ text_array_size = Int(header.fcMac) - 128; last_text_byte = text_array_size; printf("# Text size:%u bytes\n", text_array_size); text_array = (char *) malloc(text_array_size); /* Allocation de memoire */ if (text_array == NULL) { puts("Erreur d'allocation"); exit(1); } if (read(FileIn, text_array, text_array_size) != text_array_size) { puts("Erreur lors de la lecture du texte"); exit(1); } puts("# Text read "); /* Lecture des pages de formatage */ /* Caracteres */ CharPageNb = (Ushort)Short(header.pnPara) - pnChar; CharPage = (struct Page *) malloc(CharPageNb*128); if (CharPage == NULL) { puts("Erreur d'allocation, caracteres"); exit(1); } lseek(FileIn, (pnChar*128), L_SET); /* Go to beginning of pages */ if (read(FileIn, CharPage, (CharPageNb*128)) != (CharPageNb*128)) { puts("Erreur lors de la lecture du format de caractere"); exit(1); } printf("# Char table read, %u pages\n",CharPageNb); /* Paragraphes */ ParPageNb = (Ushort)Short(header.pnFntb) - (Ushort)Short(header.pnPara); ParPage = (struct Page *) malloc(ParPageNb*128); if (ParPage == NULL) { puts("Erreur d'allocation, paragraphes"); exit(1); } lseek(FileIn, ((Ushort)Short(header.pnPara)*128), L_SET); /* Go to beginning of pages */ if (read(FileIn, ParPage, (ParPageNb*128)) != (ParPageNb*128)) { puts("Erreur lors de la lecture du format de paragraphe"); exit(1); } printf("# Para table read, %u pages\n",ParPageNb); /* Footnotes */ FntbPageNb = (Ushort)Short(header.pnSep) - (Ushort)Short(header.pnFntb); if (FntbPageNb == 0) FNTB == NULL; else { FNTB = (struct FootNoteTaBle *) malloc(FntbPageNb*128); if (FNTB == NULL) { puts("Erreur d'allocation, notes"); exit(1); } lseek(FileIn, ((Ushort)Short(header.pnFntb)*128), L_SET); 
/* Go to beginning of pages */
    if (read(FileIn, FNTB, (FntbPageNb*128)) != (FntbPageNb*128))
    {
      puts("Erreur lors de la lecture de la table des notes");
      exit(1);
    }
    printf("# Footnote table read, %u pages\n", FntbPageNb);

    /* Process footnotes */
    /* 1: Define anchored Frames */
    puts("<AFrames");
    for (i=1; i < (Ushort)Short(FNTB->cfnd); i++)   /* create one Frame per footnote */
    {
      puts(" <Frame");
      printf(" <ID %u>\n",(i*2));       /* Frame ID, begins at 2 (4,6,8...) */
      puts(" <Pen 15>");
      puts(" <PenWidth `0.500 '>");
      puts(" <Fill 7>");
      puts(" <Inverted No>");
      /* puts(" <BRect 1.00\" 8.25\" 6.25\" 1.00\">"); */
      puts(" <BRect 0 0 6.25\" 35pt>");
      puts(" <FrameType Bottom>");
      puts(" <NSOffset 0\">");
      puts(" <BLOffset 0\">");
      puts(" <AnchorAlign Left>");
      puts(" <Cropped Yes>");
      puts(" <TextRect");
      printf(" <ID %u>\n", (i*2)+1);    /* TextRectID, value 3,5,7,9... */
      puts(" <BRect 0\" 0\" 6.13\" 30pt>");
      puts(" <TRNext 0>");
      puts(" > # End of TextRect");
      puts(" > # End of Frame");
    }
    puts("> # End of AFrames");
    FND = (struct FootNoteDescriptor *) &(FNTB->FNDs);  /* Address of first FND */
    /* The main text stops just before the text of the first footnote */
    last_text_byte = Int(FND[0].cpFtn)-1;
  }

  /* Process main text */
  printf("# Main text: First byte:0, last byte:%u\n", last_text_byte+1);
  processing_notes=0;
  process_text(0, last_text_byte+1, 0);     /* Main loop */

  if (FntbPageNb)
  {
    /* 2: Process footnote contents.  Footnote i runs from FND[i-1].cpFtn
       to FND[i].cpFtn and goes into the text rectangle created for it
       above (ID 2*i+1). */
    processing_notes = 1;
    Current_footnote = 1;
    puts("# Footnotes section");
    for (i=1; i < (Ushort)Short(FNTB->cfnd); i++)
    {
      printf(" # Start:%u Stop:%u\n", Int(FND[(i-1)].cpFtn), Int(FND[i].cpFtn));
      process_text(Int(FND[(i-1)].cpFtn), Int(FND[i].cpFtn), (i*2)+1);
    }
    puts("# End of footnotes");
  }
  printf("# End of MIFFILE\n");
  close(FileIn);
  exit(0);
}
//E*O*F word2mif.c//
echo x - word2mif.data
cat > "word2mif.data" << '//E*O*F word2mif.data//'
<31 6> <128 130> <129 159> <130 142> <131 137> <132 138> <133 136> <134 140> <135 141> <136 144> <137 145> <138 143> <139 149> <140 148> <141 147> <142 128> <143 129> <144 131> <145 190> <146 174> <147 153> <148 
154> <149 152> <150 158> <151 157> <152 217> <153 133> <154 134> <155 162> <156 163> <157 180> <159 196> <160 135> <161 146> <162 151> <163 156> <164 150> <165 132> <166 187> <167 188> <168 192> <173 193> <174 199> <175 200> <196 5> <255 4>
//E*O*F word2mif.data//
echo x - word4.h
cat > "word4.h" << '//E*O*F word4.h//'
/* Microsoft Word 4.0 Binary file format (IBM-PC)

   O. Schopfer, University of Geneva, August 1988

   "@(#) word4.h 1.2.1.2 8/25/88 Release %I%
*/

/* Constants */

#define RETURN 13       /* Paragraphs end with RETURN+LF */
#define LF 10
#define NL 11           /* Hard line break */
#define FF 12           /* Explicit page break */
#define TAB 9           /* Tabulator */
#define SPACE 32        /* Normal space */
#define NBSPACE 255     /* Non-breaking space */
#define HYPHEN 45       /* Normal Hyphen */
#define NBHYPHEN 196    /* Non-breaking Hyphen */
#define NRHYPHEN 31     /* Non-required Hyphen */
#define PAGE 1          /* (page) */
#define DATE 2          /* (print date) */
#define TIME 3          /* (print time) */
#define FOOTNOTE 4      /* Footnote reference mark (automatic numbering) */

/* Types */

typedef unsigned int Uint;
typedef unsigned char Uchar;
typedef unsigned short Ushort;
typedef unsigned long Ulong;

/* Multi-byte values as stored in the file (low-order part first); they
   are kept as byte arrays and converted with Short(), Int() and Long() */
struct PCSHORT
{
  Uchar byte[2];
};
typedef struct PCSHORT PCshort;

struct PCINT
{
  PCshort word[2];
};
typedef struct PCINT PCint;

struct PCLONG
{
  PCint lword[2];
};
typedef struct PCLONG PClong;

/* Header */
struct Header
{
  PCshort Wident,       /* Should be 0137061 */
          dty,          /* Document type, should be 0 */
          wTool,        /* Reserved, 0125400 */
          Reserved[4];
  PCint fcMac;          /* Number of bytes of actual text PLUS 128
                           (bytes in one sector, low order first) */
  PCshort pnPara,       /* Page number of start of Paragraph info
                           (a page is a 128-byte chunk) */
#define pnChar ((short)((Int(header.fcMac) + 127) / 128))
                        /* Page number of beginning of Char info */
          pnFntb,       /* Page number of Footnote table FNTB (pnSep if none) */
          pnSep,        /* Page number of start of division info (pnSetb if none) */
          pnSetb,       /* Page number of start of SETB (pnPgtb if none) */
          pnPgtb,       /* Page number of start of Page Table PGTB */
          pnSumd;       /* Page number of Summary Information */
  char szSsht[66],      /* Style sheet filename, zero terminated */
       Reserv[2],
       PRD[8];          /* PRD filename without path+extension */
  PCshort pnMac,        /* Count of disk pages in whole file (last page number + 1) */
          fMarkRev,     /* Revision mark */
          Reserv02[9];
};

/* One 128-byte page of formatting information */
struct Page
{
  PCint fcFirst;        /* Byte # in file of first character covered by this
                           page of formatting info. The byte # of the 1st
                           character in the text is 128. (low order byte first) */
  Uchar FODs[123],      /* FOrmat Descriptor and Format PROPerties */
        cfod;           /* Number of FOD's on this page */
};

/* FOD Format Descriptor (fixed size) */
struct FOrmatDescriptor
{
  PCint fcLim;          /* Byte # in file AFTER last character covered by this FOD */
  PCshort bfprop;       /* Byte offset from beginning of FODs[] to corresponding
                           FPROP for these characters or this paragraph.
                           A value of FFFF means that there is no FPROP */
};

/* Formatting PROPerty (variable size) */
struct FormatPROP
{
  Uchar cch;            /* Number of bytes in this FPROP, excluding this byte */
};

/* CHP CHaracter Property.
   NOTE(review): the bitfield structures below only match the file bytes
   noted in the margin if the compiler packs bitfields in that order --
   confirm for the target compiler. */
struct Word0
{
          unsigned stc:7;       /* Style code */
/* 0 */   unsigned fStyled:1;   /* Is character styled */
          unsigned ftc:6;       /* Font code */
          unsigned fItalic:1;   /* Italic */
/* 1 */   unsigned fBold:1;     /* Bold flag */
};

struct Word1
{
/* 2 */   Uchar hps;            /* Size of font in half pts (def:24 dec) */
/* 3 */   unsigned fHidden:1;   /* Hidden char. */
          unsigned fSpecial:1;  /* Special character (i.e. Footnote) */
          unsigned csm:2;       /* Case modifier: 0 normal 1 upper 2 small caps */
          unsigned fNew:1;      /* Revised char(s) */
          unsigned fDline:1;    /* Double Underline */
          unsigned fStrike:1;   /* Strikethrough */
/* 3 */   unsigned fUline:1;    /* Underline */
};

struct PROPerty_of_CHaracter
{
  struct Word0 word00;
  struct Word1 word01;
/* 4 */   Uchar hpsPitch;       /* Reserved */
/* 5 */   char hpsPos,          /* Position: 0 Normal <0 Subscript >0 Superscript */
               Reserv02[4];
};

/* PAP PAragraph Property */
struct Word00
{
          unsigned stc:7;       /* Style code */
/* 0 */   unsigned fStyled:1;   /* Is character styled */
          unsigned :3;          /* Reserved */
          unsigned fSBS:1;      /* Parag. Side by side */
          unsigned fKeep:1;     /* Keep paragraph on one page */
          unsigned fKFollow:1;  /* Keep this parag. together with next */
/* 1 */   unsigned jc:2;        /* Justification 0=left 1=center 2=right 3=both */
};

struct Word01
{
          unsigned :1;          /* Reserved */
/* 2 */   unsigned stcNorm:7;   /* Style code of normal chars. */
          unsigned fHidden:1;   /* Paragraph is Hidden in outline view */
/* 3 */   unsigned level:7;     /* Level number (outline) */
};

struct Word08
{
          unsigned bsc:2;       /* Border style code (0=Normal borders) */
          unsigned btc:2;       /* Border type code (0=normal parag) */
/* 16 */  unsigned rhc:4;       /* Running-head code (0=normal parag) */
/* 17 */unsigned :8;            /* Reserved */
};

struct PAragraphProperty
{
  struct Word00 word0;
  struct Word01 word1;
/* 4-5 */     PCshort dxaRight,     /* Right indent in twips(=1/1440 inch) */
/* 6-7 */             dxaLeft,      /* Left indent in twips */
/* 8-9 */             dxaLeft1,     /* First line indent */
/* 10-11 */           dyaLine,      /* Line spacing (standard 240 ) */
/* 12-13 */           dyaBefore,    /* Space before */
/* 14-15 */           dyaAfter;     /* Space after */
  struct Word08 word8;
/* 18 */      char Reserv03[4];     /* Reserved */
/* 22 */      char rgTBDs[80];      /* Room for 20 tab stops */
};

/* TBD Tab descriptor */
struct Word002
{
          unsigned :2;          /* Reserved */
          unsigned tlc:3;       /* Tab leader code: 0=none, 1=dots, 2=hyphens, 3=underline.
*/ /* 2 */ unsigned jcTab:3; /* Justification after tab: 0=left, 1=center, 2=right, 3=decimal, 4=vertical bar */ /* 3 */ char chAlign; /* Char to decimal align on (0=".") */ }; struct TaBDescriptor { PCshort dxa; /* Distance from left margin in twips */ struct Word002 word2; }; /* FOOTNOTE SECTION The footnote section (optional) starts at the first complete page after the PARAGRAPH section, and contains the FNTB which contains an array of FND, footnote descriptors */ /* FNTB Footnote Table */ struct FootNoteTaBle { PCshort cfnd, /* Number of FND + 1 (1 or more) */ cfndMax; /* Same as word 0 */ char FNDs; /* First char of first FND (just to have its address) */ }; /* FND Footnote Descriptor */ struct FootNoteDescriptor { PCint cpRef, /* Byte offset into text area of footnote ref */ cpFtn; /* Byte offset into text area of footnote text */ }; /* SECTION PROPERTY (Division) The SEP section (optional) is on the page immediately after the footnote section, and contains one or more SEPS */ struct SEP_Word0 { Uchar cch; /* Count of bytes, EXCLUDING this one */ unsigned stc:7; /* Style code */ unsigned fStyled:1; /* Section is styled */ }; struct SEctionProperty { struct SEP_Word0 word0; /* 0-1 */ char Bkc_Pgn; /* Break code, and page numbering */ PCshort yaMac, /* Page length in Twips */ xaMac, /* Page width in Twips */ pgnStart, /* Start numbering at ... (def: -1) */ yaTop, /* Top margin Twips */ dyaText, /* Text height */ xaLeft, /* Left margin */ dxaText; /* Text width */ char Reserved, /* Running heads etc. */ cColumns; /* Number of columns */ PCshort yaRH1, /* Position of top header */ yaRH2, /* Position of bottom header */ dxaCol, /* Gap between columns */ dxaGutter, /* Marge de reliure */ yaPgn, /* Y position of page # */ xaPgn, /* X position of page # */ dxaLnn; /* distance of line # from left */ char Reserved02; }; struct SEctionTaBle { PCshort csed, /* # of sections (1 or more ) */ csedMax; /* Undefined (=csed?) 
*/ }; struct SEctionDescriptor { PCint cp; /* Byte offset in text area of the end-of-section mark */ PCshort fn; /* Undefined */ PCint fcScp; /* Byte offset (into file) of associated SEP */ }; /* ************************************************ STYLE SHEET SPECIFICATIONS */ struct st_line /* Text line of style sheet */ { char stc, /* Style code */ KEY[2], /* Two letters of style code */ remark[28], /* DEscription */ cr; }; //E*O*F word4.h// exit 0