schopfer@cui.unige.ch (SCHOPFER Olivier) (04/17/89)
Word2mif Version 1.4
Filter to convert word processor files from Microsoft Word 4.0
to Framemaker "MIF" (Maker Interchange Format)
Calling sequence:
un1sun1% word2mif input_file word2mif.data > output_file.mif
word2mif.data is a data file containing the character translation information
If there is a style sheet, it is expected to be in the same directory as
the input_file.
Couper ici
______________________________
# This is a shell archive. Remove anything before this line, then
# unpack it by saving it in a file and typing "sh file". (Files
# unpacked will be owned by you and have default permissions.)
#
# This archive contains:
# word2mif.c word2mif.data word4.h
echo x - word2mif.c
cat > "word2mif.c" << '//E*O*F word2mif.c//'
/*
@(#) word2mif.c 1.4 8/25/88 Release 1.4
Olivier Schopfer, University of Geneva, Switzerland
schopfer@cui.uucp
schopfer@cgeuge51.BITNET
Filter to convert word processor files from Microsoft Word 4.0
to Framemaker "MIF" (Maker Interchange Format)
Calling sequence:
un1sun1% word2mif input_file word2mif.data > output_file.mif
word2mif.data is a data file containing the character translation information
If there is a style sheet, it is expected to be in the same directory as
the input_file.
*/
#include <ctype.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/file.h>
#include "word4.h"
#define STRING_LENGTH 80
/* Global state shared by every routine in this file.
   The two headers are kept exactly as read from disk; their multi-byte
   fields are decoded with Short()/Int() each time they are used. */
struct Header header, /* File header */
s_header; /* Style sheet header */
/* Text buffer: the byte at file offset N is text_array[N-128]. */
char *text_array; /* The text itself */
Uint text_array_size; /* # of text bytes */
struct Page *CharPage, *ParPage,
*SecTablePage, *SecPage; /* Pointers to format info pages */
Uint CharPageNb, ParPageNb, FntbPageNb, /* Number of pages */
SecTablePageNb, SecPageNb;
/* Style sheet data */
struct Page *s_CharPage, *s_ParPage,
*s_SecTablePage, *s_SecPage; /* Pointers to format info pages */
Uint s_CharPageNb, s_ParPageNb, s_FntbPageNb, /* Number of pages */
s_SecTablePageNb, s_SecPageNb,
/* End of style sheet data */
last_text_byte; /* Offset of last real text byte
(Except footnotes) */
/* Cursor into the formatting pages.  init_defaults() sets fcLimit to 127,
   which find_FOD() treats as "first FOD not loaded yet". */
struct FOD_ptr /* Internal structure for FOD's pointers */
{ Uint i_page, /* Page number of FOD */
i_FOD, /* FOD number inside current page */
fcLimit; /* Byte after last one concerned with this FOD */
char *PROP; /* Pointer to CHP or PAP */
char cch; /* Nb of defined bytes in CHP or PAP */
};
struct FOD_ptr ch_FOD, /* Current character FOD */
prev_ch_FOD,
pa_FOD, /* Current paragraph FOD */
prev_pa_FOD;
/* One entry per style code; read_style() allocates 128 of them. */
struct STYLE_KEY
{ Uchar KEY[2]; /* Two letters style code */
char *PROP; /* Pointer to pgf's font */
char cch;
};
struct STYLE_KEY *style_key; /* Pointer to array of keys */
struct STYLE_PTR
{
char *PROP; /* Pointer to CHP */
char cch; /* Nb of defined bytes in CHP */
};
/* Indexed by character style code; read_style() only stores codes below 30. */
struct STYLE_PTR styles[30]; /* Pointers to styled CHP's */
struct FormatPROP *FPROP;
struct PROPerty_of_CHaracter *Chp, /* Pointer to Char Property */
Current_CHP, /* Current char. settings */
s_Current_CHP, /* Current char. settings (styled) */
Default_CHP, /* Default values */
Temp_CHP; /* Temporary storage */
struct PAragraphProperty *PAP, /* Pointer to Paragraph Property */
Current_PAP, /* Current parag. settings */
Default_PAP, /* Default values */
Temp_PAP; /* Temporary storage */
struct TaBDescriptor *TBD;
struct FootNoteTaBle *FNTB;
struct FootNoteDescriptor *FND;
struct SEctionProperty *SEP;
struct SEctionTaBle *SETB;
struct SEctionDescriptor *SED;
/* new_page takes three values in output_PAP(): 0 = nothing pending,
   1 = page break seen, 2 = break emitted and to be cancelled next time. */
char new_paragraph, /* Boolean, to tell if we are at the beginning of */
new_page, /* a Paragraph (i.e. the last byte was the end */
/* of one) */
processing_notes, /* Flag to tell if processing main text or notes */
styled_flag, /* Has a style sheet been read? */
styled_paragraph, /* Is current paragraph styled? */
/* 81 = STRING_LENGTH characters plus the terminating zero. */
current_string[81]; /* String for output */
Uint string_ptr; /* Index into array */
Uint Current_footnote = 1; /* Number of current footnote */
/* Conversion table for characters */
char *DataFileName; /* Data filename */
/* main() presets this to the identity mapping, then applies the data file. */
Uchar char_def[256]; /* Char translation table */
/* Miscellaneous conversion functions */
short Short(var) /* PC 16 bits to 68000 16 bits conversion */
PCshort var;
{ Ushort res;
res = 0;
res = (Ushort) ((var.byte[1]) << 8) | ((Ushort) var.byte[0]) ;
return(res);
}
unsigned int Int(var) /* PC 32 bits to 68000 32 bits conversion */
PCint var;
{ unsigned int res;
res = ((Uint)(Ushort)Short(var.word[1]) <<16 ) | ((Uint)(Ushort)Short(var.word[0])) ;
return(res);
}
unsigned long Long(var) /* 64 bits conversion */
PClong var;
{ unsigned int res;
res = ((Ulong)Int(var.lword[0]) << 32) | ((Ulong)Int(var.lword[1]));
return(res);
}
/* **********************************************************************
Functions to handle formating properties
======================================== */
/* Build a complete paragraph property record in destPAP: the first
   bytes_defined bytes come from srcPAP, every remaining byte comes from
   defPAP.  srcPAP is not read when bytes_defined is 0, and defPAP is not
   read when bytes_defined covers the whole structure. */
void Set_PAP(destPAP, srcPAP, defPAP, bytes_defined)
struct PAragraphProperty
    *destPAP, /* Destination */
    *srcPAP, /* Source PAP */
    *defPAP; /* Default PAP, for undef. bytes */
Uchar bytes_defined; /* Number of defined bytes in srcPAP */
{
    char *out = (char *) destPAP;
    char *given = (char *) srcPAP;
    char *fallback = (char *) defPAP;
    int pos = 0;

    while (pos < sizeof(struct PAragraphProperty))
    {
        out[pos] = (pos < bytes_defined) ? given[pos] : fallback[pos];
        pos++;
    }
}
/* Build a complete character property record in destCHP: the first
   bytes_defined bytes come from srcCHP, every remaining byte comes from
   defCHP.  When the result is marked as styled and a style sheet has been
   read, the record is rebuilt from the style's own CHP instead. */
void Set_CHP(destCHP, srcCHP, defCHP, bytes_defined)
struct PROPerty_of_CHaracter
    *destCHP, /* Destination */
    *srcCHP, /* Source CHP */
    *defCHP; /* Default CHP, for undef. bytes */
Uchar bytes_defined; /* Number of defined bytes in srcCHP */
{
    char *dest_ptr, *src_ptr, *def_ptr; /* Byte views of the three records */
    int i;

    dest_ptr = (char *) destCHP;
    src_ptr = (char *) srcCHP;
    def_ptr = (char *) defCHP;
    /* Copy defined bytes from srcCHP, and others from defCHP */
    for (i=0; i<sizeof(struct PROPerty_of_CHaracter); i++)
        if (i < bytes_defined)
            dest_ptr[i] = src_ptr[i];
        else
            dest_ptr[i] = def_ptr[i];
    /* If character is styled, copy styled char data instead of standard data.
       The style code comes straight from the file, so it is checked against
       the size of styles[] before being used as an index; an out-of-range
       code leaves the record as copied above. */
    if (destCHP->word00.fStyled && styled_flag
        && destCHP->word00.stc < sizeof(styles)/sizeof(styles[0]))
        Set_CHP(destCHP,
                (struct PROPerty_of_CHaracter *) styles[destCHP->word00.stc].PROP,
                defCHP,
                styles[destCHP->word00.stc].cch); /* Recursive call */
    return;
}
/* *************************************************************
Function to set FOD pointer arguments to descr
prev_descr gets the old values */
void set_FOD(page, descr, prev_descr)
struct Page *page; /* Formatting information page (CharPage or ParPage) */
struct FOD_ptr *descr, /* pointer to current format descriptor to be set */
*prev_descr; /* old value of descr */
{
struct FOrmatDescriptor *FOD;
prev_descr->i_page = descr->i_page;
prev_descr->i_FOD = descr->i_FOD;
prev_descr->fcLimit = descr->fcLimit;
prev_descr->PROP = descr->PROP;
prev_descr->cch = descr->cch;
FOD = (struct FOrmatDescriptor *)
&page[descr->i_page].FODs[descr->i_FOD * sizeof(struct FOrmatDescriptor)];
descr->fcLimit = Int(FOD->fcLim);
if (((Ushort)Short(FOD->bfprop)) != 0xFFFF) /* Property defined? */
{
FPROP = (struct FormatPROP *)
&page[descr->i_page].FODs[(Ushort)Short(FOD->bfprop)];
descr->cch = FPROP->cch;
descr->PROP = (char *)
&page[descr->i_page].FODs[((Ushort)Short(FOD->bfprop)+1)];
}
else
{
descr->cch = 0;
descr->PROP = (char *) NULL;
}
return;
}
/* Find the FOD that covers `offset` in the given set of formatting pages.
   descr is the cursor to update; prev_descr receives the descriptor that
   preceded it (filled in by set_FOD()).
   Returns 1 when the caller should treat the format as changed, 0 when the
   current descriptor still applies.  It also returns 0 when the pages hold
   no further FOD; in that case descr->PROP is NULL, descr->cch is 0 and
   descr->fcLimit is set to offset+1. */
int find_FOD(page, offset, descr, prev_descr, pageNb)
struct Page *page; /* Pointer to formatting info page */
Uint offset; /* current offset into file */
struct FOD_ptr *descr, /* Descriptor of current FOD */
*prev_descr; /* Last descriptor */
Uint pageNb; /* Number of available pages */
{
/* 127 is the "not loaded yet" marker written by init_defaults() */
if (descr->fcLimit == 127)
set_FOD(page, descr, prev_descr); /* Force reading of 1st FOD */
if (offset == descr->fcLimit) /* Right at beginning of new section */
{
/* Current FOD was the last one of its page? */
if (page[descr->i_page].cfod <= (descr->i_FOD + 1))
{
if (descr->i_page+1 >= pageNb) /* There is no more FOD */
{
descr->PROP=(char *) NULL;
descr->cch =0;
descr->fcLimit = offset+1;
return(0);
}
descr->i_page++;
descr->i_FOD = 0; /* Beginning of a new page */
}
else
descr->i_FOD++; /* Skip to next FOD in the same page */
set_FOD(page, descr, prev_descr);
return(1);
}
else /* offset <> fcLim */
{
/* Offset still inside the span of the current FOD */
if (prev_descr->fcLimit <= offset && offset < descr->fcLimit)
{
if (prev_descr->fcLimit == 127) /* First section */
{
/* Clear the marker so that this case reports a change only once */
prev_descr->fcLimit = 0;
return(1);
}
else
return(0); /* prev_lim <= offset < lim */
}
if (offset < descr->fcLimit)
{
/* offset < prev_lim < lim */
/* The offset lies before the current FOD: restart from the first FOD */
descr->i_page=0;
descr->i_FOD=0;
descr->fcLimit=0;
descr->cch = 0;
descr->PROP = (char *) NULL;
set_FOD(page, descr, prev_descr);
}
/* Step forward one FOD at a time while the previous limit is below offset */
while ((prev_descr->fcLimit) < offset)
{
if (page[descr->i_page].cfod <= (descr->i_FOD + 1))
{
if (descr->i_page+1 >= pageNb) /* There is no more FOD */
{
descr->PROP=(char *) NULL;
descr->cch =0;
descr->fcLimit = offset+1;
return(0);
}
descr->i_page++;
descr->i_FOD = 0; /* Beginning of a new page */
}
else
descr->i_FOD++; /* Skip to next FOD in the same page */
set_FOD(page, descr, prev_descr);
}
return(1);
}
}
/* Initialisation
   **************
   Resets the default and current paragraph/character properties, rewinds
   both formatting cursors and empties the output string. */
void init_defaults()
{
    char *raw;
    int idx;

    /* Default paragraph: all bytes zero except bytes 0, 2 and 10 */
    raw = (char *) &Default_PAP;
    for (idx = sizeof(struct PAragraphProperty); idx > 0; )
        raw[--idx] = 0;
    raw[0]=61;
    raw[2]=30;
    raw[10]=240;
    Set_PAP(&Current_PAP, &Default_PAP, NULL, sizeof(struct PAragraphProperty));

    /* Default character: all bytes zero except byte 2 and the font code */
    raw = (char *) &Default_CHP;
    for (idx = sizeof(struct PROPerty_of_CHaracter); idx > 0; )
        raw[--idx] = 0;
    raw[2]=24;
    Default_CHP.word00.ftc = 6; /* Default font code */
    Set_CHP(&Current_CHP, &Default_CHP, NULL, sizeof(struct PROPerty_of_CHaracter));

    /* Rewind the formatting cursors; a limit of 127 makes find_FOD()
       load the first FOD on its next call */
    ch_FOD.fcLimit=127;
    ch_FOD.PROP=(char *) NULL;
    ch_FOD.cch=0;
    ch_FOD.i_FOD=0;
    ch_FOD.i_page=0;
    prev_ch_FOD.fcLimit=0;

    pa_FOD.fcLimit=127;
    pa_FOD.PROP=(char *) NULL;
    pa_FOD.cch=0;
    pa_FOD.i_FOD=0;
    pa_FOD.i_page=0;
    prev_pa_FOD.fcLimit=0;

    string_ptr = 0; /* Nothing buffered for output yet */
}
/* Output character data: one MIF font statement for each property of
   sourceCHP that differs from currentCHP (or for every property when
   explicit_flag is set).  The hidden-text marker is only written when the
   hidden flag actually changes, whatever explicit_flag says. */
void output_CHP2(sourceCHP, currentCHP, explicit_flag)
struct PROPerty_of_CHaracter
    *sourceCHP, *currentCHP;
char explicit_flag; /* If set, every value is printed */
{
    char *family;

    if (explicit_flag || (sourceCHP->word00.ftc != currentCHP->word00.ftc))
    {
        /* Map the font code to a family name; NULL means "unknown" */
        switch(sourceCHP->word00.ftc)
        {
            case 0:
            case 6:  family = "Courier>"; break;
            case 8:  family = "Helvetica>"; break;
            case 9:  family = "AvantGarde>"; break;
            case 10: family = "HelveticaNarrow>"; break;
            case 16: family = "Bookman>"; break;
            case 24: family = "Times>"; break;
            case 25: family = "NewCenturySchlbk>"; break;
            case 26: family = "Palatino>"; break;
            case 50: family = "Hebrew>"; break;
            case 56: family = "Symbol>"; break;
            case 57: family = "LineDraw>"; break;
            case 58: family = "SuperGreek>"; break;
            case 59: family = "SSuperGreek>"; break;
            default: family = (char *) NULL; break;
        }
        printf(" <FFamily ");
        if (family != NULL)
            puts(family);
        else
            printf("Courier> # Font %u unknown, using default\n",
                   sourceCHP->word00.ftc);
    }

    if (explicit_flag || (sourceCHP->word01.hps != currentCHP->word01.hps))
        printf (" <FSize %u>\n", sourceCHP->word01.hps / 2);

    if (explicit_flag || (sourceCHP->hpsPos != currentCHP->hpsPos))
        printf (" <FDY %d>\n", -(((char)sourceCHP->hpsPos) / (char)2));

    if (explicit_flag || (sourceCHP->word00.fItalic != currentCHP->word00.fItalic))
    {
        printf(" <FItalic ");
        puts(sourceCHP->word00.fItalic ? "Yes>" : "No>");
    }

    if (explicit_flag || (sourceCHP->word00.fBold != currentCHP->word00.fBold))
    {
        printf(" <FBold ");
        puts(sourceCHP->word00.fBold ? "Yes>" : "No>");
    }

    /* Single and double underline both map to the one MIF underline flag */
    if (explicit_flag || (sourceCHP->word01.fUline != currentCHP->word01.fUline)
        || (sourceCHP->word01.fDline != currentCHP->word01.fDline))
    {
        printf(" <FUnderline ");
        puts(((sourceCHP->word01.fUline) || (sourceCHP->word01.fDline))
             ? "Yes>" : "No>");
    }

    if (explicit_flag || (sourceCHP->word01.fStrike != currentCHP->word01.fStrike))
    {
        printf(" <FStrike ");
        puts(sourceCHP->word01.fStrike ? "Yes>" : "No>");
    }

    if (sourceCHP->word01.fHidden != currentCHP->word01.fHidden)
    {
        printf(" <Marker\n <MarkerType 1>\n <MText `Hidden text'>\n");
        printf(" <MStyle %s>\n", sourceCHP->word01.fHidden ? "Start" : "End");
    }
}
/* Output character properties as a <Font ...> statement.
   Nothing is written when explicit_flag is clear and sourceCHP is
   byte-for-byte equal to currentCHP.  A record still marked as styled is
   only reported in a comment line. */
void output_CHP(sourceCHP, currentCHP, explicit_flag)
struct PROPerty_of_CHaracter
    *sourceCHP, *currentCHP;
char explicit_flag; /* If set, every value is printed */
{
    char *wanted = (char *) sourceCHP;
    char *active = (char *) currentCHP;
    int differs = 0;
    int pos;

    if (!explicit_flag)
    {
        /* Stop at the first differing byte; only "any difference" matters */
        for (pos = 0; pos < sizeof(struct PROPerty_of_CHaracter) && !differs; pos++)
            differs = (wanted[pos] != active[pos]);
        if (!differs)
            return; /* Character format not changed */
    }

    if (sourceCHP->word00.fStyled) /* Should only appear if translation bad */
    {
        printf("# Styled Font nb %u\n", sourceCHP->word00.stc);
        return;
    }

    puts("<Font");
    output_CHP2(sourceCHP, currentCHP, explicit_flag);
    puts("> # End of Font");
}
/* Output paragraph property data: one MIF statement for each property of
   sourcePAP that differs from currentPAP (or for every property when
   explicit_flag is set), followed by the tab stops of sourcePAP.
   Distances are divided by 20 before being printed with a "pt" suffix.
   Values decoded with Short() are signed, so they are printed with %d:
   a negative first-line indent would otherwise show up as a huge unsigned
   number. */
void output_PAP2(sourcePAP, currentPAP, explicit_flag)
struct PAragraphProperty
    *sourcePAP, /* Source of changes */
    *currentPAP; /* Current values */
char explicit_flag; /* If set, every value is printed */
{
    int i, k, n;

    if (explicit_flag || sourcePAP->word0.jc != currentPAP->word0.jc)
    {
        printf(" <PgfAlignment ");
        switch(sourcePAP->word0.jc)
        {
            case 1: puts("Center>"); break;
            case 2: puts("Right>"); break;
            case 3: puts("LeftRight>"); break;
            /* 0, and any unexpected code: always close the statement */
            default: puts("Left>"); break;
        }
    }
    if (explicit_flag || sourcePAP->word0.fKeep != currentPAP->word0.fKeep)
    {
        printf(" <PgfSplit ");
        if (sourcePAP->word0.fKeep)
            puts("Yes>");
        else
            puts("No>");
    }
    if (explicit_flag || sourcePAP->word0.fKFollow != currentPAP->word0.fKFollow)
    {
        printf(" <PgfWithNext ");
        if (sourcePAP->word0.fKFollow)
            puts("Yes>");
        else
            puts("No>");
    }
    if (explicit_flag || Short(sourcePAP->dxaRight) != Short(currentPAP->dxaRight))
        printf(" <PgfRIndent %dpt>\n", (Short(sourcePAP->dxaRight)/20) );
    if (explicit_flag
        || (Short(sourcePAP->dxaLeft) != Short(currentPAP->dxaLeft))
        || (Short(sourcePAP->dxaLeft1) != Short(currentPAP->dxaLeft1)))
    {
        printf(" <PgfLIndent %dpt>\n", (Short(sourcePAP->dxaLeft)/20) );
        /* First-line indent is relative to the left indent in the source */
        printf(" <PgfFIndent %dpt>\n",
               (Short(sourcePAP->dxaLeft)/20) + (((int)Short(sourcePAP->dxaLeft1))/20) );
    }
    if (explicit_flag || Short(sourcePAP->dyaLine) != Short(currentPAP->dyaLine))
    {
        /* Leading = line spacing minus the current font size */
        i = ((short) Short(sourcePAP->dyaLine))/20 - (Current_CHP.word01.hps/2);
        printf(" <PgfLeading ");
        if (i > 0)
            printf("%dpt>\n",i);
        else
            printf("0>\n");
    }
    if (explicit_flag || Short(sourcePAP->dyaBefore) != Short(currentPAP->dyaBefore))
        printf(" <PgfSpBefore %dpt>\n", (Short(sourcePAP->dyaBefore)/20) );
    if (explicit_flag || Short(sourcePAP->dyaAfter) != Short(currentPAP->dyaAfter))
        printf(" <PgfSpAfter %dpt>\n", (Short(sourcePAP->dyaAfter)/20) );

    /* Process Tabs: a descriptor with a zero position is an unused slot */
    TBD = (struct TaBDescriptor *) &sourcePAP->rgTBDs[0];
    n=0;
    for (i=0; i<20 ; i++) /* Count the tabs */
        if (Short(TBD[i].dxa))
            n++;
    if (n>0)
        printf(" <PgfNumTabs %d>\n", n);
    for (i=0; i<20; i++)
    {
        if (Short(TBD[i].dxa))
        {
            printf(" <TabStop\n <TSX %d>\n <TSType", (Ushort)Short(TBD[i].dxa)/20);
            switch(TBD[i].word2.jcTab)
            {
                case 1: puts(" Center>"); break;
                case 2: puts(" Right>"); break;
                case 3: puts(" Decimal>"); break;
                default: puts(" Left>"); break;
            }
            switch(TBD[i].word2.tlc) /* TAB leader code */
            {
                case 1: k='.'; break;
                case 2: k='-'; break;
                case 3: k='_'; break;
                /* 0, and any unexpected code: blank leader, so k is never
                   printed uninitialised */
                default: k=' '; break;
            }
            printf(" <TSLeader %d>\n > # End TAB\n", k);
        }
    }
    return;
}
/* Output new paragraph properties (only those which changed).
   Nothing is written when explicit_flag is clear, sourcePAP is byte-for-byte
   equal to currentPAP and no page break is pending.  A styled paragraph is
   written as a bare tag and sets styled_paragraph when a style sheet has
   been read; otherwise a full <Pgf ...> statement is produced. */
void output_PAP(sourcePAP, currentPAP, explicit_flag)
struct PAragraphProperty
    *sourcePAP, /* Source of changes */
    *currentPAP; /* Current values */
char explicit_flag; /* If set, every value is printed */
{
    char *wanted = (char *) sourcePAP;
    char *active = (char *) currentPAP;
    int changed = 0;
    int pos;

    /* Stop at the first differing byte; only "any difference" matters */
    for (pos = 0; pos < sizeof(struct PAragraphProperty) && !changed; pos++)
        if (wanted[pos] != active[pos])
            changed = 1;
    if (!explicit_flag && !changed && new_page == 0)
        return; /* Paragraph format not changed */

    styled_paragraph=0;
    if (sourcePAP->word0.fStyled) /* Style code */
    {
        if (!styled_flag)
        {
            /* No style sheet: fall back to the numeric style code */
            printf(" <PgfTag `Para%u'> # Styled paragraph\n",sourcePAP->word0.stc);
            return;
        }
        printf(" <PgfTag `Para%c%c'> # Styled paragraph\n",
               style_key[sourcePAP->word0.stc].KEY[0],
               style_key[sourcePAP->word0.stc].KEY[1]);
        styled_paragraph=1;
        return;
    }

    puts("<Pgf"); /* Begin of Pgf */
    switch (new_page)
    {
        case 2: /* Break was emitted last time: cancel it and reset flag */
            printf(" <PgfColumnTop No>\n");
            new_page=0;
            break;
        case 1: /* Break pending: emit it, cancel on the next paragraph */
            printf(" <PgfColumnTop Yes>\n");
            new_page=2;
            break;
        default:
            break;
    }
    output_PAP2(sourcePAP, currentPAP, explicit_flag);
    puts("> # End of Pgf");
}
/* ******************************************************************* */
/* String processing */
void flush_string() /* Write out the pending string, if any, and empty it */
{
    if (string_ptr != 0)
    {
        current_string[string_ptr] = '\0'; /* printf needs a terminated string */
        printf("<String `%s'>\n", current_string);
        string_ptr = 0;
    }
}
Uint add2string(carac) /* Append one character; returns the new length */
Uchar carac;
{
    /* A full buffer is written out first so there is always room */
    if (string_ptr >= STRING_LENGTH)
        flush_string();
    current_string[string_ptr++] = carac;
    return(string_ptr);
}
/* ******************************************************************* */
/* Text processing */
/* Handle one byte of text: emit any paragraph or character format change
   that starts at this offset, then convert the byte and write it to stdout
   (directly or through the pending output string).
   Returns the offset of the next byte to be treated, which is offset+2
   after a RETURN+LF pair and offset+1 otherwise. */
Uint handle_byte(offset)
Uint offset; /* Current byte is at this offset in the file (not in
                the array: text_array[offset-128] ) */
{
    Uint ch_ptr;
    char *ptr;

    ch_ptr = offset-128;
    /* Check paragraph information */
    if (find_FOD(ParPage, offset, &pa_FOD, &prev_pa_FOD, ParPageNb)
        || new_paragraph)
    { /* Handle Pgf info */
        if (!new_paragraph) /* Simulate new paragraph even if there was no CR-LF */
        {
            flush_string();
            printf(" > # End ParaLine\n> # End Para\n");
            new_paragraph++;
        }
        printf("<Para\n");
        /* Keep the old settings so only the differences get printed */
        Set_PAP(&Temp_PAP, &Current_PAP, NULL, sizeof(struct PAragraphProperty));
        PAP = (struct PAragraphProperty *) pa_FOD.PROP;
        Set_PAP(&Current_PAP, PAP, &Default_PAP, pa_FOD.cch);
        output_PAP(&Current_PAP, &Temp_PAP, 0); /* Output new properties */
        /* Every paragraph starts from the default character format */
        Set_CHP(&Current_CHP, &Default_CHP, NULL, sizeof(struct PROPerty_of_CHaracter));
        puts(" <ParaLine");
    }
    /* Check character information */
    if ( find_FOD(CharPage, offset, &ch_FOD, &prev_ch_FOD, CharPageNb)
        || new_paragraph)
    {
        flush_string();
        Set_CHP(&Temp_CHP, &Current_CHP, NULL, sizeof(struct PROPerty_of_CHaracter));
        Chp = (struct PROPerty_of_CHaracter *) ch_FOD.PROP;
        if (ch_FOD.cch==0 && styled_paragraph) /* Use default pgf font? */
        {
            Set_CHP(&Current_CHP, style_key[Current_PAP.word0.stc].PROP,
                    &Default_CHP, style_key[Current_PAP.word0.stc].cch);
        }
        else
        {
            /* Use direct formatting */
            Set_CHP(&Current_CHP, Chp, &Default_CHP, ch_FOD.cch);
        }
        output_CHP(&Current_CHP, &Temp_CHP, 0); /* Output new properties */
    }
    new_paragraph=0;
    /* Special characters? */
#define ch (Uchar)text_array[ch_ptr]
#define ch2 (Uchar)text_array[ch_ptr+1]
    /* Only look at the following byte when it is still inside the text
       buffer; a RETURN in the very last position has no successor. */
    if (ch==RETURN && ch_ptr+1 < text_array_size && ch2==LF) /* End of paragraph */
    {
        flush_string();
        puts(" > # End ParaLine");
        puts("> # End Para");
        new_paragraph=1;
        return(offset+2);
    }
    if (ch==NL) /* Hard line break */
    {
        flush_string();
        puts("<Char HardReturn>");
        puts(" > # End ParaLine");
        puts(" <ParaLine ");
        return(offset+1);
    }
    if (ch==FF) /* Form Feed */
    {
        flush_string();
        puts(" > # End ParaLine");
        puts("> # End Para");
        puts("# New Page");
        new_paragraph=1; /* Real form feed */
        new_page=1;
        return(offset+1);
    }
    /* Handle special characters */
    if (Current_CHP.word01.fSpecial)
    {
        if (ch==FOOTNOTE || ch ==5) /* This is a footnote */
        {
            flush_string();
            printf("<String `%u'> # Footnote ref\n", Current_footnote);
            if (! processing_notes)
                printf("<AFrame %u> # Footnote text Frame\n", (Current_footnote)*2 );
            Current_footnote++;
            return(offset+1);
        }
        flush_string();
        printf("# Special char %u ignored\n", ch);
        return(offset+1); /* Other special characters are ignored %%% */
    }
    if (char_def[ch] == 0) /* Characters translated to 0 are dropped */
        return(offset+1);
    if (char_def[ch]<' ' || char_def[ch]>=0x7f /* Non standard ASCII */
        || char_def[ch] == 0x27
        || char_def[ch] == TAB
        || char_def[ch] == '\\'
        || char_def[ch] == '>'
        || char_def[ch] == 0x60 )
    {
        /* Written as a \x escape; make sure the escape fits in the buffer */
        if (string_ptr+7>=STRING_LENGTH)
            flush_string();
        ptr=(char *) &current_string[string_ptr];
        sprintf(ptr, "\\x%x ",(Uint) char_def[ch]);
        string_ptr += strlen(ptr);
    }
    else /* Standard ASCII */
    {
        if (islower(char_def[ch]) && Current_CHP.word01.csm) /* Convert to upper case */
            add2string((Uchar)toupper(char_def[ch]));
        else
            add2string(char_def[ch]);
    }
    return(offset+1);
}
/* Process text: write one <TextFlow ...> statement covering the bytes
   from start_offset up to, but not including, end_offset.  Any paragraph
   still open at the end of the range is closed. */
void process_text(start_offset, end_offset, text_ID)
Uint start_offset, /* Offset into text_array of the text portion to handle */
    end_offset, /* Offset of byte following end of portion */
    text_ID; /* ID of textrect */
{
    Uint cursor, stop;

    puts("<TextFlow");
    if (text_ID != 0)
        printf(" <TextRectID %u>\n", text_ID);

    Set_PAP(&Current_PAP, &Default_PAP, NULL, sizeof(struct PAragraphProperty));
    new_paragraph=1;

    /* handle_byte() works with file offsets, i.e. array offsets + 128,
       and tells us where to continue */
    cursor = start_offset+128;
    stop = end_offset+128;
    while (cursor < stop)
        cursor = handle_byte(cursor);

    flush_string();
    if (! new_paragraph)
    {
        printf(" > # End ParaLine\n > # End Para\n");
        new_paragraph++;
    }
    puts("> # End of TextFlow");
}
/* ******************************************************************* */
/* READ STYLE SHEET */
/* Read a style sheet file, remember the character and paragraph formats
   of its styles, and write the paragraph styles to stdout as a <Catalog>.
   Returns 1 on success and 0 when the style sheet cannot be used.  As in
   the rest of the program, failure to allocate or read the style text
   itself ends the program.
   On success the formatting pages stay allocated because styles[] and
   style_key[] point into them; the style text buffer is released. */
int read_style(StyleFileName)
char *StyleFileName;
{
    int FileIn, /* Input file descriptor */
        i,n;
    struct st_line *s_line; /* pointer to current line */

    FileIn = open(StyleFileName,O_RDONLY);
    if (FileIn == -1)
    {
        printf("# Impossible de lire la feuille de style : %s\n",StyleFileName);
        return(0);
    }
    if (read(FileIn,&s_header,sizeof(s_header)) != sizeof(s_header))
    {
        puts("# Impossible de lire l'entete (Style)");
        close(FileIn);
        return(0);
    }
    /* Check the style sheet's own header, not the document header */
    if ((Ushort)Short(s_header.Wident) != 0137061)
    {
        puts("# Not a Word 4.0 style sheet");
        close(FileIn);
        return(0);
    }
    text_array_size = Int(s_header.fcMac) - 128;
    last_text_byte = text_array_size;
    printf("# Style sheet text size:%u bytes\n", text_array_size);
    text_array = (char *) malloc(text_array_size); /* Memory allocation */
    if (text_array == NULL)
    {
        puts("Erreur d'allocation");
        exit(1);
    }
    if (read(FileIn, text_array, text_array_size) != text_array_size)
    {
        puts("Erreur lors de la lecture du texte");
        exit(1);
    }
    puts("# Style sheet text read ");
    /* Read the formatting pages */
    /* Characters */
#define s_pnChar ((short)((Int(s_header.fcMac) + 127) / 128))
    /* Page number of beginning of Char info */
    s_CharPageNb = (Ushort)Short(s_header.pnPara) - s_pnChar;
    s_CharPage = (struct Page *) malloc(s_CharPageNb*128);
    if (s_CharPage == NULL)
    {
        puts("# (style) Erreur d'allocation, caracteres");
        goto fail;
    }
    lseek(FileIn, (s_pnChar*128), L_SET); /* Go to beginning of pages */
    if (read(FileIn, s_CharPage, (s_CharPageNb*128)) != (s_CharPageNb*128))
    {
        puts("# (style) Erreur lors de la lecture du format de caractere");
        goto fail;
    }
    printf("# (style) Char table read, %u pages\n",s_CharPageNb);
    /* Paragraphs */
    s_ParPageNb = (Ushort)Short(s_header.pnFntb) - (Ushort)Short(s_header.pnPara);
    s_ParPage = (struct Page *) malloc(s_ParPageNb*128);
    if (s_ParPage == NULL)
    {
        puts("# (style) Erreur d'allocation, paragraphes");
        goto fail;
    }
    lseek(FileIn, ((Ushort)Short(s_header.pnPara)*128), L_SET); /* Go to beginning of pages */
    if (read(FileIn, s_ParPage, (s_ParPageNb*128)) != (s_ParPageNb*128))
    {
        puts("# (style) Erreur lors de la lecture du format de paragraphe");
        goto fail;
    }
    printf("# Para table read, %u pages\n",s_ParPageNb);
    close(FileIn);
    FileIn = -1;
    /* Initialise the character styles: every style starts as the default
       CHP.  The bound is the size of styles[] itself. */
    for (n=0; n < (int) (sizeof(styles)/sizeof(styles[0])); n++)
    {
        styles[n].PROP = (char *) &Default_CHP;
        styles[n].cch = sizeof(struct PROPerty_of_CHaracter);
    }
    style_key = (struct STYLE_KEY *) malloc(sizeof(struct STYLE_KEY)*128);
    if (style_key == NULL)
    {
        puts("# Erreur d'allocation");
        goto fail;
    }
    for (n=0; n<128; n++)
    {
        style_key[n].KEY[0]=' ';
        style_key[n].KEY[1]=' ';
        /* An undefined style has no font of its own: Set_CHP() then falls
           back to the defaults instead of reading through a stale pointer */
        style_key[n].PROP = (char *) NULL;
        style_key[n].cch = 0;
    }
    s_line = (struct st_line *) text_array;
    ch_FOD.fcLimit = 127;
    pa_FOD.fcLimit = 127;
    /* Print the paragraph catalog */
    puts("<Catalog");
    /* Only complete records are processed */
    for (i=0; sizeof(struct st_line)*(i+1) <= last_text_byte; i++)
    {
        printf("# Processing style sheet line %u\n",i);
        if (s_line[i].stc >= 128) /* No room for this code in style_key[] */
            continue;
        /* Copy 2 char key */
        strncpy((char *) style_key[s_line[i].stc].KEY, (char *) s_line[i].KEY, 2);
        if (s_line[i].stc<30) /* Character styles fit in styles[] */
        {
            printf("# Storing character style %u\n", s_line[i].stc);
            if (find_FOD(s_CharPage, (i*32)+128, &ch_FOD, &prev_ch_FOD, s_CharPageNb))
            {
                styles[s_line[i].stc].PROP = (char *) ch_FOD.PROP;
                styles[s_line[i].stc].cch = ch_FOD.cch;
            }
        }
        else
        {
            if (s_line[i].stc<105) /* Paragraph style is in 31..104 */
            {
                find_FOD(s_ParPage, (i*32)+128, &pa_FOD, &prev_pa_FOD, s_ParPageNb);
                Set_PAP(&Current_PAP, (struct PAragraphProperty *) pa_FOD.PROP,
                        &Default_PAP, pa_FOD.cch);
                find_FOD(s_CharPage, (i*32)+128, &ch_FOD, &prev_ch_FOD, s_CharPageNb);
                /* Store pgf's character format */
                style_key[s_line[i].stc].PROP = (char *) ch_FOD.PROP;
                style_key[s_line[i].stc].cch = ch_FOD.cch;
                Set_CHP(&Current_CHP, (struct PROPerty_of_CHaracter *) ch_FOD.PROP,
                        &Default_CHP, ch_FOD.cch);
                printf("<Pgf\n <PgfTag `Para%c%c'> # Styled paragraph\n",
                       s_line[i].KEY[0], s_line[i].KEY[1]);
                output_PAP2(&Current_PAP, &Default_PAP, 1);
                printf("<Font\n");
                output_CHP2(&Current_CHP, &Default_CHP, 1);
                printf("> # End of Font\n> # End of Pgf\n");
            }
        }
    }
    puts("> # End Catalog");
    init_defaults();
    free(text_array);
    return(1);

fail:
    /* Give back what this call acquired before reporting failure */
    if (FileIn != -1)
        close(FileIn);
    free(text_array);
    text_array = (char *) NULL;
    return(0);
}
/* ******************************************************************* */
/* MAIN PROGRAM
============ */
main(argc,argv)
int argc;
char *argv[];
{
int FileIn, /* Input file descriptor */
i,n,a,b;
char *ptr;
FILE *DataFile;
char input_path[90]; /* storage for input file path */
DataFileName = (char *) argv[2];
for (n=0; n<256; n++)
char_def[n]=n;
DataFile = fopen(DataFileName,"r");
if (DataFile != NULL)
{
while (fscanf(DataFile,"<%u %u>\n",&b,&a) == 2)
char_def[b]=a;
close(DataFile);
}
init_defaults(); /* Initialise some constants */
FileIn = open(argv[1],O_RDONLY);
if (FileIn == -1)
{ puts("Impossible d'ouvrir le fichier d'entree");
exit(1);
}
if (read(FileIn,&header,sizeof(header)) != sizeof(header))
{ puts("Impossible de lire l'entete");
exit(1);
}
if ((Ushort)Short(header.Wident) != 0137061)
{ puts("Not a Word 4.0 file");
exit(1);
}
/* Impression de l'entete */
puts("<MIFFile 1.01> # Generated by word2mif, University of Geneva");
puts("include(mif_read.m4)");
puts("#");
printf("# File Name:%s\n", argv[1]);
puts("<Units Ucm>");
/* Lecture et analyse de la feuille de style */
styled_flag = 0;
i=0;
for (n=0; n<80 && argv[1][n]; n++) /* Isolate the path of input file */
if (argv[1][n]=='/')
i=n+1; /* End of path is at last slash */
if (i)
strncpy(input_path, argv[1], i); /* Copy input path */
input_path[i]=0; /* End of string */
i=0; /* Ignore the PC tree path */
for (n=0; n<33 && header.szSsht[n]; n++)
{
if (isupper(header.szSsht[n])) /* map filename to lower case */
header.szSsht[n]= tolower(header.szSsht[n]);
if (header.szSsht[n]=='\\') /* Filename starts after last backslash */
i=n+1;
}
if (n)
{
strncat(input_path, &header.szSsht[i],90);
printf("# Style sheeet name:%s i=%u\n", input_path, i);
styled_flag = read_style(input_path);
}
/* Lecture du texte proprement dit */
text_array_size = Int(header.fcMac) - 128;
last_text_byte = text_array_size;
printf("# Text size:%u bytes\n", text_array_size);
text_array = (char *) malloc(text_array_size); /* Allocation de memoire */
if (text_array == NULL)
{ puts("Erreur d'allocation");
exit(1);
}
if (read(FileIn, text_array, text_array_size) != text_array_size)
{ puts("Erreur lors de la lecture du texte");
exit(1);
}
puts("# Text read ");
/* Lecture des pages de formatage */
/* Caracteres */
CharPageNb = (Ushort)Short(header.pnPara) - pnChar;
CharPage = (struct Page *) malloc(CharPageNb*128);
if (CharPage == NULL)
{ puts("Erreur d'allocation, caracteres");
exit(1);
}
lseek(FileIn, (pnChar*128), L_SET); /* Go to beginning of pages */
if (read(FileIn, CharPage, (CharPageNb*128)) != (CharPageNb*128))
{ puts("Erreur lors de la lecture du format de caractere");
exit(1);
}
printf("# Char table read, %u pages\n",CharPageNb);
/* Paragraphes */
ParPageNb = (Ushort)Short(header.pnFntb) - (Ushort)Short(header.pnPara);
ParPage = (struct Page *) malloc(ParPageNb*128);
if (ParPage == NULL)
{ puts("Erreur d'allocation, paragraphes");
exit(1);
}
lseek(FileIn, ((Ushort)Short(header.pnPara)*128), L_SET); /* Go to beginning of pages */
if (read(FileIn, ParPage, (ParPageNb*128)) != (ParPageNb*128))
{ puts("Erreur lors de la lecture du format de paragraphe");
exit(1);
}
printf("# Para table read, %u pages\n",ParPageNb);
/* Footnotes */
FntbPageNb = (Ushort)Short(header.pnSep) - (Ushort)Short(header.pnFntb);
if (FntbPageNb == 0)
FNTB == NULL;
else
{
FNTB = (struct FootNoteTaBle *) malloc(FntbPageNb*128);
if (FNTB == NULL)
{ puts("Erreur d'allocation, notes");
exit(1);
}
lseek(FileIn, ((Ushort)Short(header.pnFntb)*128), L_SET); /* Go to beginning
of pages */
if (read(FileIn, FNTB, (FntbPageNb*128)) != (FntbPageNb*128))
{ puts("Erreur lors de la lecture de la table des notes");
exit(1);
}
printf("# Footnote table read, %u pages\n", FntbPageNb);
/* Process footnotes */
/* 1: Define anchored Frames */
puts("<AFrames");
for (i=1; i < (Ushort)Short(FNTB->cfnd); i++) /* create one Frame per footnote */
{
puts(" <Frame");
printf(" <ID %u>\n",(i*2)); /* Frame ID, begins at 2 (4,6,8...) */
puts(" <Pen 15>");
puts(" <PenWidth `0.500 '>");
puts(" <Fill 7>");
puts(" <Inverted No>");
/* puts(" <BRect 1.00\" 8.25\" 6.25\" 1.00\">"); */
puts(" <BRect 0 0 6.25\" 35pt>");
puts(" <FrameType Bottom>");
puts(" <NSOffset 0\">");
puts(" <BLOffset 0\">");
puts(" <AnchorAlign Left>");
puts(" <Cropped Yes>");
puts(" <TextRect");
printf(" <ID %u>\n", (i*2)+1); /* TextRectID, value 3,5,7,9... */
puts(" <BRect 0\" 0\" 6.13\" 30pt>");
puts(" <TRNext 0>");
puts(" > # End of TextRect");
puts(" > # End of Frame");
}
puts("> # End of AFrames");
FND = (struct FootNoteDescriptor *) &(FNTB->FNDs); /* Address of first FND */
last_text_byte = Int(FND[0].cpFtn)-1;
}
/* Process main text */
printf("# Main text: First byte:0, last byte:%u\n", last_text_byte+1);
processing_notes=0;
process_text(0, last_text_byte+1, 0); /* Main loop */
if (FntbPageNb)
{
/* 2: Process footnote contents */
processing_notes = 1;
Current_footnote = 1;
puts("# Footnotes section");
for (i=1; i < (Ushort)Short(FNTB->cfnd); i++)
{
printf(" # Start:%u Stop:%u\n", Int(FND[(i-1)].cpFtn),
Int(FND[i].cpFtn));
process_text(Int(FND[(i-1)].cpFtn), Int(FND[i].cpFtn), (i*2)+1);
}
puts("# End of footnotes");
}
printf("# End of MIFFILE\n");
close(FileIn);
exit(0);
}
//E*O*F word2mif.c//
echo x - word2mif.data
cat > "word2mif.data" << '//E*O*F word2mif.data//'
<31 6>
<128 130>
<129 159>
<130 142>
<131 137>
<132 138>
<133 136>
<134 140>
<135 141>
<136 144>
<137 145>
<138 143>
<139 149>
<140 148>
<141 147>
<142 128>
<143 129>
<144 131>
<145 190>
<146 174>
<147 153>
<148 154>
<149 152>
<150 158>
<151 157>
<152 217>
<153 133>
<154 134>
<155 162>
<156 163>
<157 180>
<159 196>
<160 135>
<161 146>
<162 151>
<163 156>
<164 150>
<165 132>
<166 187>
<167 188>
<168 192>
<173 193>
<174 199>
<175 200>
<196 5>
<255 4>
//E*O*F word2mif.data//
echo x - word4.h
cat > "word4.h" << '//E*O*F word4.h//'
/* Microsoft Word 4.0 Binary file format (IBM-PC)
O. Schopfer, University of Geneva, August 1988
"@(#) word4.h 1.2.1.2 8/25/88 Release %I%
*/
/* Constants: character codes with a special meaning in the text stream */
#define RETURN 13 /* Paragraphs end with RETURN+LF */
#define LF 10
#define NL 11 /* Hard line break */
#define FF 12 /* Explicit page break */
#define TAB 9 /* Tabulator */
#define SPACE 32 /* Normal space */
#define NBSPACE 255 /* Non-breaking space */
#define HYPHEN 45 /* Normal Hyphen */
#define NBHYPHEN 196 /* Non-breaking Hyphen */
#define NRHYPHEN 31 /* Non-required Hyphen */
#define PAGE 1 /* (page) */
#define DATE 2 /* (print date) */
#define TIME 3 /* (print time) */
#define FOOTNOTE 4 /* Footnote reference mark (automatic numbering) */
/* Types */

/* Shorthand names for the unsigned integer types. */
typedef unsigned char  Uchar;
typedef unsigned short Ushort;
typedef unsigned int   Uint;
typedef unsigned long  Ulong;

/*
 * Fixed-size containers built purely from bytes:
 *   PCshort = 2 bytes, PCint = 2 PCshorts, PClong = 2 PCints.
 * NOTE(review): presumably decoded by helpers such as Short()/Int()
 * defined elsewhere -- confirm against word2mif.c.
 */
typedef struct PCSHORT
{
    Uchar byte[2];
} PCshort;

typedef struct PCINT
{
    PCshort word[2];
} PCint;

typedef struct PCLONG
{
    PCint lword[2];
} PClong;
/* Header */
struct Header
{
    PCshort Wident;       /* Should be 0137061 */
    PCshort dty;          /* Document type, should be 0 */
    PCshort wTool;        /* Reserved, 0125400 */
    PCshort Reserved[4];
    PCint   fcMac;        /* Number of bytes of actual text PLUS 128
                             (bytes in one sector, low order first) */
    PCshort pnPara;       /* Page number of start of Paragraph info
                             (a page is a 128-byte chunk) */
    /* pnChar logically belongs here; see the macro after the struct */
    PCshort pnFntb;       /* Page number of Footnote table FNTB
                             (pnSep if none) */
    PCshort pnSep;        /* Page number of start of division info
                             (pnSetb if none) */
    PCshort pnSetb;       /* Page number of start of SETB
                             (pnPgtb if none) */
    PCshort pnPgtb;       /* Page number of start of Page Table PGTB */
    PCshort pnSumd;       /* Page number of Summary Information */
    char    szSsht[66];   /* Style sheet filename, zero terminated */
    char    Reserv[2];
    char    PRD[8];       /* PRD filename without path+extension */
    PCshort pnMac;        /* Count of disk pages in whole file
                             (last page number + 1) */
    PCshort fMarkRev;     /* Revision mark */
    PCshort Reserv02[9];
};

/* Page number of beginning of Char info: not a stored field, it is
   computed from the global `header` by rounding fcMac up to whole
   128-byte pages. */
#define pnChar ((short)((Int(header.fcMac) + 127) / 128))
/* One page of formatting information: 4-byte start offset, 123 bytes
   of FOD/FPROP data, and a trailing 1-byte FOD count. */
struct Page
{
    PCint fcFirst;      /* Byte # in file of first character covered by
                           this page of formatting info. The byte # of
                           the 1st character in the text is 128.
                           (low order byte first) */
    Uchar FODs[123];    /* FOrmat Descriptors and Format PROPerties */
    Uchar cfod;         /* Number of FODs on this page */
};
/* FOD: Format Descriptor, a fixed-size record */
struct FOrmatDescriptor
{
    /* Byte # in file AFTER the last character covered by this FOD */
    PCint   fcLim;

    /* Byte offset, counted from the beginning of FODs[], of the FPROP
       belonging to these characters or this paragraph.
       FFFF means there is no FPROP. */
    PCshort bfprop;
};
/* FPROP: Formatting PROPerty, a variable-size record.
   Only its leading length byte is declared here. */
struct FormatPROP
{
    Uchar cch;   /* Byte count of this FPROP, not counting cch itself */
};
/* CHP CHaracter Property */

/* First 16 bits of a CHP (marked as bytes 0 and 1 in the layout notes).
   NOTE(review): bit-field ordering and padding are compiler-defined;
   confirm the target compiler packs these as the file format expects. */
struct Word0
{
    /* byte 0 */
    unsigned int stc     : 7;   /* Style code */
    unsigned int fStyled : 1;   /* Is character styled */
    /* byte 1 */
    unsigned int ftc     : 6;   /* Font code */
    unsigned int fItalic : 1;   /* Italic */
    unsigned int fBold   : 1;   /* Bold flag */
};
/* Second word of a CHP: font size byte followed by eight flag bits
   (marked as bytes 2 and 3 in the layout notes). */
struct Word1
{
    /* byte 2 */
    Uchar hps;                   /* Size of font in half pts (def: 24 dec) */
    /* byte 3 */
    unsigned int fHidden  : 1;   /* Hidden char. */
    unsigned int fSpecial : 1;   /* Special character (i.e. Footnote) */
    unsigned int csm      : 2;   /* Case modifier:
                                      0 normal
                                      1 upper
                                      2 small caps */
    unsigned int fNew     : 1;   /* Revised char(s) */
    unsigned int fDline   : 1;   /* Double Underline */
    unsigned int fStrike  : 1;   /* Strikethrough */
    unsigned int fUline   : 1;   /* Underline */
};
/* Complete character property record: the two packed words above,
   then pitch, vertical position and reserved bytes. */
struct PROPerty_of_CHaracter
{
    struct Word0 word00;
    struct Word1 word01;
    Uchar hpsPitch;      /* byte 4: Reserved */
    char  hpsPos;        /* byte 5: Position:  0 Normal
                                              <0 Subscript
                                              >0 Superscript */
    char  Reserv02[4];
};
/* PAP PAragraph Property */

/* First 16 bits of a PAP (marked as bytes 0 and 1 in the layout notes).
   NOTE(review): bit-field ordering and padding are compiler-defined;
   confirm the target compiler packs these as the file format expects. */
struct Word00
{
    /* byte 0 */
    unsigned int stc      : 7;   /* Style code */
    unsigned int fStyled  : 1;   /* Styled flag */
    /* byte 1 */
    unsigned int          : 3;   /* Reserved */
    unsigned int fSBS     : 1;   /* Parag. Side by side */
    unsigned int fKeep    : 1;   /* Keep paragraph on one page */
    unsigned int fKFollow : 1;   /* Keep this parag. together with next */
    unsigned int jc       : 2;   /* Justification: 0=left
                                                   1=center
                                                   2=right
                                                   3=both */
};
/* Second word of a PAP (marked as bytes 2 and 3 in the layout notes). */
struct Word01
{
    /* byte 2 */
    unsigned int         : 1;   /* Reserved */
    unsigned int stcNorm : 7;   /* Style code of normal chars. */
    /* byte 3 */
    unsigned int fHidden : 1;   /* Paragraph is Hidden in outline view
                                   (mode plan) */
    unsigned int level   : 7;   /* Level number (outline) */
};
/* Ninth word of a PAP (marked as bytes 16 and 17 in the layout notes). */
struct Word08
{
    /* byte 16 */
    unsigned int bsc : 2;   /* Border style code (0=Normal borders) */
    unsigned int btc : 2;   /* Border type code (0=normal parag) */
    unsigned int rhc : 4;   /* Running-head code (0=normal parag) */
    /* byte 17 */
    unsigned int     : 8;   /* Reserved */
};
/* Complete paragraph property record. Byte positions quoted below are
   the ones given in the original layout notes. */
struct PAragraphProperty
{
    struct Word00 word0;
    struct Word01 word1;
    PCshort dxaRight;     /* 4-5:   Right indent in twips (=1/1440 inch) */
    PCshort dxaLeft;      /* 6-7:   Left indent in twips */
    PCshort dxaLeft1;     /* 8-9:   First line indent */
    PCshort dyaLine;      /* 10-11: Line spacing (standard 240) */
    PCshort dyaBefore;    /* 12-13: Space before */
    PCshort dyaAfter;     /* 14-15: Space after */
    struct Word08 word8;
    char Reserv03[4];     /* 18: Reserved */
    char rgTBDs[80];      /* 22: Room for 20 tab stops */
};
/* TBD Tab descriptor */

/* Second word of a TBD: packed leader/justification bits followed by
   the alignment character (bytes 2 and 3 in the layout notes). */
struct Word002
{
    /* byte 2 */
    unsigned int       : 2;   /* Reserved */
    unsigned int tlc   : 3;   /* Tab leader code: 0=none, 1=dots,
                                 2=hyphens, 3=underline. */
    unsigned int jcTab : 3;   /* Justification after tab:
                                 0=left, 1=center, 2=right,
                                 3=decimal, 4=vertical bar */
    /* byte 3 */
    char chAlign;             /* Char to decimal align on (0=".") */
};
/* One tab stop: its position plus the packed leader/alignment word. */
struct TaBDescriptor
{
    PCshort        dxa;     /* Distance from left margin in twips */
    struct Word002 word2;
};
/* FOOTNOTE SECTION
The footnote section (optional) starts at the first complete page after the
PARAGRAPH section, and contains the FNTB which contains an array of FND,
footnote descriptors */
/* FNTB: Footnote Table -- two counters followed by the FND array */
struct FootNoteTaBle
{
    PCshort cfnd;      /* Number of FND + 1 (1 or more) */
    PCshort cfndMax;   /* Same as word 0 */
    char    FNDs;      /* First char of first FND; declared only so
                          that its address can be taken */
};
/* FND: Footnote Descriptor -- a pair of offsets into the text area */
struct FootNoteDescriptor
{
    PCint cpRef;   /* Byte offset into text area of footnote ref */
    PCint cpFtn;   /* Byte offset into text area of footnote text */
};
/* SECTION PROPERTY (Division)
The SEP section (optional) is on the page immediately after the footnote
section, and contains one or more SEPS */
/* Leading word of a SEP: length byte, then style code and styled flag. */
struct SEP_Word0
{
    Uchar cch;                  /* Count of bytes, EXCLUDING this one */
    unsigned int stc     : 7;   /* Style code */
    unsigned int fStyled : 1;   /* Section is styled */
};
/* SEP: section (division) property record */
struct SEctionProperty
{
    struct SEP_Word0 word0;   /* bytes 0-1 */
    char    Bkc_Pgn;          /* Break code, and page numbering */

    /* Page geometry */
    PCshort yaMac;            /* Page length in Twips */
    PCshort xaMac;            /* Page width in Twips */
    PCshort pgnStart;         /* Start numbering at ... (def: -1) */
    PCshort yaTop;            /* Top margin Twips */
    PCshort dyaText;          /* Text height */
    PCshort xaLeft;           /* Left margin */
    PCshort dxaText;          /* Text width */

    char    Reserved;         /* Running heads etc. */
    char    cColumns;         /* Number of columns */

    /* Headers, columns, page and line numbers */
    PCshort yaRH1;            /* Position of top header */
    PCshort yaRH2;            /* Position of bottom header */
    PCshort dxaCol;           /* Gap between columns */
    PCshort dxaGutter;        /* Gutter (binding) margin */
    PCshort yaPgn;            /* Y position of page # */
    PCshort xaPgn;            /* X position of page # */
    PCshort dxaLnn;           /* Distance of line # from left */

    char    Reserved02;
};
struct SEctionTaBle
{
PCshort csed, /* # of sections (1 or more ) */
csedMax; /* Undefined (=csed?) */
};
/* Section descriptor: ties an end-of-section mark in the text to the
   file position of its SEP. */
struct SEctionDescriptor
{
    PCint   cp;      /* Byte offset in text area of the end-of-section
                        mark */
    PCshort fn;      /* Undefined */
    PCint   fcScp;   /* Byte offset (into file) of associated SEP */
};
/* ************************************************
STYLE SHEET SPECIFICATIONS */
/* One text line of a style sheet: 1 + 2 + 28 + 1 = 32 chars */
struct st_line
{
    char stc;          /* Style code */
    char KEY[2];       /* Two letters of style code */
    char remark[28];   /* Description */
    char cr;
};
//E*O*F word4.h//
exit 0