BHUBER@ECLA.USC.EDU.UUCP (09/20/87)
Since I received far too many requests for the Orca Pascal source code to
read Appleworks word processing files, I am posting to the entire mailing
list.
This is NOT a complete program! Only the portions to read the AWP files
are given. This is Orca Pascal source for the GS machine and would require
that compiler in order to use it. Porting to TML should be simple (I've
done it).
{$keep 'typeset',optimize 7,names-,rangecheck-}
program typeset (input,output);
var
in_char: char;
in_file_name: string[255];
in_file: file of byte;
segment_length: integer;
characters_in,
characters_out: longint;
procedure initialization_routine;
var x,i: integer;
begin
in_file_name := '';
write('Please provide an AWP input filename? ');
readln(input,in_file_name);
reset(in_file,in_file_name);
for x := 1 to 300 do read(in_file,i); {skip header info}
characters_in := 300;
characters_out := 0;
document_line_number := 0;
segment_length := 0;
end;
procedure get_next_segment_text_character;
procedure get_next_segment_character;
procedure get_next_character;
var i: integer;
begin {get_next_character}
if eof(in_file) then close_out_processing;
read(in_file,i);
in_char := chr(i);
characters_in := characters_in + 1
end; {get_next_character}
begin {get_next_segment_character}
get_next_character;
if segment_length > 1 then segment_length := segment_length - 1
else
begin {get next segment of text}
segment_length := ord(in_char) - 2;
document_line_number := document_line_number + 1;
get_next_character; {tells what kind of line follows}
if ord(in_char) in [0..80] then {only AWP text records qualify here}
begin
get_next_character; {skip 2 junk bytes}
get_next_character;
get_next_character {then go into processing}
end
else
begin {get a text segment}
segment_length := 0;
get_next_segment_character {otherwise get next segment}
end
end
end; {get_next_segment_character}
begin {get_next_segment_text_character}
get_next_segment_character;
if in_char < ' ' then {process if printer control codes, etc.}
case ord(in_char) of
$01, {begin boldface}
$02, {end boldface}
$03, {begin superscript}
$04, {end superscript}
$05, {begin subscript}
$06, {end subscript}
$07, {begin underline}
$08, {end underline}
$09, {print page number}
$0a: {enter keyboard}
get_next_segment_text_character;
$0b: {sticky space}
in_char := ' ';
$0c: {mail merge functions}
begin
output_error(6);
repeat get_next_segment_text_character until in_char = ']';
get_next_segment_text_character
end;
otherwise: {unknown imbedded control character}
begin
output_error(3);
get_next_segment_text_character
end
end {of case statement}
end;
procedure process_each_text_character;
begin {process_each_text_character}
get_next_segment_text_character;
{put your own stuff in here - each text character (with control codes and mail
merge command removed) appears here, one character (located in "in_char") at a
time for each call to "get_next_segment_text_character"}
end; {process_each_text_character}
begin
initialization_routine;
while not eof(in_file) do process_each_text_character;
close_out_processing {do whatever you need to do here}
end.
Note: the output_error() references an unlisted procedure.
Cheers,
Bud Huber