[comp.sources.misc] v08i074: cz text to PostScript system, part 10 of 14

allbery@uunet.UU.NET (Brandon S. Allbery - comp.sources.misc) (10/01/89)

Posting-number: Volume 8, Issue 74
Submitted-by: howard@dahlbeck.ericsson.se (Howard Gayle)
Archive-name: cz/part10

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then feed it
# into a shell via "sh file" or similar.  To overwrite existing files,
# type "sh file -c".
# The tool that generated this appeared in the comp.sources.unix newsgroup;
# send mail to comp-sources-unix@uunet.uu.net if you want that tool.
# If this archive is complete, you will see the following message at the end:
#		"End of archive 10 (of 14)."
# Contents:  78seus.c
# Wrapped by howard@dahlbeck on Mon Sep 25 07:15:23 1989
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f '78seus.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'78seus.c'\"
else
echo shar: Extracting \"'78seus.c'\" \(50344 characters\)
sed "s/^X//" >'78seus.c' <<'END_OF_FILE'
X/*
X * 78seus - convert Swedish or (US) English from ISO 646 to ISO 8859/1
X */
X
X#ifndef lint
Xstatic char _cpyrgt[] = "Copyright 1989 Howard Lee Gayle";
X#endif lint
X
X/*
X * This program is free software; you can redistribute it and/or modify
X * it under the terms of the GNU General Public License version 1,
X * as published by the Free Software Foundation.
X *
X * This program is distributed in the hope that it will be useful,
X * but WITHOUT ANY WARRANTY; without even the implied warranty of
X * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
X * GNU General Public License for more details.
X *
X * You should have received a copy of the GNU General Public License
X * along with this program; if not, write to the Free Software
X * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
X */
X
X#include <stdio.h>
X#include <howard/port.h>
X#include <howard/version.h>
X#include <howard/usage.h>
X
XMAINVER ("@(#)$Header: 78seus.c,v 1.43 89/08/28 18:39:58 howard Exp $");
XUSAGE ("[-# shar-points] [-A attack] [-B blank-line-smoothing] [-D decay] [-b body-points] [-c colon-smoothing] [-d] [-f] [-m] [-s lines] [-t threshold]");
X
X#include <ctype.h>
X#include <limits.h>
X#include <string.h>
X#include <howard/a2.h>
X#include <howard/malf.h>
X#include <howard/registers.i>
X#include "cz.h"
X#include "78.h"
X
XPRIVATE byteT byte2t[256] = /* Map each byte to a trigram code.*/
X{
X32,/* 0/ 0    0    0   0  NUL (null)                                         */
X32,/* 0/ 1    1    1   1  SOH (start of heading)                             */
X32,/* 0/ 2    2    2   2  STX (start of text)                                */
X32,/* 0/ 3    3    3   3  ETX (end of text)                                  */
X32,/* 0/ 4    4    4   4  EOT (end of transmission)                          */
X32,/* 0/ 5    5    5   5  ENQ (enquiry)                                      */
X32,/* 0/ 6    6    6   6  ACK (acknowledge)                                  */
X32,/* 0/ 7    7    7   7  BEL (bell)                                         */
X32,/* 0/ 8   10    8   8  BS  (backspace)                                    */
X32,/* 0/ 9   11    9   9  HT  (horizontal tabulation)                        */
X32,/* 0/10   12   10   A  LF  (line feed)                                    */
X32,/* 0/11   13   11   B  VT  (vertical tabulation)                          */
X32,/* 0/12   14   12   C  FF  (form feed)                                    */
X32,/* 0/13   15   13   D  CR  (carriage return)                              */
X32,/* 0/14   16   14   E  SO  (shift out)                                    */
X32,/* 0/15   17   15   F  SI  (shift in)                                     */
X32,/* 1/ 0   20   16  10  DLE (data link escape)                             */
X32,/* 1/ 1   21   17  11  DC1 (device control 1)                             */
X32,/* 1/ 2   22   18  12  DC2 (device control 2)                             */
X32,/* 1/ 3   23   19  13  DC3 (device control 3)                             */
X32,/* 1/ 4   24   20  14  DC4 (device control 4)                             */
X32,/* 1/ 5   25   21  15  NAK (negative acknowledge)                         */
X32,/* 1/ 6   26   22  16  SYN (synchronous idle)                             */
X32,/* 1/ 7   27   23  17  ETB (end of transmission block)                    */
X32,/* 1/ 8   30   24  18  CAN (cancel)                                       */
X32,/* 1/ 9   31   25  19  EM  (end of medium)                                */
X32,/* 1/10   32   26  1A  SUB (substitute character)                         */
X32,/* 1/11   33   27  1B  ESC (escape)                                       */
X32,/* 1/12   34   28  1C  IS4/FS (information separator 4 / file separator)  */
X32,/* 1/13   35   29  1D  IS3/GS (information separator 3 / group separator) */
X32,/* 1/14   36   30  1E  IS2/RS (information separator 2 / record separator)*/
X32,/* 1/15   37   31  1F  IS1/US (information separator 1 / unit separator)  */
X32,/* 2/ 0   40   32  20  space                                              */
X32,/* 2/ 1   41   33  21  exclamation mark                                   */
X32,/* 2/ 2   42   34  22  quotation mark                                     */
X32,/* 2/ 3   43   35  23  number sign                                        */
X32,/* 2/ 4   44   36  24  dollar sign                                        */
X32,/* 2/ 5   45   37  25  percent sign                                       */
X32,/* 2/ 6   46   38  26  ampersand                                          */
X32,/* 2/ 7   47   39  27  apostrophe                                         */
X32,/* 2/ 8   50   40  28  left parenthesis                                   */
X32,/* 2/ 9   51   41  29  right parenthesis                                  */
X32,/* 2/10   52   42  2A  asterisk                                           */
X32,/* 2/11   53   43  2B  plus sign                                          */
X32,/* 2/12   54   44  2C  comma                                              */
X32,/* 2/13   55   45  2D  hyphen, minus sign                                 */
X32,/* 2/14   56   46  2E  full stop                                          */
X32,/* 2/15   57   47  2F  solidus                                            */
X32,/* 3/ 0   60   48  30  digit zero                                         */
X32,/* 3/ 1   61   49  31  digit one                                          */
X32,/* 3/ 2   62   50  32  digit two                                          */
X32,/* 3/ 3   63   51  33  digit three                                        */
X32,/* 3/ 4   64   52  34  digit four                                         */
X32,/* 3/ 5   65   53  35  digit five                                         */
X32,/* 3/ 6   66   54  36  digit six                                          */
X32,/* 3/ 7   67   55  37  digit seven                                        */
X32,/* 3/ 8   70   56  38  digit eight                                        */
X32,/* 3/ 9   71   57  39  digit nine                                         */
X32,/* 3/10   72   58  3A  colon                                              */
X32,/* 3/11   73   59  3B  semicolon                                          */
X32,/* 3/12   74   60  3C  less-than sign                                     */
X32,/* 3/13   75   61  3D  equals sign                                        */
X32,/* 3/14   76   62  3E  greater-than sign                                  */
X32,/* 3/15   77   63  3F  question mark                                      */
X32,/* 4/ 0  100   64  40  commercial at                                      */
X 0,/* 4/ 1  101   65  41  A                                                  */
X 1,/* 4/ 2  102   66  42  B                                                  */
X 2,/* 4/ 3  103   67  43  C                                                  */
X 3,/* 4/ 4  104   68  44  D                                                  */
X 4,/* 4/ 5  105   69  45  E                                                  */
X 5,/* 4/ 6  106   70  46  F                                                  */
X 6,/* 4/ 7  107   71  47  G                                                  */
X 7,/* 4/ 8  110   72  48  H                                                  */
X 8,/* 4/ 9  111   73  49  I                                                  */
X 9,/* 4/10  112   74  4A  J                                                  */
X10,/* 4/11  113   75  4B  K                                                  */
X11,/* 4/12  114   76  4C  L                                                  */
X12,/* 4/13  115   77  4D  M                                                  */
X13,/* 4/14  116   78  4E  N                                                  */
X14,/* 4/15  117   79  4F  O                                                  */
X15,/* 5/ 0  120   80  50  P                                                  */
X16,/* 5/ 1  121   81  51  Q                                                  */
X17,/* 5/ 2  122   82  52  R                                                  */
X18,/* 5/ 3  123   83  53  S                                                  */
X19,/* 5/ 4  124   84  54  T                                                  */
X20,/* 5/ 5  125   85  55  U                                                  */
X21,/* 5/ 6  126   86  56  V                                                  */
X22,/* 5/ 7  127   87  57  W                                                  */
X23,/* 5/ 8  130   88  58  X                                                  */
X24,/* 5/ 9  131   89  59  Y                                                  */
X25,/* 5/10  132   90  5A  Z                                                  */
X27,/* 5/11  133   91  5B  left square bracket; se8 -> capital A diaeresis    */
X28,/* 5/12  134   92  5C  reverse solidus; se8 -> capital O diaeresis        */
X26,/* 5/13  135   93  5D  right square bracket; se8 -> capital A ring        */
X32,/* 5/14  136   94  5E  circumflex accent                                  */
X32,/* 5/15  137   95  5F  low line, underline                                */
X29,/* 6/ 0  140   96  60  grave accent; se8 -> small e acute                 */
X 0,/* 6/ 1  141   97  61  a                                                  */
X 1,/* 6/ 2  142   98  62  b                                                  */
X 2,/* 6/ 3  143   99  63  c                                                  */
X 3,/* 6/ 4  144  100  64  d                                                  */
X 4,/* 6/ 5  145  101  65  e                                                  */
X 5,/* 6/ 6  146  102  66  f                                                  */
X 6,/* 6/ 7  147  103  67  g                                                  */
X 7,/* 6/ 8  150  104  68  h                                                  */
X 8,/* 6/ 9  151  105  69  i                                                  */
X 9,/* 6/10  152  106  6A  j                                                  */
X10,/* 6/11  153  107  6B  k                                                  */
X11,/* 6/12  154  108  6C  l                                                  */
X12,/* 6/13  155  109  6D  m                                                  */
X13,/* 6/14  156  110  6E  n                                                  */
X14,/* 6/15  157  111  6F  o                                                  */
X15,/* 7/ 0  160  112  70  p                                                  */
X16,/* 7/ 1  161  113  71  q                                                  */
X17,/* 7/ 2  162  114  72  r                                                  */
X18,/* 7/ 3  163  115  73  s                                                  */
X19,/* 7/ 4  164  116  74  t                                                  */
X20,/* 7/ 5  165  117  75  u                                                  */
X21,/* 7/ 6  166  118  76  v                                                  */
X22,/* 7/ 7  167  119  77  w                                                  */
X23,/* 7/ 8  170  120  78  x                                                  */
X24,/* 7/ 9  171  121  79  y                                                  */
X25,/* 7/10  172  122  7A  z                                                  */
X27,/* 7/11  173  123  7B  left curly bracket; se8 -> small a diaeresis       */
X28,/* 7/12  174  124  7C  vertical line; se8 -> small o diaeresis            */
X26,/* 7/13  175  125  7D  right curly bracket; se8 -> small a ring           */
X32,/* 7/14  176  126  7E  tilde                                              */
X32,/* 7/15  177  127  7F  DEL (delete)                                       */
X32,/* 8/ 0  200  128  80                                                     */
X32,/* 8/ 1  201  129  81                                                     */
X32,/* 8/ 2  202  130  82                                                     */
X32,/* 8/ 3  203  131  83                                                     */
X32,/* 8/ 4  204  132  84                                                     */
X32,/* 8/ 5  205  133  85                                                     */
X32,/* 8/ 6  206  134  86                                                     */
X32,/* 8/ 7  207  135  87                                                     */
X32,/* 8/ 8  210  136  88                                                     */
X32,/* 8/ 9  211  137  89                                                     */
X32,/* 8/10  212  138  8A                                                     */
X32,/* 8/11  213  139  8B                                                     */
X32,/* 8/12  214  140  8C                                                     */
X32,/* 8/13  215  141  8D                                                     */
X32,/* 8/14  216  142  8E                                                     */
X32,/* 8/15  217  143  8F                                                     */
X32,/* 9/ 0  220  144  90                                                     */
X32,/* 9/ 1  221  145  91                                                     */
X32,/* 9/ 2  222  146  92                                                     */
X32,/* 9/ 3  223  147  93                                                     */
X32,/* 9/ 4  224  148  94                                                     */
X32,/* 9/ 5  225  149  95                                                     */
X32,/* 9/ 6  226  150  96                                                     */
X32,/* 9/ 7  227  151  97                                                     */
X32,/* 9/ 8  230  152  98                                                     */
X32,/* 9/ 9  231  153  99                                                     */
X32,/* 9/10  232  154  9A                                                     */
X32,/* 9/11  233  155  9B                                                     */
X32,/* 9/12  234  156  9C                                                     */
X32,/* 9/13  235  157  9D                                                     */
X32,/* 9/14  236  158  9E                                                     */
X32,/* 9/15  237  159  9F                                                     */
X32,/*10/ 0  240  160  A0  NBSP (no-break space)                              */
X32,/*10/ 1  241  161  A1  inverted exclamation mark                          */
X32,/*10/ 2  242  162  A2  cent sign                                          */
X32,/*10/ 3  243  163  A3  pound sign                                         */
X32,/*10/ 4  244  164  A4  general currency sign                              */
X32,/*10/ 5  245  165  A5  yen sign                                           */
X32,/*10/ 6  246  166  A6  broken vertical line                               */
X32,/*10/ 7  247  167  A7  section sign                                       */
X32,/*10/ 8  250  168  A8  diaeresis                                          */
X32,/*10/ 9  251  169  A9  copyright sign                                     */
X32,/*10/10  252  170  AA  ordinal indicator, feminine                        */
X32,/*10/11  253  171  AB  angle quotation mark left                          */
X32,/*10/12  254  172  AC  not sign                                           */
X32,/*10/13  255  173  AD  soft hyphen                                        */
X32,/*10/14  256  174  AE  registered sign                                    */
X32,/*10/15  257  175  AF  macron                                             */
X32,/*11/ 0  260  176  B0  degree sign                                        */
X32,/*11/ 1  261  177  B1  plus or minus sign                                 */
X32,/*11/ 2  262  178  B2  superscript two                                    */
X32,/*11/ 3  263  179  B3  superscript three                                  */
X32,/*11/ 4  264  180  B4  acute accent                                       */
X32,/*11/ 5  265  181  B5  micro sign                                         */
X32,/*11/ 6  266  182  B6  pilcrow                                            */
X32,/*11/ 7  267  183  B7  middle dot                                         */
X32,/*11/ 8  270  184  B8  cedilla                                            */
X32,/*11/ 9  271  185  B9  superscript one                                    */
X32,/*11/10  272  186  BA  ordinal indicator, masculine                       */
X32,/*11/11  273  187  BB  angle quotation mark right                         */
X32,/*11/12  274  188  BC  fraction one-quarter                               */
X32,/*11/13  275  189  BD  fraction one-half                                  */
X32,/*11/14  276  190  BE  fraction three-quarters                            */
X32,/*11/15  277  191  BF  inverted question mark                             */
X32,/*12/ 0  300  192  C0  capital A with grave accent                        */
X32,/*12/ 1  301  193  C1  capital A with acute accent                        */
X32,/*12/ 2  302  194  C2  capital A with circumflex accent                   */
X32,/*12/ 3  303  195  C3  capital A with tilde                               */
X27,/*12/ 4  304  196  C4  capital A with diaeresis or umlaut mark            */
X26,/*12/ 5  305  197  C5  capital A with ring                                */
X32,/*12/ 6  306  198  C6  capital AE diphthong                               */
X32,/*12/ 7  307  199  C7  capital C with cedilla                             */
X32,/*12/ 8  310  200  C8  capital E with grave accent                        */
X32,/*12/ 9  311  201  C9  capital E with acute accent                        */
X32,/*12/10  312  202  CA  capital E with circumflex accent                   */
X32,/*12/11  313  203  CB  capital E with diaeresis or umlaut mark            */
X32,/*12/12  314  204  CC  capital I with grave accent                        */
X32,/*12/13  315  205  CD  capital I with acute accent                        */
X32,/*12/14  316  206  CE  capital I with circumflex accent                   */
X32,/*12/15  317  207  CF  capital I with diaeresis or umlaut mark            */
X32,/*13/ 0  320  208  D0  capital D with stroke, Icelandic eth               */
X32,/*13/ 1  321  209  D1  capital N with tilde                               */
X32,/*13/ 2  322  210  D2  capital O with grave accent                        */
X32,/*13/ 3  323  211  D3  capital O with acute accent                        */
X32,/*13/ 4  324  212  D4  capital O with circumflex accent                   */
X32,/*13/ 5  325  213  D5  capital O with tilde                               */
X28,/*13/ 6  326  214  D6  capital O with diaeresis or umlaut mark            */
X32,/*13/ 7  327  215  D7  multiplication sign                                */
X32,/*13/ 8  330  216  D8  capital O with slash                               */
X32,/*13/ 9  331  217  D9  capital U with grave accent                        */
X32,/*13/10  332  218  DA  capital U with acute accent                        */
X32,/*13/11  333  219  DB  capital U with circumflex accent                   */
X32,/*13/12  334  220  DC  capital U with diaeresis or umlaut mark            */
X32,/*13/13  335  221  DD  capital Y with acute accent                        */
X32,/*13/14  336  222  DE  capital thorn, Icelandic                           */
X32,/*13/15  337  223  DF  small sharp s, German                              */
X32,/*14/ 0  340  224  E0  small a with grave accent                          */
X32,/*14/ 1  341  225  E1  small a with acute accent                          */
X32,/*14/ 2  342  226  E2  small a with circumflex accent                     */
X32,/*14/ 3  343  227  E3  small a with tilde                                 */
X27,/*14/ 4  344  228  E4  small a with diaeresis or umlaut mark              */
X26,/*14/ 5  345  229  E5  small a with ring                                  */
X32,/*14/ 6  346  230  E6  small ae diphthong                                 */
X32,/*14/ 7  347  231  E7  small c with cedilla                               */
X32,/*14/ 8  350  232  E8  small e with grave accent                          */
X29,/*14/ 9  351  233  E9  small e with acute accent                          */
X32,/*14/10  352  234  EA  small e with circumflex accent                     */
X32,/*14/11  353  235  EB  small e with diaeresis or umlaut mark              */
X32,/*14/12  354  236  EC  small i with grave accent                          */
X32,/*14/13  355  237  ED  small i with acute accent                          */
X32,/*14/14  356  238  EE  small i with circumflex accent                     */
X32,/*14/15  357  239  EF  small i with diaeresis or umlaut mark              */
X32,/*15/ 0  360  240  F0  small d with stroke, Icelandic eth                 */
X32,/*15/ 1  361  241  F1  small n with tilde                                 */
X32,/*15/ 2  362  242  F2  small o with grave accent                          */
X32,/*15/ 3  363  243  F3  small o with acute accent                          */
X32,/*15/ 4  364  244  F4  small o with circumflex accent                     */
X32,/*15/ 5  365  245  F5  small o with tilde                                 */
X28,/*15/ 6  366  246  F6  small o with diaeresis or umlaut mark              */
X32,/*15/ 7  367  247  F7  division sign                                      */
X32,/*15/ 8  370  248  F8  small o with slash                                 */
X32,/*15/ 9  371  249  F9  small u with grave accent                          */
X32,/*15/10  372  250  FA  small u with acute accent                          */
X32,/*15/11  373  251  FB  small u with circumflex accent                     */
X32,/*15/12  374  252  FC  small u with diaeresis or umlaut mark              */
X32,/*15/13  375  253  FD  small y with acute accent                          */
X32,/*15/14  376  254  FE  small thorn, Icelandic                             */
X32,/*15/15  377  255  FF  small y with diaeresis or umlaut mark              */
X};
X
X
XPRIVATE byteT se8[256] = /* Map Swedish ISO 646 to ISO 8859/1.*/
X{
X0000,/* 0/ 0    0    0   0  NUL (null)                                       */
X0001,/* 0/ 1    1    1   1  SOH (start of heading)                           */
X0002,/* 0/ 2    2    2   2  STX (start of text)                              */
X0003,/* 0/ 3    3    3   3  ETX (end of text)                                */
X0004,/* 0/ 4    4    4   4  EOT (end of transmission)                        */
X0005,/* 0/ 5    5    5   5  ENQ (enquiry)                                    */
X0006,/* 0/ 6    6    6   6  ACK (acknowledge)                                */
X0007,/* 0/ 7    7    7   7  BEL (bell)                                       */
X0010,/* 0/ 8   10    8   8  BS  (backspace)                                  */
X0011,/* 0/ 9   11    9   9  HT  (horizontal tabulation)                      */
X0012,/* 0/10   12   10   A  LF  (line feed)                                  */
X0013,/* 0/11   13   11   B  VT  (vertical tabulation)                        */
X0014,/* 0/12   14   12   C  FF  (form feed)                                  */
X0015,/* 0/13   15   13   D  CR  (carriage return)                            */
X0016,/* 0/14   16   14   E  SO  (shift out)                                  */
X0017,/* 0/15   17   15   F  SI  (shift in)                                   */
X0020,/* 1/ 0   20   16  10  DLE (data link escape)                           */
X0021,/* 1/ 1   21   17  11  DC1 (device control 1)                           */
X0022,/* 1/ 2   22   18  12  DC2 (device control 2)                           */
X0023,/* 1/ 3   23   19  13  DC3 (device control 3)                           */
X0024,/* 1/ 4   24   20  14  DC4 (device control 4)                           */
X0025,/* 1/ 5   25   21  15  NAK (negative acknowledge)                       */
X0026,/* 1/ 6   26   22  16  SYN (synchronous idle)                           */
X0027,/* 1/ 7   27   23  17  ETB (end of transmission block)                  */
X0030,/* 1/ 8   30   24  18  CAN (cancel)                                     */
X0031,/* 1/ 9   31   25  19  EM  (end of medium)                              */
X0032,/* 1/10   32   26  1A  SUB (substitute character)                       */
X0033,/* 1/11   33   27  1B  ESC (escape)                                     */
X0034,/* 1/12   34   28  1C  IS4/FS (information separator 4 / file separator)*/
X0035,/* 1/13   35   29  1D  IS3/GS (information separator 3 / group separator) */
X0036,/* 1/14   36   30  1E  IS2/RS (information separator 2 / record separator)*/
X0037,/* 1/15   37   31  1F  IS1/US (information separator 1 / unit separator)*/
X0040,/* 2/ 0   40   32  20  space                                            */
X0041,/* 2/ 1   41   33  21  exclamation mark                                 */
X0042,/* 2/ 2   42   34  22  quotation mark                                   */
X0043,/* 2/ 3   43   35  23  number sign                                      */
X0044,/* 2/ 4   44   36  24  dollar sign                                      */
X0045,/* 2/ 5   45   37  25  percent sign                                     */
X0046,/* 2/ 6   46   38  26  ampersand                                        */
X0047,/* 2/ 7   47   39  27  apostrophe                                       */
X0050,/* 2/ 8   50   40  28  left parenthesis                                 */
X0051,/* 2/ 9   51   41  29  right parenthesis                                */
X0052,/* 2/10   52   42  2A  asterisk                                         */
X0053,/* 2/11   53   43  2B  plus sign                                        */
X0054,/* 2/12   54   44  2C  comma                                            */
X0055,/* 2/13   55   45  2D  hyphen, minus sign                               */
X0056,/* 2/14   56   46  2E  full stop                                        */
X0057,/* 2/15   57   47  2F  solidus                                          */
X0060,/* 3/ 0   60   48  30  digit zero                                       */
X0061,/* 3/ 1   61   49  31  digit one                                        */
X0062,/* 3/ 2   62   50  32  digit two                                        */
X0063,/* 3/ 3   63   51  33  digit three                                      */
X0064,/* 3/ 4   64   52  34  digit four                                       */
X0065,/* 3/ 5   65   53  35  digit five                                       */
X0066,/* 3/ 6   66   54  36  digit six                                        */
X0067,/* 3/ 7   67   55  37  digit seven                                      */
X0070,/* 3/ 8   70   56  38  digit eight                                      */
X0071,/* 3/ 9   71   57  39  digit nine                                       */
X0072,/* 3/10   72   58  3A  colon                                            */
X0073,/* 3/11   73   59  3B  semicolon                                        */
X0074,/* 3/12   74   60  3C  less-than sign                                   */
X0075,/* 3/13   75   61  3D  equals sign                                      */
X0076,/* 3/14   76   62  3E  greater-than sign                                */
X0077,/* 3/15   77   63  3F  question mark                                    */
X0100,/* 4/ 0  100   64  40  commercial at                                    */
X0101,/* 4/ 1  101   65  41  A                                                */
X0102,/* 4/ 2  102   66  42  B                                                */
X0103,/* 4/ 3  103   67  43  C                                                */
X0104,/* 4/ 4  104   68  44  D                                                */
X0105,/* 4/ 5  105   69  45  E                                                */
X0106,/* 4/ 6  106   70  46  F                                                */
X0107,/* 4/ 7  107   71  47  G                                                */
X0110,/* 4/ 8  110   72  48  H                                                */
X0111,/* 4/ 9  111   73  49  I                                                */
X0112,/* 4/10  112   74  4A  J                                                */
X0113,/* 4/11  113   75  4B  K                                                */
X0114,/* 4/12  114   76  4C  L                                                */
X0115,/* 4/13  115   77  4D  M                                                */
X0116,/* 4/14  116   78  4E  N                                                */
X0117,/* 4/15  117   79  4F  O                                                */
X0120,/* 5/ 0  120   80  50  P                                                */
X0121,/* 5/ 1  121   81  51  Q                                                */
X0122,/* 5/ 2  122   82  52  R                                                */
X0123,/* 5/ 3  123   83  53  S                                                */
X0124,/* 5/ 4  124   84  54  T                                                */
X0125,/* 5/ 5  125   85  55  U                                                */
X0126,/* 5/ 6  126   86  56  V                                                */
X0127,/* 5/ 7  127   87  57  W                                                */
X0130,/* 5/ 8  130   88  58  X                                                */
X0131,/* 5/ 9  131   89  59  Y                                                */
X0132,/* 5/10  132   90  5A  Z                                                */
X0304,/* 5/11  133   91  5B  left square bracket                              */
X0326,/* 5/12  134   92  5C  reverse solidus                                  */
X0305,/* 5/13  135   93  5D  right square bracket                             */
X0136,/* 5/14  136   94  5E  circumflex accent                                */
X0137,/* 5/15  137   95  5F  low line, underline                              */
X0351,/* 6/ 0  140   96  60  grave accent                                     */
X0141,/* 6/ 1  141   97  61  a                                                */
X0142,/* 6/ 2  142   98  62  b                                                */
X0143,/* 6/ 3  143   99  63  c                                                */
X0144,/* 6/ 4  144  100  64  d                                                */
X0145,/* 6/ 5  145  101  65  e                                                */
X0146,/* 6/ 6  146  102  66  f                                                */
X0147,/* 6/ 7  147  103  67  g                                                */
X0150,/* 6/ 8  150  104  68  h                                                */
X0151,/* 6/ 9  151  105  69  i                                                */
X0152,/* 6/10  152  106  6A  j                                                */
X0153,/* 6/11  153  107  6B  k                                                */
X0154,/* 6/12  154  108  6C  l                                                */
X0155,/* 6/13  155  109  6D  m                                                */
X0156,/* 6/14  156  110  6E  n                                                */
X0157,/* 6/15  157  111  6F  o                                                */
X0160,/* 7/ 0  160  112  70  p                                                */
X0161,/* 7/ 1  161  113  71  q                                                */
X0162,/* 7/ 2  162  114  72  r                                                */
X0163,/* 7/ 3  163  115  73  s                                                */
X0164,/* 7/ 4  164  116  74  t                                                */
X0165,/* 7/ 5  165  117  75  u                                                */
X0166,/* 7/ 6  166  118  76  v                                                */
X0167,/* 7/ 7  167  119  77  w                                                */
X0170,/* 7/ 8  170  120  78  x                                                */
X0171,/* 7/ 9  171  121  79  y                                                */
X0172,/* 7/10  172  122  7A  z                                                */
X0344,/* 7/11  173  123  7B  left curly bracket                               */
X0366,/* 7/12  174  124  7C  vertical line                                    */
X0345,/* 7/13  175  125  7D  right curly bracket                              */
X0176,/* 7/14  176  126  7E  tilde                                            */
X0177,/* 7/15  177  127  7F  DEL (delete)                                     */
X0200,/* 8/ 0  200  128  80                                                   */
X0201,/* 8/ 1  201  129  81                                                   */
X0202,/* 8/ 2  202  130  82                                                   */
X0203,/* 8/ 3  203  131  83                                                   */
X0204,/* 8/ 4  204  132  84                                                   */
X0205,/* 8/ 5  205  133  85                                                   */
X0206,/* 8/ 6  206  134  86                                                   */
X0207,/* 8/ 7  207  135  87                                                   */
X0210,/* 8/ 8  210  136  88                                                   */
X0211,/* 8/ 9  211  137  89                                                   */
X0212,/* 8/10  212  138  8A                                                   */
X0213,/* 8/11  213  139  8B                                                   */
X0214,/* 8/12  214  140  8C                                                   */
X0215,/* 8/13  215  141  8D                                                   */
X0216,/* 8/14  216  142  8E                                                   */
X0217,/* 8/15  217  143  8F                                                   */
X0220,/* 9/ 0  220  144  90                                                   */
X0221,/* 9/ 1  221  145  91                                                   */
X0222,/* 9/ 2  222  146  92                                                   */
X0223,/* 9/ 3  223  147  93                                                   */
X0224,/* 9/ 4  224  148  94                                                   */
X0225,/* 9/ 5  225  149  95                                                   */
X0226,/* 9/ 6  226  150  96                                                   */
X0227,/* 9/ 7  227  151  97                                                   */
X0230,/* 9/ 8  230  152  98                                                   */
X0231,/* 9/ 9  231  153  99                                                   */
X0232,/* 9/10  232  154  9A                                                   */
X0233,/* 9/11  233  155  9B                                                   */
X0234,/* 9/12  234  156  9C                                                   */
X0235,/* 9/13  235  157  9D                                                   */
X0236,/* 9/14  236  158  9E                                                   */
X0237,/* 9/15  237  159  9F                                                   */
X0240,/*10/ 0  240  160  A0  NBSP (no-break space)                            */
X0241,/*10/ 1  241  161  A1  inverted exclamation mark                        */
X0242,/*10/ 2  242  162  A2  cent sign                                        */
X0243,/*10/ 3  243  163  A3  pound sign                                       */
X0244,/*10/ 4  244  164  A4  general currency sign                            */
X0245,/*10/ 5  245  165  A5  yen sign                                         */
X0246,/*10/ 6  246  166  A6  broken vertical line                             */
X0247,/*10/ 7  247  167  A7  section sign                                     */
X0250,/*10/ 8  250  168  A8  diaeresis                                        */
X0251,/*10/ 9  251  169  A9  copyright sign                                   */
X0252,/*10/10  252  170  AA  ordinal indicator, feminine                      */
X0253,/*10/11  253  171  AB  angle quotation mark left                        */
X0254,/*10/12  254  172  AC  not sign                                         */
X0255,/*10/13  255  173  AD  soft hyphen                                      */
X0256,/*10/14  256  174  AE  registered sign                                  */
X0257,/*10/15  257  175  AF  macron                                           */
X0260,/*11/ 0  260  176  B0  degree sign                                      */
X0261,/*11/ 1  261  177  B1  plus or minus sign                               */
X0262,/*11/ 2  262  178  B2  superscript two                                  */
X0263,/*11/ 3  263  179  B3  superscript three                                */
X0264,/*11/ 4  264  180  B4  acute accent                                     */
X0265,/*11/ 5  265  181  B5  micro sign                                       */
X0266,/*11/ 6  266  182  B6  pilcrow                                          */
X0267,/*11/ 7  267  183  B7  middle dot                                       */
X0270,/*11/ 8  270  184  B8  cedilla                                          */
X0271,/*11/ 9  271  185  B9  superscript one                                  */
X0272,/*11/10  272  186  BA  ordinal indicator, masculine                     */
X0273,/*11/11  273  187  BB  angle quotation mark right                       */
X0274,/*11/12  274  188  BC  fraction one-quarter                             */
X0275,/*11/13  275  189  BD  fraction one-half                                */
X0276,/*11/14  276  190  BE  fraction three-quarters                          */
X0277,/*11/15  277  191  BF  inverted question mark                           */
X0300,/*12/ 0  300  192  C0  capital A with grave accent                      */
X0301,/*12/ 1  301  193  C1  capital A with acute accent                      */
X0302,/*12/ 2  302  194  C2  capital A with circumflex accent                 */
X0303,/*12/ 3  303  195  C3  capital A with tilde                             */
X0304,/*12/ 4  304  196  C4  capital A with diaeresis or umlaut mark          */
X0305,/*12/ 5  305  197  C5  capital A with ring                              */
X0306,/*12/ 6  306  198  C6  capital AE diphthong                             */
X0307,/*12/ 7  307  199  C7  capital C with cedilla                           */
X0310,/*12/ 8  310  200  C8  capital E with grave accent                      */
X0311,/*12/ 9  311  201  C9  capital E with acute accent                      */
X0312,/*12/10  312  202  CA  capital E with circumflex accent                 */
X0313,/*12/11  313  203  CB  capital E with diaeresis or umlaut mark          */
X0314,/*12/12  314  204  CC  capital I with grave accent                      */
X0315,/*12/13  315  205  CD  capital I with acute accent                      */
X0316,/*12/14  316  206  CE  capital I with circumflex accent                 */
X0317,/*12/15  317  207  CF  capital I with diaeresis or umlaut mark          */
X0320,/*13/ 0  320  208  D0  capital D with stroke, Icelandic eth             */
X0321,/*13/ 1  321  209  D1  capital N with tilde                             */
X0322,/*13/ 2  322  210  D2  capital O with grave accent                      */
X0323,/*13/ 3  323  211  D3  capital O with acute accent                      */
X0324,/*13/ 4  324  212  D4  capital O with circumflex accent                 */
X0325,/*13/ 5  325  213  D5  capital O with tilde                             */
X0326,/*13/ 6  326  214  D6  capital O with diaeresis or umlaut mark          */
X0327,/*13/ 7  327  215  D7  multiplication sign                              */
X0330,/*13/ 8  330  216  D8  capital O with slash                             */
X0331,/*13/ 9  331  217  D9  capital U with grave accent                      */
X0332,/*13/10  332  218  DA  capital U with acute accent                      */
X0333,/*13/11  333  219  DB  capital U with circumflex accent                 */
X0334,/*13/12  334  220  DC  capital U with diaeresis or umlaut mark          */
X0335,/*13/13  335  221  DD  capital Y with acute accent                      */
X0336,/*13/14  336  222  DE  capital thorn, Icelandic                         */
X0337,/*13/15  337  223  DF  small sharp s, German                            */
X0340,/*14/ 0  340  224  E0  small a with grave accent                        */
X0341,/*14/ 1  341  225  E1  small a with acute accent                        */
X0342,/*14/ 2  342  226  E2  small a with circumflex accent                   */
X0343,/*14/ 3  343  227  E3  small a with tilde                               */
X0344,/*14/ 4  344  228  E4  small a with diaeresis or umlaut mark            */
X0345,/*14/ 5  345  229  E5  small a with ring                                */
X0346,/*14/ 6  346  230  E6  small ae diphthong                               */
X0347,/*14/ 7  347  231  E7  small c with cedilla                             */
X0350,/*14/ 8  350  232  E8  small e with grave accent                        */
X0351,/*14/ 9  351  233  E9  small e with acute accent                        */
X0352,/*14/10  352  234  EA  small e with circumflex accent                   */
X0353,/*14/11  353  235  EB  small e with diaeresis or umlaut mark            */
X0354,/*14/12  354  236  EC  small i with grave accent                        */
X0355,/*14/13  355  237  ED  small i with acute accent                        */
X0356,/*14/14  356  238  EE  small i with circumflex accent                   */
X0357,/*14/15  357  239  EF  small i with diaeresis or umlaut mark            */
X0360,/*15/ 0  360  240  F0  small d with stroke, Icelandic eth               */
X0361,/*15/ 1  361  241  F1  small n with tilde                               */
X0362,/*15/ 2  362  242  F2  small o with grave accent                        */
X0363,/*15/ 3  363  243  F3  small o with acute accent                        */
X0364,/*15/ 4  364  244  F4  small o with circumflex accent                   */
X0365,/*15/ 5  365  245  F5  small o with tilde                               */
X0366,/*15/ 6  366  246  F6  small o with diaeresis or umlaut mark            */
X0367,/*15/ 7  367  247  F7  division sign                                    */
X0370,/*15/ 8  370  248  F8  small o with slash                               */
X0371,/*15/ 9  371  249  F9  small u with grave accent                        */
X0372,/*15/10  372  250  FA  small u with acute accent                        */
X0373,/*15/11  373  251  FB  small u with circumflex accent                   */
X0374,/*15/12  374  252  FC  small u with diaeresis or umlaut mark            */
X0375,/*15/13  375  253  FD  small y with acute accent                        */
X0376,/*15/14  376  254  FE  small thorn, Icelandic                           */
X0377,/*15/15  377  255  FF  small y with diaeresis or umlaut mark            */
X};
X
X#include "78common.h"
X
X/* Different sections in a file.  seus() keeps the current one in its
X * local "sect", and also uses that value as an index into the string
X * "?hbsf" when printing debug output, so S_HDR prints as 'h', S_BODY
X * as 'b' and S_SIG as 's'. */
X#define S_HDR  1 /* News article header.*/
X#define S_BODY 2 /* News article body.*/
X#define S_SIG  3 /* News article signature.*/
X
X/* Tuning parameters for seus().  The option letter that overrides each
X * default (decoded in main()) is given in its comment.
X *
X * seus() keeps a running score "cum".  After each word, cum is
X * multiplied by attack if the word's dif78() value is > 0 and by decay
X * otherwise, and the value is then added.  On an empty line, and after
X * a word that is ended by ':', cum is multiplied by blank (resp. colon)
X * times decay if cum > thresh, or times attack otherwise. */
XPRIVATE double   attack = 0.65;   /* -A: Smoothing factor (0.001 - 0.999).*/
XPRIVATE double   blank  = 0.7;    /* -B: Scale attack/decay on blank lines.*/
XPRIVATE double   bodval = -200.0; /* -b: Score at start of body.*/
XPRIVATE double   colon  = 0.5;    /* -c: Scale attack/decay after colon.*/
XPRIVATE boolT    debug = FALSE;   /* -d: Debug flag; log scored words to stderr.*/
XPRIVATE double   decay = 0.67;    /* -D: Smoothing factor (0.001 - 0.999).*/
XPRIVATE boolT    fixbody = FALSE; /* -f: Ordinary file, no header or signature.*/
XPRIVATE double   headval = 0.0;   /* -h: Score set on header lines with a colon.*/
XPRIVATE boolT    mailbox = FALSE; /* -m: Converting a mailbox; "From " restarts header.*/
XPRIVATE double   pound1 = -350.0; /* -#: Score cap after # or X at beginning of body line.*/
XPRIVATE unsigned siglns = 9;      /* -s: Max lines in a signature (1 - 99).*/
XPRIVATE double   thresh = 0.0;    /* -t: Score above this is Swedish.*/
XPRIVATE triDifT  seustt[TRIMAX + 1];/* Trigram difference table; filled by mrdtri() in main().*/
X
X/* Words that seus() converts no matter what the running score is.
X * They are spelled in the 7-bit input form: in the conversion table
X * above, '}' (0175) maps to small a with ring, '{' (0173) to small a
X * with diaeresis and '|' (0174) to small o with diaeresis. */
XPRIVATE bStrT sewords[] = /* These are always Swedish.*/
X   {
X   S("D}"),
X   S("p}"),
X   S("s}"),
X   S("{r"),
X   S("|ver"),
X   NULBSTR
X   };
X
X/* Words that seus() leaves unconverted even when the running score is
X * above thresh.  The list is terminated by NULBSTR, like sewords[]. */
XPRIVATE bStrT uswords[] = /* These are never Swedish.*/
X   {
X   S("[]"),
X   S("[The"),
X   NULBSTR
X   };
X
X#include "78heur.h"
X
X/* seus - run heuristics on one file */
X
XPRIVATE void seus (is, fn)
XR9 streamT is; /* Input stream.*/
X   bStrT   fn; /* File name.*/
X
X/* Function:
X *    Copy file to standard output, converting to ISO 8859/1.
X * Algorithm:
X *    Read each line.  Switch on section and look for section
X *    transitions.  Step through the line.  Look for section matches.
X *    Call dif78() on each word.  Compute score.  If word looks Swedish,
X *    convert it.  Write line.
X * Returns:
X *    Nothing.
X * Notes:
X *    1) A word is converted in place through se8[] when the running
X *       score exceeds thresh and the word is not in uswords[], or
X *       whenever the word is in sewords[].
X *    2) The whole line is written starting at lb[1], including any
X *       prefix that the section handling skipped over for scoring.
X *    3) lns stays -1 until a "Lines: " header field has been parsed,
X *       so its sign is tested before it is compared with the unsigned
X *       line counters.
X */
X{
XR2 rcharT   b;             /* Current input byte.*/
XR4 int      i;             /* General purpose.*/
X   double   cum = 0.0;     /* Cumulative score.*/
X   unsigned ln = 0;        /* Input line number.*/
X   int      lns = -1;      /* Value from Lines: header field; -1 = unknown.*/
XR8 unsigned sigln = 1;     /* Line number in signature.*/
XR5 bStrT    p1;            /* Rest of line after special match.*/
XR1 bStrT    lp;            /* Steps through lb[].*/
XR7 boolT    sigbeg;        /* Line looks like start of signature.*/
XR3 bStrT    wp = NULBSTR;  /* Points to start of word.*/
XR6 unsigned sect;          /* Current section.*/
X   byteT    lb[MLINE + 1]; /* Line buffer.*/
X
Xsect = (fixbody ? S_BODY : S_HDR);
X/* The text of each line is read into lb[1..]; lb[0] holds a blank
X * (presumably so helpers may look one byte before the line - TODO
X * confirm). */
Xlb[0] = ' ';
Xwhile (NULBSTR != (getlin ((lp = &lb[1]), MLINE, is, fn, &ln, 0)))
X   {
X   if (mailbox && (NULBSTR != prefix (S("From "), lp)))
X      {
X      /* New message in a mailbox: start over in the header. */
X      cum = 0.0;
X      ln = 1;
X      lns = -1;
X      sect = S_HDR;
X      sigln = 1;
X      sigbeg = FALSE;
X      }
X   else
X      sigbeg = SigBegP (lp);
X   switch (sect)
X      {
X      case S_HDR:
X         if (EOS == B(*lp))
X            {
X            /* An empty line ends the header. */
X            sect = S_BODY;
X            cum = bodval;
X            ln = 0;
X            }
X         else
X            {
X            if (NULBSTR != (p1 = prefix (S("Lines: "), lp)))
X               (void) a2i (p1, NULBSTR, TRUE, &lns, (bStrT *) NULL);
X            /* Score only what follows the first colon of the line. */
X            if (NULBSTR != (p1 = bStrChr (lp, ':')))
X               {
X               cum = headval;
X               lp = p1 + 1;
X               }
X            }
X         break;
X      case S_BODY:
X         /* The signature starts at a line flagged by SigBegP() or, when
X          * a Lines: count is known, once ln exceeds lns - siglns.  lns
X          * is signed and may be -1 (unknown): test its sign first,
X          * because comparing it directly with an unsigned operand would
X          * convert -1 to a huge positive value. */
X         if (sigbeg ||
X             (!fixbody && (lns >= 0) && ((unsigned) lns > siglns) &&
X              (ln > ((unsigned) lns - siglns))))
X            sect = S_SIG;
X         else
X            {
X            /* Skip leading '>' bytes, then cap the score if the line
X             * starts with '#' or 'X'. */
X            for (; '>' == B(*lp); ++lp)
X               ;
X            if (('#' == B(*lp)) || ('X' == B(*lp)))
X               cum = MIN (cum, pound1);
X            }
X         break;
X      case S_SIG:
X         if (sigbeg)
X            sigln = 1;
X         else if (sigln <= siglns)
X            ++sigln;
X         else
X            {
X            /* Too long for a signature: treat as body again. */
X            sigln = 1;
X            sect = S_BODY;
X            }
X         break;
X      default:
X         malf1 (eIntern, "seus 1");
X         break;
X      }
X   /* Nothing left to score on this line: pull the score toward zero. */
X   if (EOS == B(*lp)) cum *= blank * ((cum > thresh) ? decay : attack);
X   do
X      {
X      b = B(*lp);
X      /* Each helper yields NULBSTR for no match; any other result is
X       * where scanning resumes, and a word in progress is dropped
X       * without being scored. */
X      if ((NULBSTR != (p1 = BraceP (lp, sect))) ||
X          (NULBSTR != (p1 = UunetP (lp))) ||
X          (NULBSTR != (p1 = IPP (lp))) ||
X          (NULBSTR != (p1 = InArtP (lp, sect))) ||
X          (NULBSTR != (p1 = GrafP (lp, sect))) ||
X          (NULBSTR != (p1 = PipeP (lp, sect))) ||
X          (NULBSTR != (p1 = EndP (lp, sect, S(") writes:")))) ||
X          (NULBSTR != (p1 = EndP (lp, sect, S(" \\n\\")))) ||
X          (NULBSTR != (p1 = LaTeXP (lp))))
X         {
X         lp = p1;
X         wp = NULBSTR;
X         }
X      else
X         {
X         if (byte2t[b] <= TRIHI)
X            {
X            /* Byte belongs to a word; remember where the word began. */
X            if (NULBSTR == wp) wp = lp;
X            }
X         else
X            {
X            if (NULBSTR != wp)
X               {
X               /* End of word [wp, lp): update the score, then decide. */
X               i = dif78 (wp, lp, seustt);
X               cum *= ((i > 0) ? attack : decay);
X               cum += i;
X               if (((cum > thresh) && !wordp (wp, lp, uswords)) ||
X                   wordp (wp, lp, sewords))
X                  {
X                  for (p1 = wp; p1 != lp; ++p1)
X                     *p1 = se8[B(*p1)];
X                  }
X               /* The precision argument of %.*s must be an int, while
X                * a pointer difference may be wider, hence the cast. */
X               if (debug)
X                  FPRINTF (stderr, "%c%6.0f %.*s\n", "?hbsf"[sect], cum,
X                           (int) (lp - wp), wp);
X               wp = NULBSTR;
X               if (':' == b)
X                  cum *= colon * ((cum > thresh) ? decay : attack);
X               }
X            }
X         ++lp;
X         }
X      }
X   while (EOS != b); /* The EOS itself is processed, ending a last word.*/
X   puts (&lb[1]);
X   }
X}
X
X/* main - program entry point */
X
XPUBLIC int main (argc, argv)
X   int    argc; /* Number of arguments.*/
XR3 bStrT *argv; /* Points to array of argument strings.*/
X
X/* Function:
X *    Set the tuning parameters from the command line, then convert
X *    standard input to standard output with seus().
X * Algorithm:
X *    Decode options with getopt().  Reject any non-option argument.
X *    Call ipath(), read the "seus" trigram table into seustt with
X *    mrdtri(), run seus() on stdin, flush stdout, and exit.
X * Notes:
X *    Options taking a value: # A B D b c h s t.  Flags: d f m.
X */
X
X{
XR1     rcharT opt;   /* Option letter returned by getopt().*/
Xextern int optind;   /* Index of first non-option argument; getopt (3).*/
Xextern cStrT optarg; /* Value of current option; getopt (3).*/
X
Xfor (;;)
X   {
X   opt = getopt (argc, (cStrT *) argv, "#:A:B:D:b:c:dfh:ms:t:");
X   if (EOF == opt) break;
X   switch (opt)
X      {
X      /* Flags. */
X      case 'd':
X         debug = TRUE;
X         break;
X      case 'f':
X         fixbody = TRUE;
X         break;
X      case 'm':
X         mailbox = TRUE;
X         break;
X
X      /* Smoothing factors, range checked. */
X      case 'A':
X         attack = mra2d ((bStrT) optarg, NULBSTR, FALSE, "Attack",
X                         0.001, 0.999, (bStrT *) NULL);
X         break;
X      case 'D':
X         decay = mra2d ((bStrT) optarg, NULBSTR, FALSE, "Decay",
X                        0.001, 0.999, (bStrT *) NULL);
X         break;
X
X      /* Scale factors. */
X      case 'B':
X         blank = ma2d ((bStrT) optarg, NULBSTR, FALSE,
X                       S("Blank smoothing"), (bStrT *) NULL);
X         break;
X      case 'c':
X         colon = ma2d ((bStrT) optarg, NULBSTR, FALSE,
X                       S("Colon Smoothing"), (bStrT *) NULL);
X         break;
X
X      /* Score values. */
X      case '#':
X         pound1 = ma2d ((bStrT) optarg, NULBSTR, FALSE,
X                        "# Value", (bStrT *) NULL);
X         break;
X      case 'b':
X         bodval = ma2d ((bStrT) optarg, NULBSTR, FALSE,
X                        "Body Value", (bStrT *) NULL);
X         break;
X      case 'h':
X         headval = ma2d ((bStrT) optarg, NULBSTR, FALSE,
X                         S("Header Value"), (bStrT *) NULL);
X         break;
X      case 't':
X         thresh = ma2d ((bStrT) optarg, NULBSTR, FALSE,
X                        "Threshold", (bStrT *) NULL);
X         break;
X
X      /* Signature length limit, range checked. */
X      case 's':
X         siglns = mra2u ((bStrT) optarg, NULBSTR, FALSE,
X                         "Max signature lines",
X                         (unsigned) 1, (unsigned) 99, (bStrT *) NULL);
X         break;
X
X      /* Option rejected by getopt(). */
X      case '?':
X         usage();
X         break;
X
X      /* A letter in the option string without a case above. */
X      default:
X         malf1 (eIntern, "main 1");
X         break;
X      }
X   }
X
X/* No file arguments are accepted; input always comes from stdin. */
Xif (NULBSTR != argv[optind]) usage();
X
Xipath();
Xmrdtri (S("seus"), (bStrT) seustt);
Xseus (stdin, S("Standard Input"));
Xmfflush (stdout, "Standard Output");
Xexit (SUCCESS);
X
X#ifdef lint
Xreturn (SUCCESS);
X#endif
X}
END_OF_FILE
if test 50344 -ne `wc -c <'78seus.c'`; then
    echo shar: \"'78seus.c'\" unpacked with wrong size!
fi
# end of '78seus.c'
fi
# Announce that this part is done and leave a marker file for it.
echo shar: End of archive 10 \(of 14\).
cp /dev/null ark10isdone
# Collect the numbers of the parts whose marker file does not exist yet.
MISSING=""
for I in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 ; do
    test -f ark${I}isdone || MISSING="${MISSING} ${I}"
done
# All parts present: remove the markers.  Otherwise list what is missing.
case "${MISSING}" in
"")
    echo You have unpacked all 14 archives.
    rm -f ark[1-9]isdone ark[1-9][0-9]isdone
    ;;
*)
    echo You still need to unpack the following archives:
    echo "        " ${MISSING}
    ;;
esac
##  End of shell archive.
exit 0