allbery@uunet.UU.NET (Brandon S. Allbery - comp.sources.misc) (10/01/89)
Posting-number: Volume 8, Issue 74 Submitted-by: howard@dahlbeck.ericsson.se (Howard Gayle) Archive-name: cz/part10 #! /bin/sh # This is a shell archive. Remove anything before this line, then feed it # into a shell via "sh file" or similar. To overwrite existing files, # type "sh file -c". # The tool that generated this appeared in the comp.sources.unix newsgroup; # send mail to comp-sources-unix@uunet.uu.net if you want that tool. # If this archive is complete, you will see the following message at the end: # "End of archive 10 (of 14)." # Contents: 78seus.c # Wrapped by howard@dahlbeck on Mon Sep 25 07:15:23 1989 PATH=/bin:/usr/bin:/usr/ucb ; export PATH if test -f '78seus.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'78seus.c'\" else echo shar: Extracting \"'78seus.c'\" \(50344 characters\) sed "s/^X//" >'78seus.c' <<'END_OF_FILE' X/* X * 78seus - convert Swedish or (US) English from ISO 646 to ISO 8859/1 X */ X X#ifndef lint Xstatic char _cpyrgt[] = "Copyright 1989 Howard Lee Gayle"; X#endif lint X X/* X * This program is free software; you can redistribute it and/or modify X * it under the terms of the GNU General Public License version 1, X * as published by the Free Software Foundation. X * X * This program is distributed in the hope that it will be useful, X * but WITHOUT ANY WARRANTY; without even the implied warranty of X * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the X * GNU General Public License for more details. X * X * You should have received a copy of the GNU General Public License X * along with this program; if not, write to the Free Software X * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. X */ X X#include <stdio.h> X#include <howard/port.h> X#include <howard/version.h> X#include <howard/usage.h> X XMAINVER ("@(#)$Header: 78seus.c,v 1.43 89/08/28 18:39:58 howard Exp $"); XUSAGE ("[-# shar-points] [-A attack] [-B blank-line-smoothing] [-D decay] [-b body-points] [-c colon-smoothing] [-d] [-f] [-m] [-s lines] [-t threshold]"); X X#include <ctype.h> X#include <limits.h> X#include <string.h> X#include <howard/a2.h> X#include <howard/malf.h> X#include <howard/registers.i> X#include "cz.h" X#include "78.h" X XPRIVATE byteT byte2t[256] = /* Map each byte to a trigram code.*/ X{ X32,/* 0/ 0 0 0 0 NUL (null) */ X32,/* 0/ 1 1 1 1 SOH (start of heading) */ X32,/* 0/ 2 2 2 2 STX (start of text) */ X32,/* 0/ 3 3 3 3 ETX (end of text) */ X32,/* 0/ 4 4 4 4 EOT (end of transmission) */ X32,/* 0/ 5 5 5 5 ENQ (enquiry) */ X32,/* 0/ 6 6 6 6 ACK (acknowledge) */ X32,/* 0/ 7 7 7 7 BEL (bell) */ X32,/* 0/ 8 10 8 8 BS (backspace) */ X32,/* 0/ 9 11 9 9 HT (horizontal tabulation) */ X32,/* 0/10 12 10 A LF (line feed) */ X32,/* 0/11 13 11 B VT (vertical tabulation) */ X32,/* 0/12 14 12 C FF (form feed) */ X32,/* 0/13 15 13 D CR (carriage return) */ X32,/* 0/14 16 14 E SO (shift out) */ X32,/* 0/15 17 15 F SI (shift in) */ X32,/* 1/ 0 20 16 10 DLE (data link escape) */ X32,/* 1/ 1 21 17 11 DC1 (device control 1) */ X32,/* 1/ 2 22 18 12 DC2 (device control 2) */ X32,/* 1/ 3 23 19 13 DC3 (device control 3) */ X32,/* 1/ 4 24 20 14 DC4 (device control 4) */ X32,/* 1/ 5 25 21 15 NAK (negative aknowledge) */ X32,/* 1/ 6 26 22 16 SYN (synchronous idle) */ X32,/* 1/ 7 27 23 17 ETB (end of transmission block) */ X32,/* 1/ 8 30 24 18 CAN (cancel) */ X32,/* 1/ 9 31 25 19 EM (end of medium) */ X32,/* 1/10 32 26 1A SUB (substitute character) */ X32,/* 1/11 33 27 1B ESC (escape) */ X32,/* 1/12 34 28 1C IS4/FS (information separator 4 / file separator) */ X32,/* 1/13 35 29 1D IS3/GS (information separator 3 / group separator) */ X32,/* 1/14 36 30 1E IS2/RS (information separator 2 / record separator)*/ X32,/* 1/15 37 31 1F IS1/US (information separator 1 / unit separator) */ X32,/* 2/ 0 40 32 20 space */ X32,/* 2/ 1 41 33 21 exclamation mark */ X32,/* 2/ 2 42 34 22 quotation mark */ X32,/* 2/ 3 43 35 23 number sign */ X32,/* 2/ 4 44 36 24 dollar sign */ X32,/* 2/ 5 45 37 25 percent sign */ X32,/* 2/ 6 46 38 26 ampersand */ X32,/* 2/ 7 47 39 27 apostrophe */ X32,/* 2/ 8 50 40 28 left parenthesis */ X32,/* 2/ 9 51 41 29 right parenthesis */ X32,/* 2/10 52 42 2A asterisk */ X32,/* 2/11 53 43 2B plus sign */ X32,/* 2/12 54 44 2C comma */ X32,/* 2/13 55 45 2D hyphen, minus sign */ X32,/* 2/14 56 46 2E full stop */ X32,/* 2/15 57 47 2F solidus */ X32,/* 3/ 0 60 48 30 digit zero */ X32,/* 3/ 1 61 49 31 digit one */ X32,/* 3/ 2 62 50 32 digit two */ X32,/* 3/ 3 63 51 33 digit three */ X32,/* 3/ 4 64 52 34 digit four */ X32,/* 3/ 5 65 53 35 digit five */ X32,/* 3/ 6 66 54 36 digit six */ X32,/* 3/ 7 67 55 37 digit seven */ X32,/* 3/ 8 70 56 38 digit eight */ X32,/* 3/ 9 71 57 39 digit nine */ X32,/* 3/10 72 58 3A colon */ X32,/* 3/11 73 59 3B semicolon */ X32,/* 3/12 74 60 3C less-than sign */ X32,/* 3/13 75 61 3D equals sign */ X32,/* 3/14 76 62 3E greater-than sign */ X32,/* 3/15 77 63 3F question mark */ X32,/* 4/ 0 100 64 40 commercial at */ X 0,/* 4/ 1 101 65 41 A */ X 1,/* 4/ 2 102 66 42 B */ X 2,/* 4/ 3 103 67 43 C */ X 3,/* 4/ 4 104 68 44 D */ X 4,/* 4/ 5 105 69 45 E */ X 5,/* 4/ 6 106 70 46 F */ X 6,/* 4/ 7 107 71 47 G */ X 7,/* 4/ 8 110 72 48 H */ X 8,/* 4/ 9 111 73 49 I */ X 9,/* 4/10 112 74 4A J */ X10,/* 4/11 113 75 4B K */ X11,/* 4/12 114 76 4C L */ X12,/* 4/13 115 77 4D M */ X13,/* 4/14 116 78 4E N */ X14,/* 4/15 117 79 4F O */ X15,/* 5/ 0 120 80 50 P */ X16,/* 5/ 1 121 81 51 Q */ X17,/* 5/ 2 122 82 52 R */ X18,/* 5/ 3 123 83 53 S */ X19,/* 5/ 4 124 84 54 T */ X20,/* 5/ 5 125 85 55 U */ X21,/* 5/ 6 126 86 56 V */ X22,/* 5/ 7 127 87 57 W */ X23,/* 5/ 8 130 88 58 X */ X24,/* 5/ 9 131 89 59 Y */ X25,/* 5/10 132 90 5A Z */ X27,/* 5/11 133 91 5B left square bracket */ X28,/* 5/12 134 92 5C reverse solidus */ X26,/* 5/13 135 93 5D right square bracket */ X32,/* 5/14 136 94 5E circumflex accent */ X32,/* 5/15 137 95 5F low line, underline */ X29,/* 6/ 0 140 96 60 grave accent */ X 0,/* 6/ 1 141 97 61 a */ X 1,/* 6/ 2 142 98 62 b */ X 2,/* 6/ 3 143 99 63 c */ X 3,/* 6/ 4 144 100 64 d */ X 4,/* 6/ 5 145 101 65 e */ X 5,/* 6/ 6 146 102 66 f */ X 6,/* 6/ 7 147 103 67 g */ X 7,/* 6/ 8 150 104 68 h */ X 8,/* 6/ 9 151 105 69 i */ X 9,/* 6/10 152 106 6A j */ X10,/* 6/11 153 107 6B k */ X11,/* 6/12 154 108 6C l */ X12,/* 6/13 155 109 6D m */ X13,/* 6/14 156 110 6E n */ X14,/* 6/15 157 111 6F o */ X15,/* 7/ 0 160 112 70 p */ X16,/* 7/ 1 161 113 71 q */ X17,/* 7/ 2 162 114 72 r */ X18,/* 7/ 3 163 115 73 s */ X19,/* 7/ 4 164 116 74 t */ X20,/* 7/ 5 165 117 75 u */ X21,/* 7/ 6 166 118 76 v */ X22,/* 7/ 7 167 119 77 w */ X23,/* 7/ 8 170 120 78 x */ X24,/* 7/ 9 171 121 79 y */ X25,/* 7/10 172 122 7A z */ X27,/* 7/11 173 123 7B left curly bracket */ X28,/* 7/12 174 124 7C vertical line */ X26,/* 7/13 175 125 7D right curly bracket */ X32,/* 7/14 176 126 7E tilde */ X32,/* 7/15 177 127 7F DEL (delete) */ X32,/* 8/ 0 200 128 80 */ X32,/* 8/ 1 201 129 81 */ X32,/* 8/ 2 202 130 82 */ X32,/* 8/ 3 203 131 83 */ X32,/* 8/ 4 204 132 84 */ X32,/* 8/ 5 205 133 85 */ X32,/* 8/ 6 206 134 86 */ X32,/* 8/ 7 207 135 87 */ X32,/* 8/ 8 210 136 88 */ X32,/* 8/ 9 211 137 89 */ X32,/* 8/10 212 138 8A */ X32,/* 8/11 213 139 8B */ X32,/* 8/12 214 140 8C */ X32,/* 8/13 215 141 8D */ X32,/* 8/14 216 142 8E */ X32,/* 8/15 217 143 8F */ X32,/* 9/ 0 220 144 90 */ X32,/* 9/ 1 221 145 91 */ X32,/* 9/ 2 222 146 92 */ X32,/* 9/ 3 223 147 93 */ X32,/* 9/ 4 224 148 94 */ X32,/* 9/ 5 225 149 95 */ X32,/* 9/ 6 226 150 96 */ X32,/* 9/ 7 227 151 97 */ X32,/* 9/ 8 230 152 98 */ X32,/* 9/ 9 231 153 99 */ X32,/* 9/10 232 154 9A */ X32,/* 9/11 233 155 9B */ X32,/* 9/12 234 156 9C */ X32,/* 9/13 235 157 9D */ X32,/* 9/14 236 158 9E */ X32,/* 9/15 237 159 9F */ X32,/*10/ 0 240 160 A0 NBSP (no-break space) */ X32,/*10/ 1 241 161 A1 inverted exclamation mark */ X32,/*10/ 2 242 162 A2 cent sign */ X32,/*10/ 3 243 163 A3 pound sign */ X32,/*10/ 4 244 164 A4 general currency sign */ X32,/*10/ 5 245 165 A5 yen sign */ X32,/*10/ 6 246 166 A6 broken vertical line */ X32,/*10/ 7 247 167 A7 section sign */ X32,/*10/ 8 250 168 A8 diaeresis */ X32,/*10/ 9 251 169 A9 copyright sign */ X32,/*10/10 252 170 AA ordinal indicator, feminine */ X32,/*10/11 253 171 AB angle quotation mark left */ X32,/*10/12 254 172 AC not sign */ X32,/*10/13 255 173 AD soft hyphen */ X32,/*10/14 256 174 AE registered sign */ X32,/*10/15 257 175 AF macron */ X32,/*11/ 0 260 176 B0 degree sign */ X32,/*11/ 1 261 177 B1 plus or minus sign */ X32,/*11/ 2 262 178 B2 superscript two */ X32,/*11/ 3 263 179 B3 superscript three */ X32,/*11/ 4 264 180 B4 acute accent */ X32,/*11/ 5 265 181 B5 micro sign */ X32,/*11/ 6 266 182 B6 pilcrow */ X32,/*11/ 7 267 183 B7 middle dot */ X32,/*11/ 8 270 184 B8 cedilla */ X32,/*11/ 9 271 185 B9 superscript one */ X32,/*11/10 272 186 BA ordinal indicator, masculine */ X32,/*11/11 273 187 BB angle quotation mark right */ X32,/*11/12 274 188 BC fraction one-quarter */ X32,/*11/13 275 189 BD fraction one-half */ X32,/*11/14 276 190 BE fraction three-quarters */ X32,/*11/15 277 191 BF inverted question mark */ X32,/*12/ 0 300 192 C0 capital A with grave accent */ X32,/*12/ 1 301 193 C1 capital A with acute accent */ X32,/*12/ 2 302 194 C2 capital A with circumflex accent */ X32,/*12/ 3 303 195 C3 capital A with tilde */ X27,/*12/ 4 304 196 C4 capital A with diaeresis or umlaut mark */ X26,/*12/ 5 305 197 C5 capital A with ring */ X32,/*12/ 6 306 198 C6 capital AE diphthong */ X32,/*12/ 7 307 199 C7 capital C with cedilla */ X32,/*12/ 8 310 200 C8 capital E with grave accent */ X32,/*12/ 9 311 201 C9 capital E with acute accent */ X32,/*12/10 312 202 CA capital E with circumflex accent */ X32,/*12/11 313 203 CB capital E with diaeresis or umlaut mark */ X32,/*12/12 314 204 CC capital I with grave accent */ X32,/*12/13 315 205 CD capital I with acute accent */ X32,/*12/14 316 206 CE capital I with circumflex accent */ X32,/*12/15 317 207 CF capital I with diaeresis or umlaut mark */ X32,/*13/ 0 320 208 D0 capital D with stroke, Icelandic eth */ X32,/*13/ 1 321 209 D1 capital N with tilde */ X32,/*13/ 2 322 210 D2 capital O with grave accent */ X32,/*13/ 3 323 211 D3 capital O with acute accent */ X32,/*13/ 4 324 212 D4 capital O with circumflex accent */ X32,/*13/ 5 325 213 D5 capital O with tilde */ X28,/*13/ 6 326 214 D6 capital O with diaeresis or umlaut mark */ X32,/*13/ 7 327 215 D7 multiplication sign */ X32,/*13/ 8 330 216 D8 capital O with slash */ X32,/*13/ 9 331 217 D9 capital U with grave accent */ X32,/*13/10 332 218 DA capital U with acute accent */ X32,/*13/11 333 219 DB capital U with circumflex accent */ X32,/*13/12 334 220 DC capital U with diaeresis or umlaut mark */ X32,/*13/13 335 221 DD capital Y with acute accent */ X32,/*13/14 336 222 DE capital thorn, Icelandic */ X32,/*13/15 337 223 DF small sharp s, German */ X32,/*14/ 0 340 224 E0 small a with grave accent */ X32,/*14/ 1 341 225 E1 small a with acute accent */ X32,/*14/ 2 342 226 E2 small a with circumflex accent */ X32,/*14/ 3 343 227 E3 small a with tilde */ X27,/*14/ 4 344 228 E4 small a with diaeresis or umlaut mark */ X26,/*14/ 5 345 229 E5 small a with ring */ X32,/*14/ 6 346 230 E6 small ae diphthong */ X32,/*14/ 7 347 231 E7 small c with cedilla */ X32,/*14/ 8 350 232 E8 small e with grave accent */ X29,/*14/ 9 351 233 E9 small e with acute accent */ X32,/*14/10 352 234 EA small e with circumflex accent */ X32,/*14/11 353 235 EB small e with diaeresis or umlaut mark */ X32,/*14/12 354 236 EC small i with grave accent */ X32,/*14/13 355 237 ED small i with acute accent */ X32,/*14/14 356 238 EE small i with circumflex accent */ X32,/*14/15 357 239 EF small i with diaeresis or umlaut mark */ X32,/*15/ 0 360 240 F0 small d with stroke, Icelandic eth */ X32,/*15/ 1 361 241 F1 small n with tilde */ X32,/*15/ 2 362 242 F2 small o with grave accent */ X32,/*15/ 3 363 243 F3 small o with acute accent */ X32,/*15/ 4 364 244 F4 small o with circumflex accent */ X32,/*15/ 5 365 245 F5 small o with tilde */ X28,/*15/ 6 366 246 F6 small o with diaeresis or umlaut mark */ X32,/*15/ 7 367 247 F7 division sign */ X32,/*15/ 8 370 248 F8 small o with slash */ X32,/*15/ 9 371 249 F9 small u with grave accent */ X32,/*15/10 372 250 FA small u with acute accent */ X32,/*15/11 373 251 FB small u with circumflex accent */ X32,/*15/12 374 252 FC small u with diaeresis or umlaut mark */ X32,/*15/13 375 253 FD small y with acute accent */ X32,/*15/14 376 254 FE small thorn, Icelandic */ X32,/*15/15 377 255 FF small y with diaeresis or umlaut mark */ X}; X X XPRIVATE byteT se8[256] = /* Map Swedish ISO 646 to ISO 8859/1.*/ X{ X0000,/* 0/ 0 0 0 0 NUL (null) */ X0001,/* 0/ 1 1 1 1 SOH (start of heading) */ X0002,/* 0/ 2 2 2 2 STX (start of text) */ X0003,/* 0/ 3 3 3 3 ETX (end of text) */ X0004,/* 0/ 4 4 4 4 EOT (end of transmission) */ X0005,/* 0/ 5 5 5 5 ENQ (enquiry) */ X0006,/* 0/ 6 6 6 6 ACK (acknowledge) */ X0007,/* 0/ 7 7 7 7 BEL (bell) */ X0010,/* 0/ 8 10 8 8 BS (backspace) */ X0011,/* 0/ 9 11 9 9 HT (horizontal tabulation) */ X0012,/* 0/10 12 10 A LF (line feed) */ X0013,/* 0/11 13 11 B VT (vertical tabulation) */ X0014,/* 0/12 14 12 C FF (form feed) */ X0015,/* 0/13 15 13 D CR (carriage return) */ X0016,/* 0/14 16 14 E SO (shift out) */ X0017,/* 0/15 17 15 F SI (shift in) */ X0020,/* 1/ 0 20 16 10 DLE (data link escape) */ X0021,/* 1/ 1 21 17 11 DC1 (device control 1) */ X0022,/* 1/ 2 22 18 12 DC2 (device control 2) */ X0023,/* 1/ 3 23 19 13 DC3 (device control 3) */ X0024,/* 1/ 4 24 20 14 DC4 (device control 4) */ X0025,/* 1/ 5 25 21 15 NAK (negative aknowledge) */ X0026,/* 1/ 6 26 22 16 SYN (synchronous idle) */ X0027,/* 1/ 7 27 23 17 ETB (end of transmission block) */ X0030,/* 1/ 8 30 24 18 CAN (cancel) */ X0031,/* 1/ 9 31 25 19 EM (end of medium) */ X0032,/* 1/10 32 26 1A SUB (substitute character) */ X0033,/* 1/11 33 27 1B ESC (escape) */ X0034,/* 1/12 34 28 1C IS4/FS (information separator 4 / file separator)*/ X0035,/* 1/13 35 29 1D IS3/GS (information separator 3 / group separator) */ X0036,/* 1/14 36 30 1E IS2/RS (information separator 2 / record separator)*/ X0037,/* 1/15 37 31 1F IS1/US (information separator 1 / unit separator)*/ X0040,/* 2/ 0 40 32 20 space */ X0041,/* 2/ 1 41 33 21 exclamation mark */ X0042,/* 2/ 2 42 34 22 quotation mark */ X0043,/* 2/ 3 43 35 23 number sign */ X0044,/* 2/ 4 44 36 24 dollar sign */ X0045,/* 2/ 5 45 37 25 percent sign */ X0046,/* 2/ 6 46 38 26 ampersand */ X0047,/* 2/ 7 47 39 27 apostrophe */ X0050,/* 2/ 8 50 40 28 left parenthesis */ X0051,/* 2/ 9 51 41 29 right parenthesis */ X0052,/* 2/10 52 42 2A asterisk */ X0053,/* 2/11 53 43 2B plus sign */ X0054,/* 2/12 54 44 2C comma */ X0055,/* 2/13 55 45 2D hyphen, minus sign */ X0056,/* 2/14 56 46 2E full stop */ X0057,/* 2/15 57 47 2F solidus */ X0060,/* 3/ 0 60 48 30 digit zero */ X0061,/* 3/ 1 61 49 31 digit one */ X0062,/* 3/ 2 62 50 32 digit two */ X0063,/* 3/ 3 63 51 33 digit three */ X0064,/* 3/ 4 64 52 34 digit four */ X0065,/* 3/ 5 65 53 35 digit five */ X0066,/* 3/ 6 66 54 36 digit six */ X0067,/* 3/ 7 67 55 37 digit seven */ X0070,/* 3/ 8 70 56 38 digit eight */ X0071,/* 3/ 9 71 57 39 digit nine */ X0072,/* 3/10 72 58 3A colon */ X0073,/* 3/11 73 59 3B semicolon */ X0074,/* 3/12 74 60 3C less-than sign */ X0075,/* 3/13 75 61 3D equals sign */ X0076,/* 3/14 76 62 3E greater-than sign */ X0077,/* 3/15 77 63 3F question mark */ X0100,/* 4/ 0 100 64 40 commercial at */ X0101,/* 4/ 1 101 65 41 A */ X0102,/* 4/ 2 102 66 42 B */ X0103,/* 4/ 3 103 67 43 C */ X0104,/* 4/ 4 104 68 44 D */ X0105,/* 4/ 5 105 69 45 E */ X0106,/* 4/ 6 106 70 46 F */ X0107,/* 4/ 7 107 71 47 G */ X0110,/* 4/ 8 110 72 48 H */ X0111,/* 4/ 9 111 73 49 I */ X0112,/* 4/10 112 74 4A J */ X0113,/* 4/11 113 75 4B K */ X0114,/* 4/12 114 76 4C L */ X0115,/* 4/13 115 77 4D M */ X0116,/* 4/14 116 78 4E N */ X0117,/* 4/15 117 79 4F O */ X0120,/* 5/ 0 120 80 50 P */ X0121,/* 5/ 1 121 81 51 Q */ X0122,/* 5/ 2 122 82 52 R */ X0123,/* 5/ 3 123 83 53 S */ X0124,/* 5/ 4 124 84 54 T */ X0125,/* 5/ 5 125 85 55 U */ X0126,/* 5/ 6 126 86 56 V */ X0127,/* 5/ 7 127 87 57 W */ X0130,/* 5/ 8 130 88 58 X */ X0131,/* 5/ 9 131 89 59 Y */ X0132,/* 5/10 132 90 5A Z */ X0304,/* 5/11 133 91 5B left square bracket */ X0326,/* 5/12 134 92 5C reverse solidus */ X0305,/* 5/13 135 93 5D right square bracket */ X0136,/* 5/14 136 94 5E circumflex accent */ X0137,/* 5/15 137 95 5F low line, underline */ X0351,/* 6/ 0 140 96 60 grave accent */ X0141,/* 6/ 1 141 97 61 a */ X0142,/* 6/ 2 142 98 62 b */ X0143,/* 6/ 3 143 99 63 c */ X0144,/* 6/ 4 144 100 64 d */ X0145,/* 6/ 5 145 101 65 e */ X0146,/* 6/ 6 146 102 66 f */ X0147,/* 6/ 7 147 103 67 g */ X0150,/* 6/ 8 150 104 68 h */ X0151,/* 6/ 9 151 105 69 i */ X0152,/* 6/10 152 106 6A j */ X0153,/* 6/11 153 107 6B k */ X0154,/* 6/12 154 108 6C l */ X0155,/* 6/13 155 109 6D m */ X0156,/* 6/14 156 110 6E n */ X0157,/* 6/15 157 111 6F o */ X0160,/* 7/ 0 160 112 70 p */ X0161,/* 7/ 1 161 113 71 q */ X0162,/* 7/ 2 162 114 72 r */ X0163,/* 7/ 3 163 115 73 s */ X0164,/* 7/ 4 164 116 74 t */ X0165,/* 7/ 5 165 117 75 u */ X0166,/* 7/ 6 166 118 76 v */ X0167,/* 7/ 7 167 119 77 w */ X0170,/* 7/ 8 170 120 78 x */ X0171,/* 7/ 9 171 121 79 y */ X0172,/* 7/10 172 122 7A z */ X0344,/* 7/11 173 123 7B left curly bracket */ X0366,/* 7/12 174 124 7C vertical line */ X0345,/* 7/13 175 125 7D right curly bracket */ X0176,/* 7/14 176 126 7E tilde */ X0177,/* 7/15 177 127 7F DEL (delete) */ X0200,/* 8/ 0 200 128 80 */ X0201,/* 8/ 1 201 129 81 */ X0202,/* 8/ 2 202 130 82 */ X0203,/* 8/ 3 203 131 83 */ X0204,/* 8/ 4 204 132 84 */ X0205,/* 8/ 5 205 133 85 */ X0206,/* 8/ 6 206 134 86 */ X0207,/* 8/ 7 207 135 87 */ X0210,/* 8/ 8 210 136 88 */ X0211,/* 8/ 9 211 137 89 */ X0212,/* 8/10 212 138 8A */ X0213,/* 8/11 213 139 8B */ X0214,/* 8/12 214 140 8C */ X0215,/* 8/13 215 141 8D */ X0216,/* 8/14 216 142 8E */ X0217,/* 8/15 217 143 8F */ X0220,/* 9/ 0 220 144 90 */ X0221,/* 9/ 1 221 145 91 */ X0222,/* 9/ 2 222 146 92 */ X0223,/* 9/ 3 223 147 93 */ X0224,/* 9/ 4 224 148 94 */ X0225,/* 9/ 5 225 149 95 */ X0226,/* 9/ 6 226 150 96 */ X0227,/* 9/ 7 227 151 97 */ X0230,/* 9/ 8 230 152 98 */ X0231,/* 9/ 9 231 153 99 */ X0232,/* 9/10 232 154 9A */ X0233,/* 9/11 233 155 9B */ X0234,/* 9/12 234 156 9C */ X0235,/* 9/13 235 157 9D */ X0236,/* 9/14 236 158 9E */ X0237,/* 9/15 237 159 9F */ X0240,/*10/ 0 240 160 A0 NBSP (no-break space) */ X0241,/*10/ 1 241 161 A1 inverted exclamation mark */ X0242,/*10/ 2 242 162 A2 cent sign */ X0243,/*10/ 3 243 163 A3 pound sign */ X0244,/*10/ 4 244 164 A4 general currency sign */ X0245,/*10/ 5 245 165 A5 yen sign */ X0246,/*10/ 6 246 166 A6 broken vertical line */ X0247,/*10/ 7 247 167 A7 section sign */ X0250,/*10/ 8 250 168 A8 diaeresis */ X0251,/*10/ 9 251 169 A9 copyright sign */ X0252,/*10/10 252 170 AA ordinal indicator, feminine */ X0253,/*10/11 253 171 AB angle quotation mark left */ X0254,/*10/12 254 172 AC not sign */ X0255,/*10/13 255 173 AD soft hyphen */ X0256,/*10/14 256 174 AE registered sign */ X0257,/*10/15 257 175 AF macron */ X0260,/*11/ 0 260 176 B0 degree sign */ X0261,/*11/ 1 261 177 B1 plus or minus sign */ X0262,/*11/ 2 262 178 B2 superscript two */ X0263,/*11/ 3 263 179 B3 superscript three */ X0264,/*11/ 4 264 180 B4 acute accent */ X0265,/*11/ 5 265 181 B5 micro sign */ X0266,/*11/ 6 266 182 B6 pilcrow */ X0267,/*11/ 7 267 183 B7 middle dot */ X0270,/*11/ 8 270 184 B8 cedilla */ X0271,/*11/ 9 271 185 B9 superscript one */ X0272,/*11/10 272 186 BA ordinal indicator, masculine */ X0273,/*11/11 273 187 BB angle quotation mark right */ X0274,/*11/12 274 188 BC fraction one-quarter */ X0275,/*11/13 275 189 BD fraction one-half */ X0276,/*11/14 276 190 BE fraction three-quarters */ X0277,/*11/15 277 191 BF inverted question mark */ X0300,/*12/ 0 300 192 C0 capital A with grave accent */ X0301,/*12/ 1 301 193 C1 capital A with acute accent */ X0302,/*12/ 2 302 194 C2 capital A with circumflex accent */ X0303,/*12/ 3 303 195 C3 capital A with tilde */ X0304,/*12/ 4 304 196 C4 capital A with diaeresis or umlaut mark */ X0305,/*12/ 5 305 197 C5 capital A with ring */ X0306,/*12/ 6 306 198 C6 capital AE diphthong */ X0307,/*12/ 7 307 199 C7 capital C with cedilla */ X0310,/*12/ 8 310 200 C8 capital E with grave accent */ X0311,/*12/ 9 311 201 C9 capital E with acute accent */ X0312,/*12/10 312 202 CA capital E with circumflex accent */ X0313,/*12/11 313 203 CB capital E with diaeresis or umlaut mark */ X0314,/*12/12 314 204 CC capital I with grave accent */ X0315,/*12/13 315 205 CD capital I with acute accent */ X0316,/*12/14 316 206 CE capital I with circumflex accent */ X0317,/*12/15 317 207 CF capital I with diaeresis or umlaut mark */ X0320,/*13/ 0 320 208 D0 capital D with stroke, Icelandic eth */ X0321,/*13/ 1 321 209 D1 capital N with tilde */ X0322,/*13/ 2 322 210 D2 capital O with grave accent */ X0323,/*13/ 3 323 211 D3 capital O with acute accent */ X0324,/*13/ 4 324 212 D4 capital O with circumflex accent */ X0325,/*13/ 5 325 213 D5 capital O with tilde */ X0326,/*13/ 6 326 214 D6 capital O with diaeresis or umlaut mark */ X0327,/*13/ 7 327 215 D7 multiplication sign */ X0330,/*13/ 8 330 216 D8 capital O with slash */ X0331,/*13/ 9 331 217 D9 capital U with grave accent */ X0332,/*13/10 332 218 DA capital U with acute accent */ X0333,/*13/11 333 219 DB capital U with circumflex accent */ X0334,/*13/12 334 220 DC capital U with diaeresis or umlaut mark */ X0335,/*13/13 335 221 DD capital Y with acute accent */ X0336,/*13/14 336 222 DE capital thorn, Icelandic */ X0337,/*13/15 337 223 DF small sharp s, German */ X0340,/*14/ 0 340 224 E0 small a with grave accent */ X0341,/*14/ 1 341 225 E1 small a with acute accent */ X0342,/*14/ 2 342 226 E2 small a with circumflex accent */ X0343,/*14/ 3 343 227 E3 small a with tilde */ X0344,/*14/ 4 344 228 E4 small a with diaeresis or umlaut mark */ X0345,/*14/ 5 345 229 E5 small a with ring */ X0346,/*14/ 6 346 230 E6 small ae diphthong */ X0347,/*14/ 7 347 231 E7 small c with cedilla */ X0350,/*14/ 8 350 232 E8 small e with grave accent */ X0351,/*14/ 9 351 233 E9 small e with acute accent */ X0352,/*14/10 352 234 EA small e with circumflex accent */ X0353,/*14/11 353 235 EB small e with diaeresis or umlaut mark */ X0354,/*14/12 354 236 EC small i with grave accent */ X0355,/*14/13 355 237 ED small i with acute accent */ X0356,/*14/14 356 238 EE small i with circumflex accent */ X0357,/*14/15 357 239 EF small i with diaeresis or umlaut mark */ X0360,/*15/ 0 360 240 F0 small d with stroke, Icelandic eth */ X0361,/*15/ 1 361 241 F1 small n with tilde */ X0362,/*15/ 2 362 242 F2 small o with grave accent */ X0363,/*15/ 3 363 243 F3 small o with acute accent */ X0364,/*15/ 4 364 244 F4 small o with circumflex accent */ X0365,/*15/ 5 365 245 F5 small o with tilde */ X0366,/*15/ 6 366 246 F6 small o with diaeresis or umlaut mark */ X0367,/*15/ 7 367 247 F7 division sign */ X0370,/*15/ 8 370 248 F8 small o with slash */ X0371,/*15/ 9 371 249 F9 small u with grave accent */ X0372,/*15/10 372 250 FA small u with acute accent */ X0373,/*15/11 373 251 FB small u with circumflex accent */ X0374,/*15/12 374 252 FC small u with diaeresis or umlaut mark */ X0375,/*15/13 375 253 FD small y with acute accent */ X0376,/*15/14 376 254 FE small thorn, Icelandic */ X0377,/*15/15 377 255 FF small y with diaeresis or umlaut mark */ X}; X X#include "78common.h" X X/* Different sections in a file: */ X#define S_HDR 1 /* News article header.*/ X#define S_BODY 2 /* News article body.*/ X#define S_SIG 3 /* News article signature.*/ X XPRIVATE double attack = 0.65; /* Smoothing factor.*/ XPRIVATE double blank = 0.7; /* Scale attack/decay on blank lines.*/ XPRIVATE double bodval = -200.0; /* Score at start of body.*/ XPRIVATE double colon = 0.5; /* Scale attack/decay after colon.*/ XPRIVATE boolT debug = FALSE; /* Debug flag.*/ XPRIVATE double decay = 0.67; /* Smoothing factor.*/ XPRIVATE boolT fixbody = FALSE; /* Ordinary file, no header or signature.*/ XPRIVATE double headval = 0.0; /* For header values.*/ XPRIVATE boolT mailbox = FALSE; /* Converting a mailbox.*/ XPRIVATE double pound1 = -350.0; /* After # at beginning of line.*/ XPRIVATE unsigned siglns = 9; /* Max lines in a signature.*/ XPRIVATE double thresh = 0.0; /* Score above this is Swedish.*/ XPRIVATE triDifT seustt[TRIMAX + 1];/* Trigram difference table.*/ X XPRIVATE bStrT sewords[] = /* These are always Swedish.*/ X { X S("D}"), X S("p}"), X S("s}"), X S("{r"), X S("|ver"), X NULBSTR X }; X XPRIVATE bStrT uswords[] = /* These are never Swedish.*/ X { X S("[]"), X S("[The"), X NULBSTR X }; X X#include "78heur.h" X X/* seus - run heuristics on one file */ X XPRIVATE void seus (is, fn) XR9 streamT is; /* Input stream.*/ X bStrT fn; /* File name.*/ X X/* Function: X * Copy file to standard output, converting to ISO 8859/1. X * Algorithm: X * Read each line. Switch on section and look for section X * transitions. Step through the line. Look for section matches. X * Call dif78() on each word. Compute score. If word looks Swedish, X * convert it. Write line. X * Returns: X * X * Notes: X * X */ X{ XR2 rcharT b; /* Current input byte.*/ XR4 int i; /* General putpose.*/ X double cum = 0.0; /* Cumulative score.*/ X unsigned ln = 0; /* Input line number.*/ X int lns = -1; /* Value from Lines: header field; -1 = unknown.*/ XR8 unsigned sigln = 1; /* Line number in signature.*/ XR5 bStrT p1; /* Rest of line after special match.*/ XR1 bStrT lp; /* Steps through lb[].*/ XR7 boolT sigbeg; /* Line looks like start of signature.*/ XR3 bStrT wp = NULBSTR; /* Points to start of word.*/ XR6 unsigned sect; /* Current section.*/ X byteT lb[MLINE + 1]; /* Line buffer.*/ X Xsect = (fixbody ? S_BODY : S_HDR); Xlb[0] = ' '; Xwhile (NULBSTR != (getlin ((lp = &lb[1]), MLINE, is, fn, &ln, 0))) X { X if (mailbox && (NULBSTR != prefix (S("From "), lp))) X { X cum = 0.0; X ln = 1; X lns = -1; X sect = S_HDR; X sigln = 1; X sigbeg = FALSE; X } X else X sigbeg = SigBegP (lp); X switch (sect) X { X case S_HDR: X if (EOS == B(*lp)) X { X sect = S_BODY; X cum = bodval; X ln = 0; X } X else X { X if (NULBSTR != (p1 = prefix (S("Lines: "), lp))) X (void) a2i (p1, NULBSTR, TRUE, &lns, (bStrT *) NULL); X if (NULBSTR != (p1 = bStrChr (lp, ':'))) X { X cum = headval; X lp = p1 + 1; X } X } X break; X case S_BODY: X if (sigbeg || (!fixbody && (lns > siglns) && (ln > (lns - siglns)))) X sect = S_SIG; X else X { X for (; '>' == B(*lp); ++lp) X ; X if (('#' == B(*lp)) || ('X' == B(*lp))) X cum = MIN (cum, pound1); X } X break; X case S_SIG: X if (sigbeg) X sigln = 1; X else if (sigln <= siglns) X ++sigln; X else X { X sigln = 1; X sect = S_BODY; X } X break; X default: X malf1 (eIntern, "seus 1"); X break; X } X if (EOS == B(*lp)) cum *= blank * ((cum > thresh) ? decay : attack); X do X { X b = B(*lp); X if ((NULBSTR != (p1 = BraceP (lp, sect))) || X (NULBSTR != (p1 = UunetP (lp))) || X (NULBSTR != (p1 = IPP (lp))) || X (NULBSTR != (p1 = InArtP (lp, sect))) || X (NULBSTR != (p1 = GrafP (lp, sect))) || X (NULBSTR != (p1 = PipeP (lp, sect))) || X (NULBSTR != (p1 = EndP (lp, sect, S(") writes:")))) || X (NULBSTR != (p1 = EndP (lp, sect, S(" \\n\\")))) || X (NULBSTR != (p1 = LaTeXP (lp)))) X { X lp = p1; X wp = NULBSTR; X } X else X { X if (byte2t[b] <= TRIHI) X { X if (NULBSTR == wp) wp = lp; X } X else X { X if (NULBSTR != wp) X { X i = dif78 (wp, lp, seustt); X cum *= ((i > 0) ? attack : decay); X cum += i; X if (((cum > thresh) && !wordp (wp, lp, uswords)) || X wordp (wp, lp, sewords)) X { X for (p1 = wp; p1 != lp; ++p1) X *p1 = se8[B(*p1)]; X } X if (debug) X FPRINTF (stderr, "%c%6.0f %.*s\n", "?hbsf"[sect], cum, X lp - wp, wp); X wp = NULBSTR; X if (':' == b) X cum *= colon * ((cum > thresh) ? decay : attack); X } X } X ++lp; X } X } X while (EOS != b); X puts (&lb[1]); X } X} X X/* main - main function */ X XPUBLIC int main (argc, argv) X int argc; /* Number of arguments.*/ XR3 bStrT *argv; /* Points to array of argument strings.*/ X X/* Function: X * X * Algorithm: X * Decode args. Initialize. Call seus(). X * Notes: X * X */ X X{ XR1 rcharT c; /* Option letter.*/ XR2 bStrT cp; /* Steps through args.*/ Xextern int optind; /* See getopt (3).*/ Xextern cStrT optarg; /* See getopt (3).*/ X Xwhile (EOF != (c = getopt (argc, (cStrT *) argv, "#:A:B:D:b:c:dfh:ms:t:"))) X { X switch (c) X { X case '?': X usage(); X break; X case '#': X pound1 = ma2d ((bStrT) optarg, NULBSTR, FALSE, "# Value", X (bStrT *) NULL); X break; X case 'A': X attack = mra2d ((bStrT) optarg, NULBSTR, FALSE, "Attack", 0.001, 0.999, X (bStrT *) NULL); X break; X case 'B': X blank = ma2d ((bStrT) optarg, NULBSTR, FALSE, S("Blank smoothing"), X (bStrT *) NULL); X break; X case 'D': X decay = mra2d ((bStrT) optarg, NULBSTR, FALSE, "Decay", 0.001, 0.999, X (bStrT *) NULL); X break; X case 'b': X bodval = ma2d ((bStrT) optarg, NULBSTR, FALSE, "Body Value", X (bStrT *) NULL); X break; X case 'c': X colon = ma2d ((bStrT) optarg, NULBSTR, FALSE, S("Colon Smoothing"), X (bStrT *) NULL); X break; X case 'd': X debug = TRUE; X break; X case 'f': X fixbody = TRUE; X break; X case 'h': X headval = ma2d ((bStrT) optarg, NULBSTR, FALSE, S("Header Value"), X (bStrT *) NULL); X break; X case 'm': X mailbox = TRUE; X break; X case 's': X siglns = mra2u ((bStrT) optarg, NULBSTR, FALSE, "Max signature lines", X (unsigned) 1, (unsigned) 99, (bStrT *) NULL); X break; X case 't': X thresh = ma2d ((bStrT) optarg, NULBSTR, FALSE, "Threshold", X (bStrT *) NULL); X break; X default: X malf1 (eIntern, "main 1"); X break; X } X } Xargv += optind; Xcp = *argv++; Xif (NULBSTR != cp) usage(); Xipath(); Xmrdtri (S("seus"), (bStrT) seustt); Xseus (stdin, S("Standard Input")); Xmfflush (stdout, "Standard Output"); Xexit (SUCCESS); X X#ifdef lint Xreturn (SUCCESS); X#endif X} END_OF_FILE if test 50344 -ne `wc -c <'78seus.c'`; then echo shar: \"'78seus.c'\" unpacked with wrong size! fi # end of '78seus.c' fi echo shar: End of archive 10 \(of 14\). cp /dev/null ark10isdone MISSING="" for I in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 ; do if test ! -f ark${I}isdone ; then MISSING="${MISSING} ${I}" fi done if test "${MISSING}" = "" ; then echo You have unpacked all 14 archives. rm -f ark[1-9]isdone ark[1-9][0-9]isdone else echo You still need to unpack the following archives: echo " " ${MISSING} fi ## End of shell archive. exit 0