KLUNDE@VMS.MACC.WISC.EDU ("Ken R. Lunde") (10/16/90)
/*
 * Archive-name: kanji-values/13-Oct-90
 * Original-posting-by: KLUNDE@VMS.MACC.WISC.EDU ("Ken R. Lunde")
 * Original-subject: Latest version of VALUES.C (with automatic KANJI code detection)
 * Reposted-by: emv@math.lsa.umich.edu (Edward Vielmetti)
 *
 * [Reposted from sci.lang.japan.
 * Comments on this service to emv@math.lsa.umich.edu (Edward Vielmetti).]
 */

/* VALUES.C version of 14 October 1990 */
/* A utility for displaying the values of Japanese characters. */
/* Written by Ken R. Lunde, University of Wisconsin-Madison */
/* EMAIL: klunde@vms.macc.wisc.edu */
/* Available at the ucdavis.edu (128.120.2.1) FTP archive in pub/JIS/C. */

/* I do not consider myself to be a very advanced programmer, but perhaps one */
/* other person may have a use for this program. Please feel free to use this */
/* source code any way you wish. The conversion algorithms for the major codes */
/* for Japanese are used, and are very reusable. The algorithm which detects */
/* the input file's Japanese code automatically is also quite useful. */

/* This program was written as a tool for determining the values for Japanese */
/* and ASCII characters. It is written in ANSI C, so should be compilable on */
/* almost any platform, but I do not offer any guarantees. :-) */

/* This version accepts SHIFT-JIS, EUC, or the 7-bit JIS codes as valid input */
/* for the file it reads. This program automatically detects which KANJI code */
/* is used in the input file. The output file will use the same code that the */
/* input file used. */

/* This program creates a file containing the contents of the input file, and */
/* displays each character's SHIFT-JIS, EUC, and JIS values in one of three */
/* different styles: octal, decimal, or hexadecimal -- the user must specify */
/* which one to use. ASCII and KUTEN values are also given. A tab separates */
/* the fields in the output file. I find that a tab width of 14 characters is */
/* best when printing. The SJIS, EUC, and JIS columns are padded with zeros */
/* for octal and decimal output. This makes the output more "readable." */

/* For SHIFT-JIS input files only, half-size KATAKANA are treated. Only their */
/* ASCII value is displayed since they are single-byte characters. Printable */
/* ASCII characters are handled with all the Japanese codes. */

/* Please send comments and suggestions! ENJOY! */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int DetectCodeType(FILE *in);
int isodd(int number);
void Introduction(FILE *out,int choice,int code);
void print1byte(FILE *out,int choice,int one);
void print2byte(FILE *out,int code,int choice,int one,int two,int data[8]);
void seven2shift(int *ptr1,int *ptr2);
void shift2seven(int *ptr1,int *ptr2);
void Skip_ESC_Seq(FILE *in,int data,int *ptr);
void TreatEUC(FILE *in,FILE *out,int code,int choice);
void TreatJIS(FILE *in,FILE *out,int code,int choice);
void TreatSJIS(FILE *in,FILE *out,int code,int choice);

/* Detected input encodings (return values of DetectCodeType). */
#define NOT_SET 0
#define NEW     1
#define OLD     2
#define NEC     3
#define EUC     4
#define SJIS    5

#define TRUE    1
#define FALSE   0
#define ESC     27

/* Indices into the data[8] array handed to print2byte(). */
#define SJIS1   0
#define SJIS2   1
#define EUC1    2
#define EUC2    3
#define JIS1    4
#define JIS2    5
#define KT1     6
#define KT2     7

/* Output radix choices. */
#define OCT     8
#define DEC     10
#define HEX     16

/* Kanji-in / kanji-out escape sequences (the leading ESC is written separately). */
#define KI_NEW  "$B"
#define KO_NEW  "(J"
#define KI_OLD  "$@"
#define KO_OLD  "(J"
#define KI_NEC  "K"
#define KO_NEC  "H"

/*
 * Read one line from stdin into buf (at most size-1 characters) and strip
 * the line terminator.  If the line is longer than the buffer, the excess
 * is discarded so it cannot leak into the next prompt.
 * Returns TRUE on success, FALSE on end-of-file or read error.
 */
static int read_line(char *buf, size_t size)
{
    size_t len;
    int c;

    if (fgets(buf, (int)size, stdin) == NULL)
        return FALSE;

    len = strcspn(buf, "\r\n");
    if (buf[len] == '\0') {
        /* No terminator seen: drain the rest of the over-long line. */
        while (((c = getchar()) != EOF) && (c != '\n'))
            ;
    }
    buf[len] = '\0';
    return TRUE;
}

/*
 * Interactive driver: asks for the input file, output file and radix, then
 * dumps the value table.  Define VALUES_NO_MAIN to compile the conversion
 * routines without this driver (e.g. to link them into a test harness).
 */
#ifndef VALUES_NO_MAIN
int main(void)
{
    FILE *in, *out;
    int code;
    int choice = 0;
    char infilename[80], outfilename[80];

    printf("\nInfile name -> ");
    fflush(stdout);
    if (!read_line(infilename, sizeof infilename)) {
        printf("\nNo infile name given");
        exit(1);
    }
    if ((in = fopen(infilename, "r")) == NULL) {
        printf("\nCannot open %s", infilename);
        exit(1);
    }
    if ((code = DetectCodeType(in)) == NOT_SET) {
        printf("\nNo KANJI code detected in %s", infilename);
        exit(1);
    }
    /* DetectCodeType() closed the stream; reopen it for the real pass. */
    if ((in = fopen(infilename, "r")) == NULL) {
        printf("\nCannot open %s", infilename);
        exit(1);
    }

    printf("Outfile name -> ");
    fflush(stdout);
    if (!read_line(outfilename, sizeof outfilename)) {
        printf("\nNo outfile name given");
        exit(1);
    }
    if ((out = fopen(outfilename, "w")) == NULL) {
        printf("\nCannot open %s", outfilename);
        exit(1);
    }

    printf("Output (8 = octal, 10 = decimal, 16 = hexadecimal) -> ");
    fflush(stdout);
    /* A failed conversion leaves choice at 0, which is rejected below. */
    if ((scanf("%d", &choice) != 1) ||
        ((choice != OCT) && (choice != DEC) && (choice != HEX))) {
        printf("\nInvalid choice! Bye!");
        exit(1);
    }

    Introduction(out, choice, code);
    switch (code) {
    case SJIS :
        TreatSJIS(in, out, code, choice);
        break;
    case EUC :
        TreatEUC(in, out, code, choice);
        break;
    case NEW :
    case OLD :
    case NEC :
        TreatJIS(in, out, code, choice);
        break;
    default :
        break;
    }

    fclose(in);
    /* fclose flushes buffered output, so a failure here means lost data. */
    if (fclose(out) != 0) {
        printf("\nError writing %s", outfilename);
        return 1;
    }
    return 0;
}
#endif /* VALUES_NO_MAIN */

/*
 * Scan the stream for the first evidence of a Japanese encoding and return
 * NEW, OLD, NEC (7-bit JIS variants, recognised by their kanji-in escape
 * sequence), SJIS, EUC, or NOT_SET when nothing conclusive is found.
 *
 * The 8-bit test is a heuristic on the first high-bit byte pair only:
 * lead 129-159 with trail 64-160 means SJIS; lead and trail both in
 * 161-254 means EUC.  Other pairs are inconclusive and scanning continues.
 *
 * NOTE: the stream is closed before returning; callers must reopen the
 * file to read its contents.
 */
int DetectCodeType(FILE *in)
{
    int p1, p2, p3;
    int whatcode = NOT_SET;

    while ((whatcode == NOT_SET) && ((p1 = getc(in)) != EOF)) {
        if (p1 == ESC) {
            p2 = getc(in);
            if (p2 == '$') {
                p3 = getc(in);
                if (p3 == 'B')
                    whatcode = NEW;
                else if (p3 == '@')
                    whatcode = OLD;
            }
            else if (p2 == 'K')
                whatcode = NEC;
        }
        else if ((p1 >= 129) && (p1 <= 254)) {
            p2 = getc(in);
            if ((p1 <= 159) && (p2 >= 64) && (p2 <= 160))
                whatcode = SJIS;
            else if ((p1 >= 161) && (p2 >= 161) && (p2 <= 254))
                whatcode = EUC;
        }
    }
    fclose(in);
    return whatcode;
}

/* Return 1 when number is odd, 0 when it is even. */
int isodd(int number)
{
    return ((number % 2) ? 1 : 0);
}

/* Write the report heading: the radix in use, the kanji code, column titles. */
void Introduction(FILE *out,int choice,int code)
{
    switch (choice) {
    case OCT :
        fprintf(out,"Character values (in octal):\n\n");
        break;
    case DEC :
        fprintf(out,"Character values (in decimal):\n\n");
        break;
    case HEX :
        fprintf(out,"Character values (in hexadecimal):\n\n");
        break;
    default :
        break;
    }
    switch (code) {
    case SJIS :
        fprintf(out,"Output KANJI code will be SHIFT-JIS\n\n");
        break;
    case EUC :
        fprintf(out,"Output KANJI code will be EUC\n\n");
        break;
    case NEW :
        fprintf(out,"Output KANJI code will be JIS 7-bit (NEW-JIS)\n\n");
        break;
    case OLD :
        fprintf(out,"Output KANJI code will be JIS 7-bit (OLD-JIS)\n\n");
        break;
    case NEC :
        fprintf(out,"Output KANJI code will be JIS 7-bit (NEC-JIS)\n\n");
        break;
    default :
        break;
    }
    fprintf(out,"CHARACTER\tSHIFT-JIS or\tEUC\tJIS\tASCII\tKUTEN\n");
    fprintf(out,"\tsingle-byte\n\n");
}

/* Write one row for a single-byte character: the character and its value. */
void print1byte(FILE *out,int choice,int one)
{
    switch (choice) {
    case OCT :
        fprintf(out,"%c\t%03o\n",one,one);
        break;
    case DEC :
        fprintf(out,"%c\t%03d\n",one,one);
        break;
    case HEX :
        fprintf(out,"%c\t%X\n",one,one);
        break;
    default :
        break;
    }
}

/*
 * Write one row for a two-byte character.
 *   one, two - the bytes exactly as read from the input; for the 7-bit JIS
 *              codes they are wrapped in kanji-in / kanji-out escapes so the
 *              character displays correctly in the output file.
 *   data     - the eight precomputed values indexed by SJIS1..KT2.
 * Columns: character, SJIS, EUC, JIS (in the chosen radix), the JIS bytes
 * shown as ASCII, and the KUTEN pair (always decimal).
 */
void print2byte(FILE *out,int code,int choice,int one,int two,int data[8])
{
    switch (code) {
    case NEW :
        fprintf(out,"%c%s%c%c%c%s\t",ESC,KI_NEW,one,two,ESC,KO_NEW);
        break;
    case OLD :
        fprintf(out,"%c%s%c%c%c%s\t",ESC,KI_OLD,one,two,ESC,KO_OLD);
        break;
    case NEC :
        fprintf(out,"%c%s%c%c%c%s\t",ESC,KI_NEC,one,two,ESC,KO_NEC);
        break;
    default :
        fprintf(out,"%c%c\t",one,two);
        break;
    }
    switch (choice) {
    case OCT :
        fprintf(out,"%03o-%03o\t",data[SJIS1],data[SJIS2]);
        fprintf(out,"%03o-%03o\t",data[EUC1],data[EUC2]);
        fprintf(out,"%03o-%03o\t",data[JIS1],data[JIS2]);
        break;
    case DEC :
        fprintf(out,"%03d-%03d\t",data[SJIS1],data[SJIS2]);
        fprintf(out,"%03d-%03d\t",data[EUC1],data[EUC2]);
        fprintf(out,"%03d-%03d\t",data[JIS1],data[JIS2]);
        break;
    case HEX :
        fprintf(out,"%X-%X\t",data[SJIS1],data[SJIS2]);
        fprintf(out,"%X-%X\t",data[EUC1],data[EUC2]);
        fprintf(out,"%X-%X\t",data[JIS1],data[JIS2]);
        break;
    default :
        break;
    }
    fprintf(out,"%c%c\t",data[JIS1],data[JIS2]);
    fprintf(out,"%02d-%02d\n",data[KT1],data[KT2]);
}

/*
 * Convert a 7-bit JIS byte pair (each byte 33-126) to Shift-JIS, in place.
 * Two JIS rows share one Shift-JIS lead byte: odd rows use the low half of
 * the trail-byte range and even rows the high half.
 */
void seven2shift(int *p1,int *p2)
{
    /* Sample the row parity before *p1 is overwritten below. */
    int odd_row = isodd(*p1);

    if (odd_row)
        *p2 += 31;
    else
        *p2 += 126;
    /* Step over 127, which is never used as a Shift-JIS trail byte. */
    if ((*p2 >= 127) && (*p2 < 158))
        (*p2)++;

    if ((*p1 >= 33) && (*p1 <= 94))
        *p1 = odd_row ? ((*p1 - 1) / 2) + 113 : (*p1 / 2) + 112;
    else if ((*p1 >= 95) && (*p1 <= 126))
        *p1 = odd_row ? ((*p1 - 1) / 2) + 177 : (*p1 / 2) + 176;
}

/*
 * Convert a Shift-JIS byte pair to 7-bit JIS, in place (the inverse of
 * seven2shift).  Lead bytes 129-159 and 224-239 are handled; a pair outside
 * those ranges leaves *p1 unchanged.
 */
void shift2seven(int *p1,int *p2)
{
    int temp;

    /* The original trail byte decides both the new trail and the row parity. */
    temp = *p2;
    if ((*p2 >= 64) && (*p2 <= 158))
        *p2 -= 31;
    else if ((*p2 >= 159) && (*p2 <= 252))
        *p2 -= 126;
    /* Undo the step over 127 made by seven2shift. */
    if ((temp > 127) && (temp <= 158))
        (*p2)--;

    if ((*p1 >= 129) && (*p1 <= 159) && (temp >= 64) && (temp <= 158))
        *p1 = ((*p1 - 113) * 2) + 1;
    else if ((*p1 >= 129) && (*p1 <= 159) && (temp >= 159) && (temp <= 252))
        *p1 = (*p1 - 112) * 2;
    else if ((*p1 >= 224) && (*p1 <= 239) && (temp >= 64) && (temp <= 158))
        *p1 = ((*p1 - 177) * 2) + 1;
    else if ((*p1 >= 224) && (*p1 <= 239) && (temp >= 159) && (temp <= 252))
        *p1 = (*p1 - 176) * 2;
}

/*
 * Consume the remainder of an escape sequence and update the shift state.
 *   temp       - the byte that followed ESC (already read by the caller).
 *   shifted_in - set TRUE after a kanji-in sequence (ESC $ x or ESC K),
 *                FALSE after anything else.
 * Sequences introduced by '$' or '(' carry one further byte, which is
 * read and discarded.
 */
void Skip_ESC_Seq(FILE *in,int temp,int *shifted_in)
{
    if ((temp == '$') || (temp == '('))
        (void)getc(in);
    if ((temp == 'K') || (temp == '$'))
        *shifted_in = TRUE;
    else
        *shifted_in = FALSE;
}

/*
 * Dump an EUC input stream.  Byte pairs with both bytes in 161-254 are
 * reported as two-byte characters; printable ASCII (33-126) is reported as
 * single bytes; everything else is skipped.
 */
void TreatEUC(FILE *in,FILE *out,int code,int choice)
{
    int one, two;
    int data[8];

    while ((one = getc(in)) != EOF) {
        if ((one >= 161) && (one <= 254)) {
            two = getc(in);
            if (two == EOF)
                break;                  /* truncated pair at end of file */
            if ((two < 161) || (two > 254)) {
                /* Not a valid second byte: drop the lead, re-read this one. */
                ungetc(two, in);
                continue;
            }
            data[EUC1] = one;
            data[EUC2] = two;
            /* EUC is JIS with the high bit set on both bytes. */
            data[JIS1] = one - 128;
            data[JIS2] = two - 128;
            data[SJIS1] = data[JIS1];
            data[SJIS2] = data[JIS2];
            seven2shift(&data[SJIS1], &data[SJIS2]);
            data[KT1] = one - 160;
            data[KT2] = two - 160;
            print2byte(out, code, choice, one, two, data);
        }
        else if ((one >= 33) && (one <= 126))
            print1byte(out, choice, one);
    }
}

/*
 * Dump a 7-bit JIS input stream.  Escape sequences switch between kanji
 * (two-byte) and ASCII (one-byte) mode; in kanji mode each pair of bytes
 * in 33-126 is one character.
 */
void TreatJIS(FILE *in,FILE *out,int code,int choice)
{
    int shifted_in, temp, one, two;
    int data[8];

    shifted_in = FALSE;
    while ((one = getc(in)) != EOF) {
        if (one == ESC) {
            temp = getc(in);
            if (temp == EOF)
                break;
            Skip_ESC_Seq(in, temp, &shifted_in);
            /*
             * Go back to the top so the next byte is checked for ESC and
             * EOF again: sequences may be adjacent, and a file may end
             * directly after one.
             */
            continue;
        }
        if (shifted_in) {
            if ((one < 33) || (one > 126))
                continue;               /* e.g. a line break in kanji mode */
            two = getc(in);
            if (two == EOF)
                break;                  /* truncated pair at end of file */
            if ((two < 33) || (two > 126)) {
                /* Not a valid second byte (possibly ESC): re-read it. */
                ungetc(two, in);
                continue;
            }
            data[JIS1] = one;
            data[JIS2] = two;
            data[SJIS1] = one;
            data[SJIS2] = two;
            seven2shift(&data[SJIS1], &data[SJIS2]);
            data[EUC1] = one + 128;
            data[EUC2] = two + 128;
            data[KT1] = one - 32;
            data[KT2] = two - 32;
            print2byte(out, code, choice, one, two, data);
        }
        else if ((one >= 33) && (one <= 126))
            print1byte(out, choice, one);
    }
}

/*
 * Dump a Shift-JIS input stream.  Lead bytes 129-159 and 224-239 start a
 * two-byte character; printable ASCII (33-126) and half-size KATAKANA
 * (161-223) are reported as single bytes; everything else is skipped.
 */
void TreatSJIS(FILE *in,FILE *out,int code,int choice)
{
    int one, two;
    int data[8];

    while ((one = getc(in)) != EOF) {
        if (((one >= 129) && (one <= 159)) || ((one >= 224) && (one <= 239))) {
            two = getc(in);
            if (two == EOF)
                break;                  /* truncated pair at end of file */
            if ((two < 64) || (two > 252) || (two == 127)) {
                /* Not a valid trail byte: drop the lead, re-read this one. */
                ungetc(two, in);
                continue;
            }
            data[SJIS1] = one;
            data[SJIS2] = two;
            /* Convert once; the EUC and KUTEN values are fixed offsets from JIS. */
            data[JIS1] = one;
            data[JIS2] = two;
            shift2seven(&data[JIS1], &data[JIS2]);
            data[EUC1] = data[JIS1] + 128;
            data[EUC2] = data[JIS2] + 128;
            data[KT1] = data[JIS1] - 32;
            data[KT2] = data[JIS2] - 32;
            print2byte(out, code, choice, one, two, data);
        }
        else if (((one >= 33) && (one <= 126)) || ((one >= 161) && (one <= 223)))
            print1byte(out, choice, one);
    }
}