-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathmmotype.w
468 lines (415 loc) · 14.1 KB
/
mmotype.w
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
% This file is part of the MMIXware package (c) Donald E Knuth 1999
@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!
\def\title{MMOTYPE}
\def\MMIX{\.{MMIX}}
\def\MMIXAL{\.{MMIXAL}}
\def\Hex#1{\hbox{$^{\scriptscriptstyle\#}$\tt#1}} % experimental hex constant
@* Introduction. This program reads a binary \.{mmo} file output by
the \MMIXAL\ processor and lists it in human-readable form. It lists
only the symbol table, if invoked with the \.{-s} option. It lists
also the tetrabytes of input, if invoked with the \.{-v} option.
@s tetra int
@c
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
@<Prototype preparations@>@;
@<Type definitions@>@;
@<Global variables@>@;
@<Subroutines@>@;
@#
int main(argc,argv)
int argc;@+char*argv[];
{
register int j,delta,postamble=0;
  /* |j| is a scratch counter, |delta| holds a relative-fix offset, and
     |postamble| becomes nonzero when |lop_post| has been seen */
register char *p; /* cursor used while a file name is being stored */
@<Process the command line@>;
@<Initialize everything@>;
@<List the preamble@>;
  /* each pass of the following loop handles one item of the file;
     a |continue| inside the body goes straight to the loop test */
do @<List the next item@>@;@+while (!postamble);
@<List the postamble@>;
@<List the symbol table@>;
return 0;
}
@ @<Process the command line@>=
listing=1, verbose=0; /* defaults: list everything, don't echo raw tetras */
  /* Only the arguments before the last one can be options, and each option
     must be a hyphen followed by exactly one letter. The second character
     is tested before the third, so that an argument consisting of a lone
     hyphen is never indexed beyond its terminating null. */
for (j=1;j<argc-1 && argv[j][0]=='-' && argv[j][1]!='\0'
       && argv[j][2]=='\0';j++) {
  if (argv[j][1]=='s') listing=0;
  else if (argv[j][1]=='v') verbose=1;
  else break; /* an unknown option letter leads to the usage message */
}
if (j!=argc-1) { /* exactly one argument, the file name, must remain */
  fprintf(stderr,"Usage: %s [-s] [-v] mmofile\n",argv[0]);
@.Usage: ...@>
  exit(-1);
}
@ @<Initialize everything@>=
mmo_file=fopen(argv[argc-1],"rb");
if (!mmo_file) {
fprintf(stderr,"Can't open file %s!\n",argv[argc-1]);
@.Can't open...@>
exit(-2);
}
@ @<Glob...@>=
int listing; /* are we listing everything? (cleared by the \.{-s} option) */
int verbose; /* are we also showing the tetras of input as they are read? */
  /* |verbose| is set by the \.{-v} option and consulted by |read_tet| */
FILE *mmo_file; /* the input file, opened for reading in binary mode */
@ @<Prototype preparations@>=
#ifdef __STDC__
#define ARGS(list) list
#else
#define ARGS(list) ()
#endif
@ A complete definition of \.{mmo} format appears in the \MMIXAL\ document.
Here we need to define only the basic constants used for interpretation.
@d mm 0x98 /* the escape code of \.{mmo} format */
@d lop_quote 0x0 /* the quotation lopcode */
@d lop_loc 0x1 /* the location lopcode */
@d lop_skip 0x2 /* the skip lopcode */
@d lop_fixo 0x3 /* the octabyte-fix lopcode */
@d lop_fixr 0x4 /* the relative-fix lopcode */
@d lop_fixrx 0x5 /* extended relative-fix lopcode */
@d lop_file 0x6 /* the file name lopcode */
@d lop_line 0x7 /* the file position lopcode */
@d lop_spec 0x8 /* the special hook lopcode */
@d lop_pre 0x9 /* the preamble lopcode */
@d lop_post 0xa /* the postamble lopcode */
@d lop_stab 0xb /* the symbol table lopcode */
@d lop_end 0xc /* the end-it-all lopcode */
@* Low-level arithmetic. This program is intended to work correctly
whenever an |int| has at least 32 bits.
@<Type...@>=
typedef unsigned char byte; /* a monobyte */
typedef unsigned int tetra; /* a tetrabyte */
  /* NOTE(review): nothing here forces |tetra| to be exactly 32 bits wide;
     confirm the arithmetic on targets where |unsigned int| is wider */
typedef struct {@+tetra h,l;}@+octa; /* an octabyte: high half |h|, low half |l| */
@ The |incr| subroutine adds a signed integer to an (unsigned) octabyte.
@<Sub...@>=
octa incr @,@,@[ARGS((octa,int))@];
octa incr(o,delta)
  octa o; /* the octabyte to be adjusted */
  int delta; /* the signed amount to add to it */
{
  register tetra gap; /* how far the low half can move without wrapping */
  octa sum; /* the result under construction */
  sum.h=o.h; /* the high half changes only on a carry or a borrow */
  if (delta<0) {
    gap=-delta; /* the magnitude of the decrement */
    if (o.l<gap) sum.l=o.l+(0xffffffff+delta)+1, sum.h=o.h-1; /* borrow */
    else sum.l=o.l-gap;
    return sum;
  }
  gap=0xffffffff-delta; /* the largest low half that won't carry */
  if (o.l>gap) sum.l=o.l-gap-1, sum.h=o.h+1; /* carry */
  else sum.l=o.l+delta;
  return sum;
}
@* Low-level input. The tetrabytes of an \.{mmo} file are stored in
friendly big-endian fashion, but this program is supposed to work also
on computers that are little-endian. Therefore we read four successive bytes
and pack them into a tetrabyte, instead of reading a single tetrabyte.
@<Sub...@>=
void read_tet @,@,@[ARGS((void))@];
void read_tet()
{
if (fread(buf,1,4,mmo_file)!=4) {
fprintf(stderr,"Unexpected end of file after %d tetras!\n",count);
@.Unexpected end of file...@>
exit(-3);
}
yz=(buf[2]<<8)+buf[3];
tet=(((buf[0]<<8)+buf[1])<<16)+yz;
if (verbose) printf(" %08x\n",tet);
count++;
}
@ @<Sub...@>=
byte read_byte @,@,@[ARGS((void))@];
byte read_byte()
{
  /* Deliver the next byte of input, fetching a fresh tetrabyte first
     whenever the previous one has been used up. */
  register byte next;
  if (byte_count==0) read_tet();
  next=buf[byte_count++];
  byte_count&=3; /* wrap around after the fourth byte */
  return next;
}
@ @<Glob...@>=
int count; /* the number of tetrabytes we've read */
int byte_count; /* index of the next-to-be-read byte */
  /* when |byte_count| is zero, |read_byte| calls |read_tet| before using |buf| */
byte buf[4]; /* the most recently read bytes */
int yz; /* the two least significant bytes */
  /* that is, |yz| equals |(buf[2]<<8)+buf[3]| after each |read_tet| */
tetra tet; /* |buf| bytes packed big-endianwise */
@ @<Init...@>=
count=0; /* no tetrabytes have been read yet */
byte_count=0; /* so the first |read_byte| will fetch a fresh tetrabyte */
@* The main loop. Now for the bread-and-butter part of this program.
@<List the next item@>=
{
read_tet(); /* fetch the tetrabyte that begins this item */
loop:@+if (buf[0]==mm) switch (buf[1]) {
  /* a first byte of |mm| marks an escape, and the second byte is its lopcode;
     the |lop_spec| case jumps back to |loop| with a tetrabyte already read */
case lop_quote:@+if (yz!=1)
err("YZ field of lop_quote should be 1");
@.YZ field...should be 1@>
read_tet();@+break; /* the quoted tetrabyte falls out to the normal listing */
@t\4@>@<Cases for lopcodes in the main loop@>@;
default: err("Unknown lopcode"); /* |err| reports and then does |continue| */
@.Unknown lopcode@>
}
if (listing) @<List |tet| as a normal item@>;
}
@ We want to catch all cases where the rules of \.{mmo} format are
not obeyed. The |err| macro ameliorates this somewhat tedious chore.
@d err(m) {@+fprintf(stderr,"Error in tetra %d: %s!\n",count,m);@+ continue;@+}
@.Error in tetra...@>
@ In a normal situation, the newly read tetrabyte is simply supposed
to be loaded into the current location. We list not only the current
location but also the current file position, if |cur_line| is nonzero
and |cur_loc| belongs to segment~0.
@<List |tet| as a normal item@>=
{
  /* show the location and its contents, then the source position if known */
  printf("%08x%08x: %08x",cur_loc.h,cur_loc.l,tet);
  if (cur_line==0 || (cur_loc.h&0xe0000000)!=0)
    printf("\n"); /* no line number, or not in segment 0 */
  else if (cur_file!=listed_file) {
    printf(" (\"%s\", line %d)\n", file_name[cur_file], cur_line);
    listed_file=cur_file; /* the file name needn't be repeated next time */
  }@+else printf(" (line %d)\n",cur_line);
  if (cur_line) cur_line++; /* a known line number advances with each tetra */
  cur_loc=incr(cur_loc,4);
  cur_loc.l &=-4; /* keep the location aligned to a tetrabyte boundary */
}
@ @<Glob...@>=
octa cur_loc; /* the current location */
int listed_file; /* the most recently listed file number */
  /* initially $-1$, so the first file name shown is always spelled out */
int cur_file; /* the most recently selected file number */
  /* initially $-1$, meaning that no |lop_file| has selected a file */
int cur_line; /* the current position in |cur_file| */
  /* zero suppresses the file and line annotation in the listing */
char *file_name[256]; /* file names seen */
  /* indexed by file number; an entry is null until |lop_file| allocates it */
octa tmp; /* an octabyte of temporary interest */
@ @<Init...@>=
cur_loc.h=cur_loc.l=0;
listed_file=cur_file=-1;
cur_line=0;
@* The simple lopcodes. We have already implemented |lop_quote|, which
falls through to the normal case after reading an extra tetrabyte.
Now let's consider the other lopcodes in turn.
@d y buf[2] /* the next-to-least significant byte */
@d z buf[3] /* the least significant byte */
@<Cases...@>=
case lop_loc:@+if (z==2) {
j=y;@+ read_tet();@+ cur_loc.h=(j<<24)+tet;
}@+else if (z==1) cur_loc.h=y<<24;
else err("Z field of lop_loc should be 1 or 2");
@:Z field of lop_loc...}\.{Z field of lop\_loc...@>
read_tet();@+ cur_loc.l=tet;
continue;
case lop_skip: cur_loc=incr(cur_loc,yz);@+continue;
@ Fixups load information out of order, when future references have
been resolved. The current file name and line number are not considered
relevant.
@<Cases...@>=
case lop_fixo:@+if (z==2) {
j=y;@+ read_tet();@+ tmp.h=(j<<24)+tet;
}@+else if (z==1) tmp.h=y<<24;
else err("Z field of lop_fixo should be 1 or 2");
@:Z field of lop_fixo...}\.{Z field of lop\_fixo...@>
read_tet();@+ tmp.l=tet;
if (listing) printf("%08x%08x: %08x%08x\n",tmp.h,tmp.l,cur_loc.h,cur_loc.l);
continue;
case lop_fixr: delta=yz; goto fixr;
case lop_fixrx:j=yz;@+if (j!=16 && j!=24)
err("YZ field of lop_fixrx should be 16 or 24");
@:YZ field of lop_fixrx...}\.{YZ field of lop\_fixrx...@>
read_tet(); delta=tet;
if (delta&0xfe000000) err("increment of lop_fixrx is too large");
@.increment...too large@>
fixr: tmp=incr(cur_loc,-(delta>=0x1000000? (delta&0xffffff)-(1<<j): delta)<<2);
if (listing) printf("%08x%08x: %08x\n",tmp.h,tmp.l,delta);
continue;
@ The space for file names isn't allocated until we are sure we need it.
@<Cases...@>=
case lop_file:@+if (file_name[y]) {
for (j=z;j>0;j--) read_tet();
cur_file=y;
if (z) err("Two file names with the same number");
@.Two file names...@>
}@+else {
if (!z) err("No name given for newly selected file");
@.No name given...@>
file_name[y]=(char*)calloc(4*z+1,1);
if (!file_name[y]) {
fprintf(stderr,"No room to store the file name!\n");@+exit(-4);
@.No room...@>
}
cur_file=y;
for (j=z,p=file_name[y]; j>0; j--,p+=4) {
read_tet();
*p=buf[0];@+*(p+1)=buf[1];@+*(p+2)=buf[2];@+*(p+3)=buf[3];
}
}
cur_line=0;@+continue;
case lop_line:@+if (cur_file<0) err("No file was selected for lop_line");
@.No file was selected...@>
cur_line=yz;@+continue;
@ Special bytes in the file might be in synch with the current location
and/or the current file position, so we list those parameters too.
@<Cases...@>=
case lop_spec:@+if (listing) {
printf("Special data %d at loc %08x%08x", yz, cur_loc.h, cur_loc.l);
if (!cur_line) printf("\n");
else if (cur_file==listed_file) printf(" (line %d)\n",cur_line);
else {
printf(" (\"%s\", line %d)\n", file_name[cur_file], cur_line);
listed_file=cur_file;
}
}
while(1) {
read_tet();
if (buf[0]==mm) {
if (buf[1]!=lop_quote || yz!=1) goto loop; /* end of special data */
read_tet();
}
if (listing) printf(" %08x\n",tet);
}
@ The other cases shouldn't appear in the main loop.
@<Cases...@>=
  /* every |err| below ends with |continue|, so no case falls into the next */
case lop_pre: err("Can't have another preamble");
@.Can't have another...@>
case lop_post: postamble=1;
  /* |postamble| is set first, so the main loop ends even if an error follows */
if (y) err("Y field of lop_post should be zero");
@:Y field of lop_post...}\.{Y field of lop\_post...@>
if (z<32) err("Z field of lop_post must be 32 or more");
@:Z field of lop_post...}\.{Z field of lop\_post...@>
continue;
case lop_stab: err("Symbol table must follow postamble");
@.Symbol table...@>
case lop_end: err("Symbol table can't end before it begins");
@* The preamble and postamble. Now here's what we do before and after
the main loop.
@<List the preamble@>=
read_tet(); /* read the first tetrabyte of input */
if (buf[0]!=mm || buf[1]!=lop_pre) {
  fprintf(stderr,"Input is not an MMO file (first two bytes are wrong)!\n");
@.Input is not...@>
  exit(-5);
}
if (y!=1) fprintf(stderr,
    "Warning: I'm reading this file as version 1, not version %d!\n",y);
@.I'm reading this file...@>
if (z>0) { /* |z| tetrabytes of preamble data follow */
  j=z; /* saved now, because |read_tet| will overwrite |buf| */
  read_tet(); /* the first of them is treated as the creation time */
  if (listing) {
    time_t t=tet;
    struct tm *when=localtime(&t);
    /* |localtime| yields a null pointer when it cannot convert the time,
       and |asctime| must not be given such a pointer */
    if (when) printf("File was created %s",asctime(when));
    else printf("File was created at an unrepresentable time (%08x)\n",tet);
  }
  for (j--;j>0;j--) {
    read_tet();
    if (listing) printf("Preamble data %08x\n",tet);
  }
}
@ @<List the postamble@>=
for (j=z;j<256;j++) {
read_tet();@+tmp.h=tet;@+read_tet();
if (listing) {
if (tmp.h || tet) printf("g%03d: %08x%08x\n",j,tmp.h,tet);
else printf("g%03d: 0\n",j);
}
}
@* The symbol table. Finally we come to the symbol table, which is
the most interesting part of this program because it recursively
traces an implicit ternary trie structure.
@<List the symbol table@>=
read_tet();
if (buf[0]!=mm || buf[1]!=lop_stab) {
fprintf(stderr,"Symbol table does not follow the postamble!\n");
@.Symbol table...@>
exit(-6);
}
if (yz) fprintf(stderr,"YZ field of lop_stab should be zero!\n");
@.YZ field...should be zero@>
printf("Symbol table (beginning at tetra %d):\n",count);
stab_start=count;
sym_ptr=sym_buf;
print_stab();
@<Check the |lop_end|@>;
@ The main work is done by a recursive subroutine called |print_stab|,
which manipulates a global array |sym_buf| containing the current
symbol prefix; the global variable |sym_ptr| points to the first
unfilled character of that array.
@<Sub...@>=
void print_stab @,@,@[ARGS((void))@];
void print_stab()
{
  /* Read one trie node from the input and print the symbols found in it and
     in its subtries. The bytes are consumed in a fixed order: the master
     byte, the left subtrie, the character and its equivalent, the middle
     subtrie, and finally the right subtrie. */
register int m=read_byte(); /* the master control byte */
register int c; /* the character at the current trie node */
register int j,k;
if (m&0x40) print_stab(); /* traverse the left subtrie, if it is nonempty */
if (m&0x2f) {
  /* a character is present when the node has a middle subtrie (|0x20|)
     or an equivalent (the low four bits) */
@<Read the character |c|@>;
*sym_ptr++=c;
if (sym_ptr==&sym_buf[sym_length_max]) {
  /* stopping here leaves room for the terminator stored when printing */
fprintf(stderr,"Oops, the symbol is too long!\n");@+exit(-7);
@.Oops...too long@>
}
if (m&0xf)
@<Print the current symbol with its equivalent and serial number@>;
if (m&0x20) print_stab(); /* traverse the middle subtrie */
sym_ptr--; /* remove this node's character from the prefix */
}
if (m&0x10) print_stab(); /* traverse the right subtrie, if it is nonempty */
}
@ The present implementation doesn't support Unicode; characters with
more than 8-bit codes are printed as `\.?'. However, the changes
for 16-bit codes would be quite easy if proper fonts for Unicode output
were available. In that case, |sym_buf| would be an array of wyde characters.
@^Unicode@>
@^system dependencies@>
@<Read the character |c|@>=
if (m&0x80) j=read_byte(); /* 16-bit character */
else j=0;
c=read_byte();
if (j) c='?'; /* oops, we can't print |(j<<8)+c| easily at this time */
@ @<Print the current symbol with its equivalent and serial number@>=
{
  *sym_ptr='\0'; /* terminate the symbol collected so far */
  j=m&0xf; /* the code that tells what kind of equivalent follows */
  if (j==15) sprintf(equiv_buf,"$%03d",read_byte()); /* a register number */
  else if (j>8) { /* |j-8| bytes follow a fixed prefix of hex digits */
    k=33-2*j; /* the number of prefix characters to keep */
    strncpy(equiv_buf,"#20000000000000",k);
    equiv_buf[k]='\0';
    while (j>8) {
      sprintf(equiv_buf+strlen(equiv_buf),"%02x",read_byte());
      j--;
    }
  }@+else { /* |j| bytes of a hexadecimal constant follow */
    strcpy(equiv_buf,"#");
    while (j>0) {
      sprintf(equiv_buf+strlen(equiv_buf),"%02x",read_byte());
      j--;
    }
    if (strcmp(equiv_buf,"#0000")==0) strcpy(equiv_buf,"?"); /* undefined */
  }
  j=k=read_byte();
  while (k<128) { /* seven more bits are gathered from each further byte */
    k=read_byte();
    j=(j<<7)+k;
  }
  /* the serial number is now $j-128$ */
  printf(" %s = %s (%d)\n",sym_buf+1,equiv_buf,j-128);
}
@ @d sym_length_max 1000
@<Glob...@>=
int stab_start; /* where the symbol table began */
  /* namely the value of |count| just after the |lop_stab| was read */
char sym_buf[sym_length_max];
/* the characters on middle transitions to current node */
  /* the symbol is printed starting at |sym_buf+1|, so the first character
     collected here never appears in the output */
char *sym_ptr; /* the character in |sym_buf| following the current prefix */
char equiv_buf[20]; /* equivalent of the current symbol */
  /* the longest text formatted here is a sharp sign and 16 hex digits */
@ @<Check the |lop_end|@>=
while (byte_count)
if (read_byte()) fprintf(stderr,"Nonzero byte follows the symbol table!\n");
@.Nonzero byte follows...@>
read_tet();
if (buf[0]!=mm || buf[1]!=lop_end)
fprintf(stderr,"The symbol table isn't followed by lop_end!\n");
@.The symbol table isn't...@>
else if (count!=stab_start+yz+1)
fprintf(stderr,"YZ field at lop_end should have been %d!\n",count-yz-1);
@:YZ field at lop_end...}\.{YZ field at lop\_end...@>
else {
if (verbose) printf("Symbol table ends at tetra %d.\n",count);
if (fread(buf,1,1,mmo_file))
fprintf(stderr,"Extra bytes follow the lop_end!\n");
@.Extra bytes follow...@>
}
@* Index.