/* unpack.c */
/*
Unpack an ITS file stored in a WEENIX file using Alan Bawden's evacuated file
format.
By John Wilson.
04/11/1993 JMBW Created.
08/09/1993 JMBW Convert dates, uncompress .Z files automatically.
07/14/1998 JMBW Separated from DUMP.C.
This file is part of itstar.
itstar is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
itstar is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with itstar. If not, see <http://www.gnu.org/licenses/>.
*/
#include <fcntl.h>
#define zopen apple_zopen
#include <stdio.h>
#undef zopen
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <string.h>
#include "itstar.h"
/* File-scope streams.  unpack() assigns `in' from zopen() and closes it when
   done; `out' is not referenced in this part of the file. */
FILE *in, *out;
/* Opens the named file for reading ("uncompress/open", per its use in
   unpack()); defined outside this part of the file. */
FILE *zopen(char *);
/* Full prototype rather than the old-style "flush()": the empty form gives
   the compiler no parameter information to check calls against, and in C23
   it means flush(void), which would conflict with the definition below. */
static void flush(unsigned long word[5]);
/*
Message: 2881200, 91 lines
Posted: 4:07pm EDT, Thu May 14/92, imported: 4:05pm EDT, Thu May 14/92
Subject: ITS filesystems
To: John Wilson, bruce@think.com
From: alan%ai.mit.edu@life.ai.mit.edu
Date: Wed, 13 May 92 21:56:02 EDT
From: John_Wilson@mts.rpi.edu
...
I have a question -- how are the 36-bit words packed into 8-bit bytes?
...
From: Bruce Walker <bruce@think.com>
Date: Wed, 13 May 92 18:17:20 EDT
...
What is the representation of binary files?
...
-------
Storing 36-Bit Words
In 8-Bit Bytes
Here are the details of the encoding used to store 36-bit PDP-10 words in
8-bit byte file systems. It is easiest to explain the encoding by
describing how to reconstruct the 36-bit words from the 8-bit bytes. Going
the other direction is harder (you will see why).
Algorithm to decode a sequence of 8-bit bytes into a sequence of 36-bit
words:
Each 8-bit byte between 0 and 357 (octal) is decoded into one or two
7-bit bytes (see table below). These 7-bit bytes are then assembled
into 36-bit words in the usual PDP-10 byte ordering. The lowest order
bit of each such word is always set to 0.
Each 8-bit byte between 360 and 377 (octal) is combined with the next 4
8-bit bytes to form a complete 36-bit word (see figure below). It is
an error to encounter such a byte when there is a partially assembled
output word.
If there is a partially assembled output word at the end of the
sequence of 8-bit bytes, it is padded out with 7-bit bytes that contain
the value 3 (control-C in ASCII).
Here is the table for decoding bytes between 0 and 357 (all values are
in octal):
input byte      1st output      2nd output
----------      ----------      ----------
0 -- 11         0 -- 11         none
12              15              12
13 -- 14        13 -- 14        none
15              12              none
16 -- 176       16 -- 176       none
177             177             7
200 -- 206      177             0 -- 6
207             177             177
210 -- 211      177             10 -- 11
212             177             15
213 -- 214      177             13 -- 14
215             177             12
216 -- 355      177             16 -- 155
356             15              none
357             177             none
For bytes between 360 and 377 (octal) the 36-bit word is reconstructed
as follows:
byte: 000011111111222222 223333333344444444
bit: 321076543210765432 107654321076543210
Where byte 0 is the current byte (the one between 360 and 377), byte 1
is the next in sequence, and so forth.
Going in the other direction, from 36-bit words to 8-bit bytes, is harder
only because there are choices to be made. For example, you can encode
every 36-bit word using 5 bytes where the first is between 360 and 377 --
but if you did this, files that were stored as ASCII packed in 36-bit words
in the usual way wouldn't be readable. A -good- encoder will produce a
sequence of 8-bit bytes that can be read as an ordinary text file under
Unix whenever the input words contain only PDP-10 ASCII.
(You may be puzzled by all two byte sequences starting with 177 in the
table above. The explanation is that this is done to preserve not just
ASCII files, but also the files written by the Lisp Machine system using
the Lisp Machine character set. But you don't really need to worry about
this, as long as you have the above table, you know all you need to know
about it.)
I have a library of C routines that know how to encode and decode files in
this format. I'm not willing to make a widely public release of it, but if
you really need to use it let me know. (It is best to keep the number of
different programs that understand the format small in order to minimize
the chances of introducing incompatibilities.)
-------
*/
/* first code written for each input code from 000 to 357 */
/* Indexed by the input byte (octal).  Bytes 360-377 have no entry here;
   unpack() treats them as the start of a quoted binary word instead.
   The trailing comment on each row is the index of its first element. */
char first[0360] = {
0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007, /* 000 */
0010, 0011, 0015, 0013, 0014, 0012, 0016, 0017, /* 010 -- '\n' => CRLF, '\r' => LF */
0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027, /* 020 */
0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037, /* 030 */
0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047, /* 040 */
0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057, /* 050 */
0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067, /* 060 */
0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077, /* 070 */
0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107, /* 100 */
0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, /* 110 */
0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, /* 120 */
0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137, /* 130 */
0140, 0141, 0142, 0143, 0144, 0145, 0146, 0147, /* 140 */
0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157, /* 150 */
0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167, /* 160 */
0170, 0171, 0172, 0173, 0174, 0175, 0176, 0177, /* 170 */
0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 200 */
0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 210 */
0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 220 */
0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 230 */
0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 240 */
0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 250 */
0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 260 */
0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 270 */
0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 300 */
0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 310 */
0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 320 */
0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 330 */
0177, 0177, 0177, 0177, 0177, 0177, 0177, 0177, /* 340 */
0177, 0177, 0177, 0177, 0177, 0177, 0015, 0177 /* 350 -- 356 => lone 015, 357 => lone 177 */
};
/* NONE marks "no second code" in second[].  Every real output code is 7 bits
   (000-177), so bit 0200 never occurs in one; unpack() checks for it with
   `&NONE'.  Where char is a signed 8-bit type this is the sign bit -- the
   original author hoped (without much conviction) that cc's optimizer might
   take advantage of that. */
#define NONE 0200
/* second code written for each input code from 000 to 357, or NONE if none */
/* Indexed by the input byte (octal), in parallel with first[].  The trailing
   comment on each row is the index of its first element. */
char second[0360] = {
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 000 */
NONE, NONE, 0012, NONE, NONE, NONE, NONE, NONE, /* 010 -- '\n' => CRLF */
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 020 */
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 030 */
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 040 */
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 050 */
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 060 */
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 070 */
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 100 */
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 110 */
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 120 */
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 130 */
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 140 */
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 150 */
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, /* 160 */
NONE, NONE, NONE, NONE, NONE, NONE, NONE, 0007, /* 170 -- 177 => 177, 007 */
0000, 0001, 0002, 0003, 0004, 0005, 0006, 0177, /* 200 -- 207 => 177, 177 */
0010, 0011, 0015, 0013, 0014, 0012, 0016, 0017, /* 210 -- 212 => 177, 015; 215 => 177, 012 */
0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027, /* 220 */
0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037, /* 230 */
0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047, /* 240 */
0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057, /* 250 */
0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067, /* 260 */
0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077, /* 270 */
0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107, /* 300 */
0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, /* 310 */
0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, /* 320 */
0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137, /* 330 */
0140, 0141, 0142, 0143, 0144, 0145, 0146, 0147, /* 340 */
0150, 0151, 0152, 0153, 0154, 0155, NONE, NONE /* 350 -- 356, 357 have no second code */
};
/*
 * Decode the evacuated-format file FILE into 36-bit words, handing each word
 * to outword() as two 18-bit halves (left, right).
 *
 * The file is opened with zopen() into the global `in' and closed again
 * before returning.  Input bytes 000-357 expand through first[]/second[]
 * into one or two 7-bit characters, which are packed five to a word by
 * flush().  Input bytes 360-377 introduce a "quoted" word built from that
 * byte's low four bits plus the following four bytes.  If the input ends
 * with a text word partly assembled, the word is padded with ^C (003).
 *
 * Exits with status 1, after a message on stderr, when:
 *   - the file cannot be opened;
 *   - a quoted word is cut short by end of input;
 *   - a quote byte (360-377) appears while a text word is partly assembled;
 *   - reading the input fails (getc() reports EOF with the error flag set).
 */
void unpack(char *file)
{
	unsigned long word[5];	/* bytes/characters of the word in progress */
	unsigned long incnt;	/* input bytes requested so far (error msgs) */
	int c;			/* current input byte, or EOF */
	int i;			/* number of entries of word[] filled so far */
	char b;			/* second[] entry for the current byte */

	in=zopen(file);		/* uncompress/open file */
	if(in==NULL) {
		perror(file);
		exit(1);
	}

	incnt=0L;
	while((incnt++,c=getc(in))!=EOF) {
		if(c>=0360) {	/* quoted binary word */
			word[0]=(unsigned long)(c&017);
			for(i=1;i<=4;i++) {	/* 4 more bytes */
				/* test the int returned by getc() before
				   storing it, so EOF detection does not
				   depend on unsigned wraparound */
				incnt++;
				if((c=getc(in))==EOF) {
					fprintf(stderr,
						"?Unexpected EOF: %s\n",file);
					exit(1);
				}
				word[i]=(unsigned long)c;
			}
			/* assemble the 36-bit binary word: 4+8+6 bits on
			   the left, 2+8+8 bits on the right */
			outword((word[0]<<14L)|(word[1]<<6L)|
				((word[2]>>2L)&077L),
				((word[2]&003L)<<16L)|
				(word[3]<<8L)|word[4]);
			continue;
		}

		/* text byte: begins a new word of five 7-bit characters */
		word[0]=first[c], i=1;
		if(!((b=second[c])&NONE)) word[i++]=b;

		/* keep reading until a word boundary (i wraps back to 0) */
		while(i) {
			if((incnt++,c=getc(in))==EOF) {
				/* only a genuine end of file gets ^C padding;
				   a read error is reported at `done' */
				if(!ferror(in)) {
					while(i<5) word[i++]=003;
					flush(word);
				}
				goto done;
			}
			/* quoted word not allowed mid-word */
			if(c>=0360) {
				fprintf(stderr,
					"?Invalid input file: %s, char %lu\n",
					file,incnt);
				exit(1);
			}
			/* save the first char */
			word[i++]=first[c];
			if(i==5) {
				flush(word);
				i=0;
			}
			/* save 2nd char if any; it may start the next word */
			if(!((b=second[c])&NONE)) {
				word[i++]=b;
				if(i==5) {
					flush(word);
					i=0;
				}
			}
		}
	}

done:	/* getc() returns EOF for read errors too; don't pass off a failed
	   read as a short but valid file */
	if(ferror(in)) {
		fprintf(stderr,"?Read error: %s\n",file);
		exit(1);
	}
	fclose(in);
	/* The input file is left in place.  An unlink(file) used to follow
	   here ("delete when done - /tmp isn't big enough on
	   CIEUNIX.RPI.EDU") but had been disabled. */
}
/*
 * Emit five 7-bit ASCII characters as one 36-bit word.
 *
 * The characters are laid out left to right in the usual PDP-10 order and
 * the lowest bit of the word is left 0.  The word is passed to outword() as
 * two 18-bit halves:
 *   left  = word[0] (7 bits), word[1] (7 bits), top 4 bits of word[2]
 *   right = low 3 bits of word[2], word[3] (7 bits), word[4] (7 bits), 0
 */
static void flush(unsigned long word[5])
{
	unsigned long left, right;

	left = word[0]<<11L;
	left |= word[1]<<4L;
	left |= (word[2]>>3L)&017L;	/* upper 4 bits of the middle char */

	right = (word[2]&07L)<<15L;	/* lower 3 bits of the middle char */
	right |= word[3]<<8L;
	right |= word[4]<<1L;		/* bit 0 of the word stays clear */

	outword(left, right);
}