forked from uyjulian/zbspac
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ScriptUnpacker.c
299 lines (260 loc) · 9.92 KB
/
ScriptUnpacker.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
/**
* @file ScriptUnpacker.c
* @brief Extracts the game's "screenplay" from the script files.
* @copyright Covered by 2-clause BSD, please refer to license.txt.
* @author CloudiDust
* @date 2010.02
*/
/**
* As of 2010.05.25, the script is unpacked into a directory,
* just like a package would be.
* There are three files in the directory:
* head.bin : the data in the original binary script, before the text section
* tail.bin : the data after the text section
* script.txt : the text section, unpacked.
*
* This change has two advantages:
*
* 1. The original script packer needs an unmodified binary script file when
* packing the translated strings back, but in reality the binary may
* already be updated with some translations, and packing will produce
* a corrupted bin file. Now that we have all the original data in our
* hands, we are not afraid of the existing bin file being changed. In fact,
* we now need no bin file at all, once we have extracted the texts.
*
* 2. The PAC package generator skips subdirectories in the source dir, so now
* the unpacked scripts will not be packed back into the package even if
* they are in the source dir. And we like defaults, don't we? ;)
*/
/**
* As of 2010.05.23, the output format is changed.
* I suspect that the game doesn't have pointers to individual script segments,
* rather it reads the text section sequentially, but the numbers of nulls
* between two segments are fixed, changing them would crash the game.
* So for the text section as a whole, now I will just store its beginning and
* end, then for each segment, I record the number of null bytes following it.
*/
#include <stdio.h>
#include <wchar.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include "Logger.h"
#include "StringUtils.h"
#include "Filesystem.h"
#include "ScriptFile.h"
/**
 * State kept for one binary script file while it is being unpacked.
 */
struct ScriptFile {
/**
 * Filled by validateHeaderAndGetTextOffset(): first receives the raw
 * 8-byte value at the start of the file, then is replaced by the byte
 * offset at which the text section begins.
 */
u64 textOffset;
/** Length of the script file in bytes, as reported by ftell(). */
u32 fileLength;
/** Input stream, opened with mode "rb"; closed by closeScriptFile(). */
FILE* file;
};
typedef struct ScriptFile ScriptFile;
/**
 * Releases a ScriptFile: closes its stream if one is open, then frees the
 * structure itself.
 *
 * @param script  The script to release. NULL is accepted and ignored.
 *
 * The pointer is passed by value, so this function cannot clear the caller's
 * copy; the caller must not use its pointer after this call.
 */
static void closeScriptFile(ScriptFile* script) {
    if (script == NULL) return;
    if (script->file != NULL) fclose(script->file);
    free(script);
}
static ScriptFile* openScriptFile(const wchar_t* sourcePath) {
ScriptFile* script = malloc(sizeof(ScriptFile));
memset(script, 0, sizeof(ScriptFile));
if (!(script->file = _wfopen(sourcePath, L"rb"))) {
writeLog(LOG_QUIET, L"ERROR: Unable to open the script file.");
closeScriptFile(script);
return NULL;
}
writeLog(LOG_VERBOSE, L"Script File Opened.");
return script;
}
/**
 * Reads the script header, records the file length and computes where the
 * text section starts.
 *
 * The first 8 bytes of the script file are an unsigned integer n, and
 * (n + 1) * 8 is the offset of the text section. (The text section may have
 * some leading null bytes with a length smaller than 4.)
 * From the patterns of the data before the text section, we may assume that
 * the data is an array of 8-byte-long entries and that n is the size of this
 * array. It is not clear what those entries actually are, but they don't seem
 * to matter as long as we only touch the text itself.
 *
 * The value is read as a whole u64 so that bytes 4 to 7 take part in the
 * validation without being handled separately.
 * NOTE(review): the bytes are read directly into a u64, so this presumably
 * relies on the host byte order matching the file format - confirm.
 *
 * @param script  An opened script; on success its fileLength and textOffset
 *                members are filled in. The stream is left positioned at the
 *                end of the file.
 * @return true if the header is plausible (the text offset lies within the
 *         file), false otherwise (the reason is logged).
 */
static bool validateHeaderAndGetTextOffset(ScriptFile* script) {
    u64 entryCount = 0;
    if (fread(&entryCount, 8, 1, script->file) != 1) {
        writeLog(LOG_QUIET, L"ERROR: Unable to read the text offset from the script!");
        return false;
    }

    /* Keep the ftell() result in a long so that the failure value (-1) and
     * lengths that do not fit into the 32-bit fileLength member are detected
     * before any narrowing takes place. */
    long length = -1;
    if (fseek(script->file, 0, SEEK_END) == 0) {
        length = ftell(script->file);
    }
    if (length < 0 || (unsigned long)length > 0xFFFFFFFFUL) {
        writeLog(LOG_QUIET, L"ERROR: Unable to get the length of the script!");
        return false;
    }
    script->fileLength = (u32)length;

    /* (entryCount + 1) * 8 must not exceed the file length. The test is done
     * by division because the multiplication can wrap around for a crafted
     * header, which would let a bogus offset slip through. */
    if (entryCount >= script->fileLength / 8) {
        writeLog(LOG_QUIET, L"ERROR: The source file is not a vaild script file!");
        return false;
    }
    script->textOffset = (entryCount + 1) * 8;

    /* Both values fit into 32 bits here; the casts make the arguments match
     * the %u conversions exactly. */
    writeLog(LOG_VERBOSE, L"The source file's length is %u, and the text begins at %u",
             (unsigned int)script->fileLength, (unsigned int)script->textOffset);
    return true;
}
static bool extractText(ScriptFile* script, const wchar_t* targetPath) {
if (!fsEnsureDirectoryExists(targetPath)) {
writeLog(LOG_QUIET, L"ERROR: Unable to open or create the target directory!");
return false;
}
wchar_t* headPath = wcsAppend(targetPath, L"\\head.bin");
wchar_t* tailPath = wcsAppend(targetPath, L"\\tail.bin");
wchar_t* textPath = wcsAppend(targetPath, L"\\script.txt");
fseek(script->file, 0, SEEK_SET);
char* data = malloc(script->fileLength);
if (fread(data, 1, script->fileLength, script->file) != script->fileLength) {
writeLog(LOG_QUIET, L"ERROR: Unable to read text data from the script file!");
free(headPath); free(tailPath); free(textPath);
free(data);
return false;
}
/**
* Put the head section into head.bin.
* We need to know where the text truly starts.
* (There may be some nulls before the text segments.)
*/
u32 index = script->textOffset;
while (data[index] == '\0') ++index;
FILE* headFile;
if ((headFile = _wfopen(headPath, L"wb")) == NULL) {
writeLog(LOG_QUIET, L"ERROR: Unable to create head.bin!");
free(headPath); free(tailPath); free(textPath);
free(data);
return false;
}
if (fwrite(data, sizeof(byte), index, headFile) != index) {
writeLog(LOG_QUIET, L"ERROR: Unable to write to head.bin!");
free(headPath); free(tailPath); free(textPath);
free(data);
fclose(headFile);
return false;
}
fclose(headFile);
/**
* Now extract the texts.
* Somehow opening the text file in text mode results in a mess after
* texts are written. So I use binary mode here.
* And remember to output '\r\n' when writing newlines.
*/
FILE* textFile;
if ((textFile = _wfopen(textPath, L"wb")) == NULL) {
writeLog(LOG_QUIET, L"ERROR: Unable to create script.txt!");
free(headPath); free(tailPath); free(textPath);
free(data);
return false;
}
/// The UTF16-LE BOM, UTF16-LE is the encoding used for wchar_t in Windows.
fputs("\xFF\xFE", textFile);
/// Header
fwprintf(textFile, L"ZBSPAC-TRANSLATION ENCODING japanese COUNT \r\n\r\n");
/**
* Text segments are Shift-JIS encoded and separated (or more precisely,
* terminated) by one or more null bytes, so we can treat them just like
* multibyte C strings.
*/
u32 extractedCount = 0;
u32 textCount = 0;
while (true) {
/**
* After the text section there is a ending section that consists of
* 0x00, 0xFF and maybe some bytes with small value, detect them and
* store its starting offset, so the packer can perserve this section
* in the recompiled script.
*
* The type cast is used because char is unsigned on this platform,
* without the cast the following condition will be true for leading
* bytes of Japanese characters, and terminate the process too early.
*
* Now the following code behaves correctly no matter chars are
* signed or not.
*/
byte temp = (byte)(data[index]);
if (temp < 32 || temp == 0xFF) {
break;
}
/**
* Extract the text segment and write it to the output file.
* The output format is designed to ease translation.
*
* Now there is no need to restrict the texts to fit in a
* given length, but we need to know how many nulls follow.
*
* If the first byte of the text segment represents an English letter
* or a digit, or this file's name ends in '.bin', then this segment is
* not regular text but special effects or name of other script files
* (For storyline branching).
* DO NOT ignore them, as they have their places in the script.
* Mark them as 'NOT-TEXT'.
*/
bool notText = isdigit(data[index]) || isupper(data[index]);
wchar_t* text = toWCString(data + index, L"japanese");
u32 textLen = wcslen(text);
if (textLen > 4 && wcscmp(text + textLen - 4, L".bin") == 0) notText = true;
u32 rawLength = strlen(data + index);
index += rawLength;
u32 followingNulls = 0;
while (data[index] == '\0') {
++followingNulls;
++index;
}
fwprintf(textFile, L"SEG %u NULL %u %s\r\n",
extractedCount, followingNulls,
notText ? L"NOT-TEXT" : L"");
fwprintf(textFile, L"%s\r\n", text);
for (int i = 0; i < rawLength; ++i) {
fputwc('-', textFile);
}
fwprintf(textFile, L"\r\n%s\r\n\r\n", text);
free(text);
++extractedCount;
if (!notText) ++textCount;
}
/// This position is in the header, just after "COUNT"
fseek(textFile, 88, SEEK_SET);
fwprintf(textFile, L"%5u", extractedCount);
fclose(textFile);
/// Now store the tail part.
FILE* tailFile;
if ((tailFile = _wfopen(tailPath, L"wb")) == NULL) {
writeLog(LOG_QUIET, L"ERROR: Unable to open tail.bin!");
free(headPath); free(tailPath); free(textPath);
free(data);
return false;
}
u32 tailLen = script->fileLength - index;
if (fwrite(data + index, sizeof(byte), tailLen, tailFile) != tailLen) {
writeLog(LOG_QUIET, L"ERROR: Unable to write to tail.bin!");
free(headPath); free(tailPath); free(textPath);
free(data);
fclose(tailFile);
return false;
}
fclose(tailFile);
free(headPath); free(tailPath); free(textPath);
free(data);
writeLog(LOG_NORMAL, L"%u strings translatable, %u not, %u total.",
textCount, extractedCount - textCount, extractedCount);
return true;
}
/**
 * Unpacks one binary script file into a directory.
 *
 * @param sourcePath  Path of the binary script to unpack.
 * @param targetPath  Directory that receives the unpacked files.
 * @return true if the script was opened, validated and extracted; false
 *         otherwise. The outcome is also written to the log, except when the
 *         script cannot be opened at all.
 */
bool unpackScript(const wchar_t* sourcePath, const wchar_t* targetPath) {
    writeLog(LOG_NORMAL, L"Unpacking Script: %s", sourcePath);
    writeLog(LOG_NORMAL, L"To Directory: %s", targetPath);

    ScriptFile* source = openScriptFile(sourcePath);
    if (source == NULL) {
        return false;
    }

    /* Extraction is only attempted once the header has been accepted. */
    bool unpacked = false;
    if (validateHeaderAndGetTextOffset(source)) {
        unpacked = extractText(source, targetPath);
    }
    closeScriptFile(source);

    if (unpacked) {
        writeLog(LOG_NORMAL, L"Unpacking Successful.");
    } else {
        writeLog(LOG_NORMAL, L"ERROR: Unpacking Failed.");
    }
    return unpacked;
}