-
Notifications
You must be signed in to change notification settings - Fork 0
/
charbyte.asm
454 lines (380 loc) · 11 KB
/
charbyte.asm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
; low-level utility to display a table of byte sized characters
;
; fasm2 -e 50 charbyte.asm
; link @charbyte.response charbyte.obj
; Obviously, this tool is not meant to support non-byte code pages or locales.
; Configure ANSI Colors:
; Each symbol expands to ESC (27) followed by an SGR sequence, ready to be
; spliced into a `du` character list. In the comments below, a leading `"`
; is a ditto mark for "character".
BRDR equ 27,'[90m' ; border
KHEX equ 27,'[32m' ; axis key
KHAR equ 27,'[m' ; character
KONT equ 27,'[35m' ; " control
KERR equ 27,'[91m' ; " invalid
; Mode flags kept in Main.flags. These are BIT INDICES (used with `bts` and
; `1 shl FLAG_x`), not masks.
FLAG_CODEPAGE := 0
FLAG_LOCALE := 1
include 'console.g'
include 'winnls.g'
extrn wtoi64_RDI
; just a console output helper (w/ caching)
;
; This CALM instruction is named `??`, so it sees every source line:
;   <anything else>  assembled unchanged
;   $ $ <chars>      <chars> is pushed on the cache stack C; nothing is emitted
;   $ <chars>        every cached entry plus <chars> is emitted as one UTF-16
;                    (`du`) string via COFF.2.CONST, followed by a single
;                    WriteConsoleW call that prints it to [.hOutput]
; `.hOutput` is resolved in the label context of the line that used `$`.
calminstruction ?? line&
local C,var,i
init i
; a leading `$` selects console output; the `$` is stripped from `line`
match =$ line,line
jyes conout
; ordinary line: pass through untouched
assemble line
exit
; `$ $ ...`: remember the text for a later flush
cash: take C,line
exit
conout: match =$ line,line ; note '$$' produces an error
jyes cash
; flush: move each cached entry from C onto the `line` stack, on top of the
; current text (the most recently cached entry is moved first)
rev: take line,C
jyes rev
; label for this string, built from `var.` and the counter i
arrange var,=var.i
arrange C,=COFF.2.=CONST var:
assemble C
; emit one `du` per stacked entry, popping until the `line` stack is empty
dat: arrange C,=COFF.2.=CONST =du line
assemble C
take ,line
take line,line
jyes dat
; length in 16-bit units, i.e. the character count WriteConsoleW expects
arrange C,=COFF.2.=CONST var.=chars =:== ( =$ - var ) =shr 1
assemble C
arrange C, =WriteConsoleW [.=hOutput], & var, var.=chars, 0, 0
assemble C
; next flush gets a fresh label
compute i,i+1
; drop the scratch values left on C so the cache starts empty again
clr: take ,C
jyes clr
end calminstruction
; Nibble -> ASCII hex digit lookup, indexed by a value 0..15.
; NOTE(review): `{const:16}` looks like a section/alignment prefix supplied by
; the project's macro framework - confirm.
{const:16} hextab db '0123456789ABCDEF'
; invalid codepage: 1200, 1259, ...
;
; jcc_error JCC, text
;   JCC   conditional jump mnemonic (e.g. jz); it is taken on the flags left
;         by the instruction the caller placed just before the macro.
;   text  message shown when the jump is taken.
; Emits a counted UTF-16 string through COFF.2.CONST. Its first word is the
; character count, followed by ESC '[91m' and the text. rdx is loaded with
; the address just past the count word - the layout Main.error reads back
; with [rdx-2]. `lea` does not modify flags, so the caller's condition is
; still intact when JCC executes. rdx is overwritten even when the jump is
; not taken.
macro jcc_error JCC*,text* ; reduce interference from error checking
local message,characters
COFF.2.CONST message du characters,27,'[91m',text
COFF.2.CONST characters := ($ - message - 2) shr 1
lea rdx, [message+2]
JCC .error
end macro
; Shared exit paths for Main. They are reached only by jumps from inside Main,
; so Main's rbp-based frame is live here.
;
; Main.error          in: rdx -> UTF-16 message, word [rdx-2] = its length in
;                     characters (the layout produced by jcc_error). Prints
;                     the message and falls into the usage text.
; Main.display_usage  prints the usage text and falls into Main.done.
; Main.done           exits the process with [Main.result].
:Main.error: ; TODO: show an error string ... fall into usage.
.hOutput equ Main.hOutput
	movzx r8d, word [rdx-2] ; characters
	WriteConsoleW [.hOutput], rdx, r8d, 0, 0
:Main.display_usage:
.hOutput equ Main.hOutput
; The usage text is cached with `$ $` and written by the final `$` line.
; The default code page quoted below must track `.codepage dd 437` in Main.
	$ $ 10,27,'[97m'
	$ $ 'Byte Character Table Utility version 0.1',10,10,27,'[32m'
	$ $ ' Usage:',27,'[m',' charbyte [help|locale|codepage] <value>',10
	$ $ ' the default mode is [codepage] (i.e. optional)',10
	$ $ ' the default codepage is 437',10
	$ $ ' LOCALE_USER_DEFAULT is the default locale',10
	$ ' <value> can be a number, name or string',10
:Main.done:
	ExitProcess [Main.result]
	jmp $ ; ExitProcess should not return; spin rather than run into Main
public Main as 'mainCRTStartup' ; linker expects this default entry point name
:Main:
; Frame layout, addressed through rbp. Names declared before `.local` sit
; below rbp. `rq 2` steps over the two qwords at [rbp] and [rbp+8] (the saved
; rbp written by `enter`, and the return address). The names after it
; therefore start at rbp+16, and the assert keeps them within 32 bytes.
virtual at rbp - .local
.lpCmdLine dq ?
.argv dq ?
.argn dd ?
align.assume rbp,16
align 16
.local := $-$$
rq 2
.hOutput dq ?
.result dd ?
.wide rw 4
.char db ?
assert $-.hOutput < 33 ; shadowspace limitation
end virtual
; NOTE(review): `.frame` is not defined in this file; presumably the macro
; framework computes it from the API calls made below - confirm.
enter .frame + .local, 0
; exit code stays 1 until the table has been printed
mov [.result], 1
; default settings:
{data:4} .locale dd LOCALE_USER_DEFAULT ; LCID
{data:4} .codepage dd 437
{data:4} .flags dd 0
GetStdHandle STD_OUTPUT_HANDLE
mov [.hOutput], rax
GetCommandLineW
mov [.lpCmdLine], rax
test rax, rax
jz .display_usage
xchg rcx, rax
; split the command line into an argv-style array of wide strings
CommandLineToArgvW rcx, & .argn
test rax, rax
jz .display_usage
mov [.argv], rax
; rsi walks the argv array from here on
xchg rsi, rax
lodsq ; skip program name
test rax, rax
jz .display_usage
.process_args:
; Argument loop. rsi -> current argv slot; a null slot ends the list.
	cmp qword [rsi], 0
	jz .args_processed
; Keyword dispatch (case-insensitive); the first match wins.
	iterate <keyword,handler>, "help",.display_usage, "codepage",.mode_codepage, "locale",.mode_locale
		lstrcmpiW [rsi], W keyword
		test eax, eax
		jz handler
	end iterate
; Not a keyword: try it as a number, otherwise treat it as a string.
	mov rdi, [rsi]
	call wtoi64_RDI
	jnz .arg_number
	test rax, rax
	jnz .display_usage ; too many digits
	jmp .arg_string ; assume value is a string

; Common exits back into the loop. .skip_arg steps to the next argv slot and
; treats CF=1 on entry as an error; .bad_arg forces that error.
.bad_arg:
	stc
.skip_arg:
	lodsq
	jc .display_usage ; argument unknown or possible error condition
	jmp .process_args

; Mode keywords: set the matching bit in .flags. `bts` leaves the previous
; bit value in CF, which .skip_arg then examines.
.mode_codepage:
	bts [.flags], FLAG_CODEPAGE
	jmp .skip_arg
.mode_locale:
	bts [.flags], FLAG_LOCALE
	jmp .skip_arg
.arg_number: ; support 32-bit [un]signed range
; Numeric argument handler.
; In:  rax = value produced by wtoi64_RDI
;      rdi = position where parsing stopped (NOTE(review): wtoi64_RDI is
;            external - confirm it leaves rdi just past the digits)
;      rsi -> current argv slot
; The value is stored into .codepage or .locale according to the mode bits in
; .flags, then the loop continues with the next argument.
	cmp word [rdi], 0
	jnz .display_usage ; invalid form, numbers need to be complete arg
; accept the value if it survives either a sign- or a zero-extension from 32 bits
	movsxd rdx, eax
	mov ecx, eax
	cmp rdx, rax
	jz @F
	cmp rcx, rax
	jnz .bad_arg
@@:
; FLAG_CODEPAGE / FLAG_LOCALE are bit indices (they are set with `bts`), so
; they must be turned into masks before testing .flags. The three-way split
; below needs the codepage mask to be the smaller one:
;   below  (no mode, or codepage only) -> code page, which is also the default
;   equal  (locale only)               -> locale
;   above  (both bits set)             -> ambiguous, show usage
	assert FLAG_CODEPAGE < FLAG_LOCALE ; needed for default routing
	mov ecx, [.flags]
	and ecx, (1 shl FLAG_CODEPAGE) or (1 shl FLAG_LOCALE)
	cmp ecx, 1 shl FLAG_LOCALE
	jc .store_codepage
	jnz .display_usage ; ambiguous mode
.store_locale:
	mov [.locale], eax
	lodsq ; advance to the next argument
	jmp .process_args
.store_codepage:
	mov [.codepage], eax
	lodsq ; advance to the next argument
	jmp .process_args
.arg_string:
; Non-numeric argument handler.
; In:  rsi -> current argv slot (pointer to the wide string)
; In locale mode the string is remembered for later name resolution. In
; code page mode (also the default) it is matched against the symbolic CP_*
; names, and an unrecognized string is remembered in .lpCodePage.
;
; FLAG_CODEPAGE / FLAG_LOCALE are bit indices (they are set with `bts`), so
; they must be turned into masks before testing .flags:
;   below  (no mode, or codepage only) -> code page name
;   equal  (locale only)               -> locale name
;   above  (both bits set)             -> ambiguous, show usage
	assert FLAG_CODEPAGE < FLAG_LOCALE ; needed for default routing
	mov ecx, [.flags]
	and ecx, (1 shl FLAG_CODEPAGE) or (1 shl FLAG_LOCALE)
	cmp ecx, 1 shl FLAG_LOCALE
	jc .string_codepage
	jnz .display_usage ; ambiguous mode
.string_locale:
; the `cmp` above compared equal, so CF=0 here and .skip_arg will not report an error
	push [rsi]
	pop [.lpNameToResolve]
	jmp .skip_arg
.string_codepage:
; Case-insensitive match against each symbolic name; on a hit the matching
; constant becomes the code page. `test` clears CF, so .skip_arg continues.
	iterate abstract, CP_ACP,CP_OEMCP,CP_MACCP,CP_THREAD_ACP,\
		\; these don't really make sense (as they aren't byte encodings):
		CP_UTF7,CP_UTF8
		lstrcmpiW [rsi], W `abstract
		test eax, eax
		jnz .CP_.%
		mov [.codepage], abstract
		jmp .skip_arg
	.CP_.%:
	end iterate
; probably just an error
	push [rsi]
	pop [.lpCodePage]
	jmp .skip_arg
.args_processed:
; All arguments consumed. Without the locale bit the code page path is taken.
; With it, a locale name (if one was given) is resolved to an LCID first.
test [.flags], 1 shl FLAG_LOCALE
jz .basis_codepage
cmp [.lpNameToResolve], 0
jz .basis_locale ; use numeric locale
; string arguments captured by the parser, plus a buffer for the resolved name
{bss:8} .lpCodePage dq ?
{bss:8} .lpNameToResolve dq ?
{bss:2} .LocaleName rw LOCALE_NAME_MAX_LENGTH
ResolveLocaleName [.lpNameToResolve], & .LocaleName, LOCALE_NAME_MAX_LENGTH
test eax, eax
jcc_error jz, "Unable to resolve locale."
LocaleNameToLCID & .LocaleName, LOCALE_ALLOW_NEUTRAL_NAMES
test eax, eax
jcc_error jz, "LCID does not exist for locale."
; the resolved LCID replaces the default/numeric locale
mov [.locale], eax
jmp .basis_locale
.basis_codepage:
; Code page mode: with no code page string pending, go straight to the table
; setup. Otherwise fall through (string resolution is still a TODO).
cmp [.lpCodePage], 0
jz .have_codepage ; use numeric codepage
; TODO: resolve code page string
; TODO: find suitable locale for codepage selection:
.basis_locale:
; .lpSrcStr holds the 256 byte values 0..255 in order. GetStringTypeExA
; classifies them (CT_CTYPE1) for [.locale] into .lpCharType, one word per
; input byte. The only reader of .lpCharType in this file is currently
; commented out (see .table_inner).
{const:64} .lpSrcStr:
repeat 256
{const:64} db %-1
end repeat
{bss:64} .lpCharType rw 256
GetStringTypeExA [.locale], CT_CTYPE1, & .lpSrcStr, 256, & .lpCharType
test eax, eax ; BOOL
jcc_error jz, "GetStringTypeExA returned false."
; TODO: find suitable codepage for locale:
; LCIDToLocaleName
; char buf[19];
; int ccBuf = GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_SISO639LANGNAME, buf, 9);
; buf[ccBuf++] = '-';
; ccBuf += GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_SISO3166CTRYNAME, buf+ccBuf, 9);
.have_codepage:
; Query the code page and choose the MultiByteToWideChar flags it accepts.
; The chosen flags end up in .dwFlags; the resolved number is read from
; .cpiw.CodePage from here on.
{bss:4} .cpiw CPINFOEXW
GetCPInfoExW [.codepage], 0, & .cpiw ; translate identifiers to code page number
test eax, eax ; BOOL
jcc_error jz, "Invalid code page."
; warn (but continue) when a character may take more than one byte
cmp [.cpiw.MaxCharSize], 1
jz .SBCS ; single-byte character set
$ 10,27,'[93m',\
'Warning: this tool is designed for use with single-byte character sets.',10
.SBCS:
; partial support for multibyte code pages?
; ecx = code page number, edx = flags to use (the full set to start with),
; eax = replacement value moved in by the cmov instructions below
mov ecx, [.cpiw.CodePage]
mov edx, MB_ERR_INVALID_CHARS or MB_USEGLYPHCHARS ; desired flags
xor eax, eax ; the following code pages only support dwFlags of zero:
; an entry is either a single code page or an inclusive <low,high> range
iterate cp, 42,<50220,50222>,50225,50227,50229,<57002,57011>,65000
match low_cp =, high_cp,cp
; range entry: skip when below low_cp, else clear edx when ecx <= high_cp
cmp ecx, low_cp
jc .%
cmp ecx, high_cp+1
cmovc edx, eax
.%:
else
; single entry: clear edx on an exact match
cmp ecx, cp
cmovz edx, eax
end match
end iterate
mov eax, MB_ERR_INVALID_CHARS ; only supported dwFlags
iterate cp, 54936,65001
cmp ecx, cp
cmovz edx, eax
end iterate
{bss:4} .dwFlags dd ?
mov [.dwFlags], edx
; ---- table output ----
; Heading: a "Locale:" line (no value is printed for it), then "Code Page:"
; followed by the name string reported by GetCPInfoExW.
$ 10,27,'[93m',\
' Locale:',10,\
'Code Page:',9
lstrlenW & .cpiw.CodePageName
xchg r8d, eax
WriteConsoleW [.hOutput], & .cpiw.CodePageName, r8d, 0, 0
; column key and top border
$ 10,10,KHEX,\
" 0 1 2 3 4 5 6 7 8 9 A B C D E F ",10,BRDR,\
" ╔═══╤═══╤═══╤═══╤═══╤═══╤═══╤═══╤═══╤═══╤═══╤═══╤═══╤═══╤═══╤═══╗ ",10
; ebx = byte value being displayed, 0x00..0xFF (0x100 once the table is done)
xor ebx, ebx
.table_outer:
; Row start: the high nibble of ebx becomes the row key. `mov al` replaces
; only the low byte, and the upper byte of ax is already zero because
; eax <= 15, so ax holds the hex digit as one UTF-16 unit. It is patched
; over the 'X' placeholder in both the row lead-in and lead-out strings.
mov eax, ebx
shr eax, 4
mov al, [hextab + rax]
mov [.lead_in.index], ax
mov [.lead_out.index], ax
{data:2} .lead_in du ' ',KHEX
{data:2} .lead_in.index du 'X',BRDR,' ║ ',KHAR
{data:2} .lead_in.end:
.lead_in.chars := (.lead_in.end - .lead_in) shr 1
WriteConsoleW [.hOutput], & .lead_in, .lead_in.chars, 0, 0
.table_inner:
; test [.lpCharType + rbx*2], C1_CNTRL
; jnz .char_control
.char_output:
; convert the single byte in bl using the selected code page and flags;
; any result other than exactly one UTF-16 unit is shown as unknown
mov [.char], bl
MultiByteToWideChar [.cpiw.CodePage], [.dwFlags], & .char, 1, & .wide, 4
cmp eax, 1
jnz .char_unknown
; still need to filter out control:
cmp word [.wide], ' '
jc .char_control
; skip 0x007F-0x009F, C1 control block, ISO/IEC 8859, private use controls
cmp word [.wide], 0x007F
jc @F
cmp word [.wide], 0x009F+1
jc .char_control
@@:
WriteConsoleW [.hOutput], & .wide, 1, 0, 0
jmp .tween
.char_control: ; TODO: control lookup?
$ KONT,'�'
jmp .tween
.char_unknown:
$ KERR,'�'
.tween:
; cell separator, then the next byte; a row ends when the low nibble wraps to 0
$ BRDR,' │ ',KHAR
inc ebx
test ebx, 0x0F
jnz .table_inner
; Row end: three backspaces (8) move the cursor back over the last cell
; separator so the right border is written in its place, followed by the
; row key again.
{data:2} .lead_out du 8,8,8,BRDR,' ║ ',KHEX
{data:2} .lead_out.index du 'X ',10
{data:2} .lead_out.end:
.lead_out.chars := (.lead_out.end - .lead_out) shr 1
WriteConsoleW [.hOutput], & .lead_out, .lead_out.chars, 0, 0
; bl = 0 means all 256 values are done; 0x80 gets the heavier divider
test bl, bl
jz .table_footer
cmp bl, 0x80
jz .table_split
$ BRDR,\
" ╟───┼───┼───┼───┼───┼───┼───┼───┼───┼───┼───┼───┼───┼───┼───┼───╢ ",10
jmp .table_outer
.table_split:
$ BRDR,\
" ╠═══╪═══╪═══╪═══╪═══╪═══╪═══╪═══╪═══╪═══╪═══╪═══╪═══╪═══╪═══╪═══╣ ",10
jmp .table_outer
.table_footer:
; bottom border, column key again, then reset the colours
$ BRDR,\
" ╚═══╧═══╧═══╧═══╧═══╧═══╧═══╧═══╧═══╧═══╧═══╧═══╧═══╧═══╧═══╧═══╝ ",10,KHEX,\
" 0 1 2 3 4 5 6 7 8 9 A B C D E F ",10,27,'[m'
; the table was printed: report success
mov [.result], 0
; Disabled debug aid: for every byte value, print the UTF-16 unit returned by
; MultiByteToWideChar as four hex digits ('????' when the conversion does not
; yield exactly one unit). Sixteen entries go on each line.
if 0 ; debugging
{data:2} .debug5 du ' ????',10
xor ebx, ebx
; rsi keeps the four '?' characters so the template can be restored each pass
mov rsi, qword [.debug5 + 2]
@5: mov [.char], bl
MultiByteToWideChar [.cpiw.CodePage], [.dwFlags], & .char, 1, & .wide, 4
mov qword [.debug5 + 2], rsi ; unknown
cmp eax, 1
jnz @F
; high byte of the unit -> first two hex digits
movzx eax, byte [.wide+1]
mov ecx, eax
shr eax, 4
and ecx, 0xF
movzx eax, byte [hextab + rax]
movzx ecx, byte [hextab + rcx]
mov [.debug5 + 2], ax
mov [.debug5 + 4], cx
; low byte of the unit -> last two hex digits
movzx eax, byte [.wide]
mov ecx, eax
shr eax, 4
and ecx, 0xF
movzx eax, byte [hextab + rax]
movzx ecx, byte [hextab + rcx]
mov [.debug5 + 6], ax
mov [.debug5 + 8], cx
@@:
add bl, 1
; write 5 characters, or 6 (adding the trailing newline) after every 16th value
xor r8, r8
test bl, 0xF
setz r8b
add r8b, 5
WriteConsoleW [.hOutput], & .debug5, r8, 0, 0
; bl wrapped to zero: all 256 values have been shown
test ebx, ebx
jnz @5B
end if
; leave through the common exit (the exit code is [.result])
jmp .done
; REFERENCES:
; https://learn.microsoft.com/en-us/windows/win32/Intl/code-page-identifiers
; https://learn.microsoft.com/en-us/windows/win32/intl/locale-information-constants#locale-name-constants
; https://wutils.com/encodings/
virtual as "response" ; configure linker from here:
; One linker option per line, each terminated by a line feed.
; Insert '/VERBOSE' after '/NOLOGO' to debug the link process.
; '/IGNORE:4281' silences a bogus warning meant to scare people away.
	iterate option, '/NOLOGO',\
			'/NODEFAULTLIB',\
			'/BASE:0x10000',\
			'/DYNAMICBASE:NO',\
			'/IGNORE:4281',\
			'/SUBSYSTEM:CONSOLE,6.02',\
			'kernel32.lib',\
			'shell32.lib',\
			'shlwapi.lib'
		db option,10
	end iterate
end virtual