-
Notifications
You must be signed in to change notification settings - Fork 12
/
compile.py
executable file
·1587 lines (1322 loc) · 56.9 KB
/
compile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/python
#
# Copyright 2011-2016 Jeff Bush
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Read LISP source files and compile to binary machine code.
./compile.py <source file 1> [<source file 2>] ...
Output file is 'program.hex', which contains the hexadecimal encoded
instruction memory, starting at address 0. Each entry in the hex file
is a 16 bit value value, separated with a newline.
"""
import copy
import math
import os
import re
import shlex
import sys
# 2 bits associated with each data memory location, stored in hardware
# to indicate its type.
TAG_INTEGER = 0 # Make this zero because types default to this when pushed
TAG_CONS = 1
TAG_FUNCTION = 2
TAG_CLOSURE = 3
# Upper 5 bits in each instruction that indicates the operation.
OP_NOP = 0
OP_CALL = 1
OP_RETURN = 2
OP_POP = 3
OP_LOAD = 4
OP_STORE = 5
OP_ADD = 6
OP_SUB = 7
OP_REST = 8
OP_GTR = 9
OP_GTE = 10
OP_EQ = 11
OP_NEQ = 12
OP_DUP = 13
OP_GETTAG = 14
OP_SETTAG = 15
OP_AND = 16
OP_OR = 17
OP_XOR = 18
OP_LSHIFT = 19
OP_RSHIFT = 20
OP_GETBP = 21
OP_RESERVE = 24
OP_PUSH = 25
OP_GOTO = 26
OP_BFALSE = 27
OP_GETLOCAL = 29
OP_SETLOCAL = 30
OP_CLEANUP = 31
class CompileError(Exception):
pass
class Symbol(object):
"""What an identifier refers to."""
LOCAL_VARIABLE = 1
GLOBAL_VARIABLE = 2
FUNCTION = 3
def __init__(self, symtype):
self.type = symtype
self.index = -1
self.initialized = False # For globals
self.function = None
self.closure_source = None
class Label(object):
"""Placeholder for a branch destination."""
def __init__(self):
self.defined = False
self.offset = 0
class Function(object):
"""This is a builder pattern that is used while generating code for a function.
The compiler can call into this to append instructions to a function.
Because of anonymous inner functions, there can be several functions being
built at once. This contains both the raw instructions themselves, as well as
symbol information and fixups.
"""
def __init__(self, name):
self.name = name
self.fixups = []
self.base_address = None
self.num_local_variables = 0
self.prologue = None
self.instructions = []
self.environment = [{}] # Stack of scopes
self.free_variables = []
self.enclosing_function = None
self.referenced = False # Used to strip dead functions
self.referenced_funcs = []
self.entry = Label()
self.emit_label(self.entry)
def enter_scope(self):
"""Start a new region of code where local variables are live."""
self.environment.append({})
def exit_scope(self):
"""Any local variables defined in the previous scope are no longer live."""
self.environment.pop()
def lookup_local_variable(self, name):
"""Attempt to find a local variable visible at the current code location.
This will start with the current scope, then walk outward to enclosing
scopes. This means variables with the same name in inner scopes can
shadow outer ones. This will also found closure variables outside
of a function, which will be handled specially later.
Args:
name (str): Human readable identifier
Returns:
Symbol corresponding to identifier or None
"""
for scope in reversed(self.environment):
if name in scope:
return scope[name]
return None
def reserve_local_variable(self, name):
"""Allocate space for a local variable.
We also record the identifier in the inner-most scope.
Args:
name (str): Human readable identifier
Returns:
Symbol object corresponding to location.
"""
sym = Symbol(Symbol.LOCAL_VARIABLE)
self.environment[-1][name] = sym
# Skip return address and base pointer
sym.index = -(self.num_local_variables + 2)
self.num_local_variables += 1
return sym
def set_param(self, name, index):
"""Record information about a paramter to this function.
Args:
name (str): Human readable identifier used to access
the contents from within the function.
index (int): Offset in the stack frame used to find
the value of this parameter, passed from caller.
"""
sym = Symbol(Symbol.LOCAL_VARIABLE)
self.environment[-1][name] = sym
sym.index = index + 1
def emit_label(self, label):
"""Set the instruction address of a label.
The address of this label will be set to the *next* instruction to be
emitted. During later fixups, all instructions that reference this
label will be adjusted to point here.
The label will already have been created, and may already have been
used as a forward branch target.
Args:
label (Label): the label to assign to this location.
"""
assert not label.defined
label.defined = True
label.offset = len(self.instructions)
def emit_branch_instruction(self, opcode, label):
"""Append a branch instruction to the end of the function.
Args:
opcode (int): opcode of the branch instruction (e.g.
conditional vs. unconditional)
label (Label): destination of the branch.
"""
self.emit_instruction(opcode, 0)
self.add_fixup(label)
def emit_instruction(self, opcode, param=0):
"""Append an instruction to the end of this function.
Args:
opcode (int): Upper 5 bits of instruction that indicate the
operation.
param: Lower 16 bits of instruction.
"""
if param > 32767 or param < -32768:
raise CompileError('param out of range ' + str(param))
self.instructions.append((opcode << 16) | (param & 0xffff))
def add_prologue(self):
"""Emit code that is executed at the top of the function.
This code is invoked immediately after a function is called, before
any of the user defined code. It includes basic local variable setup
and some bookkeeping for closures.
"""
prologue = []
if self.num_local_variables > 0:
prologue.append((OP_RESERVE << 16) | self.num_local_variables)
if self.free_variables:
prologue.append((OP_PUSH << 16) | 1) # Address of $closure
prologue.append(OP_LOAD << 16)
# Read the list
for index, var in enumerate(self.free_variables):
if index:
prologue.append(OP_REST << 16) # Read next pointer
if index != len(self.free_variables) - 1:
# Save pointer for call to rest in next loop iteration
prologue.append(OP_DUP << 16)
prologue.append(OP_LOAD << 16) # Read value (car)
prologue.append((OP_SETLOCAL << 16) |
(var.index & 0xffff)) # save
prologue.append(OP_POP << 16)
self.prologue = prologue
def get_size(self):
"""Return the current number of instructions in this function."""
return len(self.prologue) + len(self.instructions)
def add_fixup(self, target):
"""Record instruction reference to memory location for later patching.
This remembers that the last emitted instruction should have its
data field adjusted later to point to 'target'. This can't always
be done immediately because it may be a forward reference.
Args:
target (Label|Function|Symbol): memory location reference.
"""
self.fixups.append((len(self.instructions) - 1, target))
def patch(self, offset, value):
"""Update instruction to point to a memory location.
Change the immediate field of the instruction, leaving its opcode
unmodified.
Args:
offset (int): index of the instruction, starting after the prologue
value (int): new value for immediate field
"""
self.instructions[offset] &= ~0xffff
self.instructions[offset] |= (value & 0xffff)
def apply_fixups(self):
"""Update all memory references in instructions in this function.
Walk through all previously recorded calls to add_fixup and adjust
the instructions to point to the proper targets. These *should*
all be known at this point, as the labels will have been emitted,
but, if not, this will raise an exception (which would indicate a bug
in this code).
"""
base_address = self.base_address + len(self.prologue)
for pc, target in self.fixups:
if isinstance(target, Label):
assert target.defined
self.patch(pc, base_address + target.offset)
elif isinstance(target, Function):
self.patch(pc, target.base_address)
elif isinstance(target, Symbol):
if target.type == Symbol.GLOBAL_VARIABLE:
self.patch(pc, target.index)
elif target.type == Symbol.FUNCTION:
self.patch(pc, target.function.base_address)
else:
raise CompileError(
'internal error: unknown fixup type ' + target.__name__)
def mark_callees(self):
"""Recursively mark all functions called indirectly or directly by this one.
This ensures they won't be removed by dead code stripping. The
reference list contains either functions (for anonymous functions
or declared functions) or symbols (for forward references)
"""
self.referenced = True
for ref in self.referenced_funcs:
if isinstance(ref, Symbol):
function = ref.function
else:
function = ref
if not function.referenced:
function.mark_callees()
class Parser(object):
"""Convert text LISP source into a nested set of python lists."""
def __init__(self):
self.lexer = None
self.program = []
self.filename = None
def parse_file(self, filename):
self.filename = filename
with open(filename, 'r') as stream:
self.lexer = shlex.shlex(stream)
self.lexer.commenters = ';'
self.lexer.quotes = '"'
self.lexer.wordchars += '?+<>!@#$%^&*;:.=-_\\'
while True:
expr = self.parse_expr()
if expr == '':
break
self.program.append(expr)
def parse_paren_list(self):
parenlist = []
while True:
lookahead = self.lexer.get_token()
if lookahead == '':
raise CompileError('missing )')
elif lookahead == ')':
break
self.lexer.push_token(lookahead)
parenlist.append(self.parse_expr())
return parenlist
def parse_expr(self):
token = self.lexer.get_token()
if token == '':
return ''
elif token == '\'':
return ['quote', self.parse_expr()]
elif token == '`':
return ['backquote', self.parse_expr()]
elif token == ',':
return ['unquote', self.parse_expr()]
elif token == '(':
return self.parse_paren_list()
elif token.isdigit() or (token[0] == '-' and len(token) > 1):
return int(token)
elif token == ')':
raise CompileError('unmatched )')
elif token[:2] == '#\\': # character code
if token[2:] == 'newline':
return ord('\n')
elif token[2:] == 'space':
return ord(' ')
else:
return ord(token[2])
else:
return token
class Compiler(object):
def __init__(self):
self.globals = {}
self.next_global_slot = 0
self.current_function = None
self.function_list = []
self.break_stack = []
def lookup_symbol(self, name):
"""Given an identifier, find its type and where it's stored.
Start lookup in the current scope and works outward to enclosing
scopes. If the symbol doesn't exist, create it in the global scope.
If it is defined outside the current function closure, there is a
bunch of extra accounting to do.
Args:
name (string): human readable symbol text
Returns:
Symbol object.
"""
# Check for local variable
sym = self.current_function.lookup_local_variable(name)
if sym:
return sym
# A variable is 'free' if it is defined in the enclosing
# function. In this example, a is a free variable:
#
# (function foo (a) (function (b) (+ a b)))
#
is_free_var = False
function = self.current_function.enclosing_function
while function:
sym = function.lookup_local_variable(name)
if sym:
is_free_var = True
break
function = function.enclosing_function
if is_free_var:
# Create a new local variable to contain the value
sym = self.current_function.reserve_local_variable(name)
self.current_function.free_variables.append(sym)
# Determine where to capture this from in the source environment
dest = sym
func = self.current_function.enclosing_function
while True:
dest.closure_source = func.lookup_local_variable(name)
if dest.closure_source:
break
# The variable is from a function above this one, eg:
# (function foo (a) (function bar (b) (function baz(c) (+ c a))))
# In this example, we want to create a variable 'a' in the scope
# of function b, so it is copied through properly. This is
# named so, if there are subseqent uses of 'a', they will reference
# the same variable.
dest.closure_source = func.reserve_local_variable(name)
func.free_variables.append(dest.closure_source)
dest = dest.closure_source
func = func.enclosing_function
return sym
# Check if this is a global variable
if name in self.globals:
return self.globals[name]
# Not found, create a new global variable implicitly
sym = Symbol(Symbol.GLOBAL_VARIABLE)
sym.index = self.next_global_slot
self.next_global_slot += 1
self.globals[name] = sym
return sym
def compile(self, program):
"""Convert the entire program into machine code.
All code not in function blocks will be emitted into an implicitly
created dummy function 'main'
"""
self.current_function = Function('main')
self.function_list.append(self.current_function)
# Create a built-in variable that indicates where the heap starts
# (will be patched at the end of compilation with the proper address)
# The code to patch this below assumes it is the first instruction
# emitted.
heapstart = self.lookup_symbol('$heapstart')
heapstart.initialized = True
self.current_function.emit_instruction(OP_PUSH, 0)
self.current_function.emit_instruction(OP_PUSH, heapstart.index)
self.current_function.emit_instruction(OP_STORE)
self.current_function.emit_instruction(OP_POP)
# This is used during calls with closures. Code expects this variable
# to be at address 1 (which it will be because it is the second global
# variable that was created).
closure_ptr = self.lookup_symbol('$closure')
closure_ptr.initialized = True
# Do a pass to register all functions in the global scope. This is
# necessary to avoid creating these symbols as global variables
# when there are forward references.
for expr in program:
if expr[0] == 'function':
self.globals[expr[1]] = Symbol(Symbol.FUNCTION)
# Compile
for expr in program:
if expr[0] == 'function':
self.compile_function(expr)
else:
self.compile_expression(expr)
self.current_function.emit_instruction(OP_POP) # Clean up stack
# Call (halt) library call at end
self.compile_identifier('halt')
self.current_function.emit_instruction(OP_CALL)
self.current_function.emit_instruction(OP_CLEANUP, 2)
# Strip out functions that aren't called
self.current_function.mark_callees()
self.function_list = [
function for function in self.function_list if function.referenced]
# Check for globals that are referenced but not assigned
for name, sym in self.globals.items():
if not sym.initialized:
raise CompileError('unknown variable {}'.format(name))
# Generate prologues and determine function addresses
pc = 0
for function in self.function_list:
function.base_address = pc
function.add_prologue()
pc += function.get_size()
# Fix up all references
for function in self.function_list:
function.apply_fixups()
# Patch $heapsize now that we know the number of global variables.
# This assumes the push of the size is the first instruction emitted
# above.
self.function_list[0].patch(0, len(self.globals))
# Flatten all generated instructions into an array
instructions = []
for function in self.function_list:
instructions += function.prologue
instructions += function.instructions
with open('program.hex', 'w') as outfile:
for instr in instructions:
outfile.write('{:06x}\n'.format(instr))
# For debugging: create a list file with the disassembly
with open('program.lst', 'w') as listfile:
# Write out table of global variables
listfile.write('Globals:\n')
for var in sorted(self.globals, key=lambda key: self.globals[key].index):
sym = self.globals[var]
if sym.type != Symbol.FUNCTION:
listfile.write(' {:4d} {} \n'.format(sym.index, var))
disassemble(listfile, instructions, self.function_list)
def compile_function(self, expr):
"""Compile named function definition.
These are only supported in the global scope.
(function name (param param...) body)
Args:
expr (List): List containing the S-expr
"""
function = self.compile_function_body(expr[1], expr[2], expr[3:])
self.function_list.append(function)
assert expr[1] in self.globals # Added by first pass
sym = self.globals[expr[1]]
if sym.initialized:
raise CompileError('Global variable {} redefined as function'
.format(expr[1]))
sym.initialized = True
sym.function = function
def compile_expression(self, expr, is_tail_call=False):
"""The mother code generation function.
Compiles an arbitrary lisp expression. Each expression consumes
all parameters--which are pushed onto the stack from right to left--
and leaves a result value on the stack. All expressions have values in LISP.
Determining if something is a tail call is straightforward in
S-Expression form. The outermost function call is a tail call.
This may be wrapped in control flow form. If a sequence is outermost,
the last function call will be a tail call.
Args:
expr (List): List containing the S-expr
is_tail_call (bool): True if there are no other expressions evaluated
after this one before returning from the function.
"""
if isinstance(expr, list):
if len(expr) == 0:
# Empty expression
self.current_function.emit_instruction(OP_PUSH, 0)
else:
self.compile_combination(expr, is_tail_call)
elif isinstance(expr, int):
self.compile_integer_literal(expr)
elif expr[0] == '"':
self.compile_string(expr[1:-1])
elif expr == 'nil' or expr == 'false':
self.current_function.emit_instruction(OP_PUSH, 0)
elif expr == 'true':
self.current_function.emit_instruction(OP_PUSH, 1)
else:
# This is a variable.
self.compile_identifier(expr)
def compile_integer_literal(self, expr):
self.current_function.emit_instruction(OP_PUSH, expr)
def compile_identifier(self, expr):
"""Look up variable and emit code to push its value onto the stack.
Args:
expr (str): Human readable of the variable to reference
"""
variable = self.lookup_symbol(expr)
if variable.type == Symbol.LOCAL_VARIABLE:
self.current_function.emit_instruction(OP_GETLOCAL,
variable.index)
elif variable.type == Symbol.GLOBAL_VARIABLE:
self.current_function.emit_instruction(OP_PUSH, 0)
self.current_function.add_fixup(variable)
self.current_function.emit_instruction(OP_LOAD)
elif variable.type == Symbol.FUNCTION:
self.current_function.referenced_funcs.append(variable)
self.current_function.emit_instruction(OP_PUSH, 0)
self.current_function.add_fixup(variable)
else:
raise CompileError(
'internal error: symbol {} does not have a valid type'
.format(expr))
def compile_combination(self, expr, is_tail_call=False):
"""Compile a list expression (op param param...).
This may be a special form (like if) or a function call.
Any expression that isn't an atom or a number will be compiled here.
Args:
expr (List): List containing the S-expr
is_tail_call (bool): True if there are no other expressions evaluated
after this one before returning from the function.
"""
if isinstance(expr[0], str):
function_name = expr[0]
if function_name in self.PRIMITIVES:
self.compile_primitive(expr)
elif function_name == 'function':
self.compile_anonymous_function(expr)
elif function_name == 'begin':
self.compile_sequence(expr[1:], is_tail_call)
elif function_name == 'while':
self.compile_while(expr)
elif function_name == 'break':
self.compile_break(expr)
elif function_name == 'if':
self.compile_if(expr, is_tail_call)
elif function_name == 'assign':
self.compile_assign(expr)
elif function_name == 'quote':
self.compile_quote(expr[1])
elif function_name == 'list':
self.compile_list(expr)
elif function_name == 'let':
self.compile_let(expr, is_tail_call)
elif function_name == 'getbp':
self.current_function.emit_instruction(OP_GETBP)
elif function_name == 'and' or function_name == 'or' or function_name == 'not':
self.compile_boolean_expression(expr)
else:
# Call to a user defined function.
self.compile_function_call(expr, is_tail_call)
else:
self.compile_function_call(expr)
def compile_list(self, expr):
"""Emit code to construct a literal list
Args:
expr (List): list of expression values
"""
for value in reversed(expr[1:]):
self.compile_expression(value)
# Emit a call to the cons library function
self.compile_identifier('cons')
self.current_function.emit_instruction(OP_CALL)
self.current_function.emit_instruction(OP_CLEANUP, 2)
def compile_quote(self, expr):
"""Convert quoted expression to a series of cons calls.
Args:
expr (List|int|str): literal value to be quoted.
"""
if isinstance(expr, list):
if len(expr) == 3 and expr[1] == '.':
# This is a pair, which has special syntax: ( expr . expr )
# Create a single cons cell for it.
self.compile_quote(expr[2])
self.compile_quote(expr[0])
# Emit a call to the cons library function
self.compile_identifier('cons')
self.current_function.emit_instruction(OP_CALL)
self.current_function.emit_instruction(OP_CLEANUP, 2)
elif len(expr) == 0:
# Empty list
self.compile_integer_literal(0)
else:
# List, create a chain of cons calls
self.compile_quoted_list(expr)
elif isinstance(expr, int):
self.compile_integer_literal(expr)
else:
self.compile_string(expr)
def compile_quoted_list(self, tail):
if len(tail) == 1:
self.compile_integer_literal(0)
else:
# Do the tail first to avoid allocating a bunch of temporaries
self.compile_quoted_list(tail[1:])
self.compile_quote(tail[0])
# Emit a call to the cons library function
self.compile_identifier('cons')
self.current_function.emit_instruction(OP_CALL)
self.current_function.emit_instruction(OP_CLEANUP, 2)
def compile_string(self, string):
"""Convert string literal to a cons call for each character
There isn't not a native string type.
Args:
string (str): literal to be encoded.
"""
if len(string) == 1:
self.compile_integer_literal(0)
else:
self.compile_string(string[1:])
self.compile_integer_literal(ord(string[0]))
# Emit a call to the cons library function
self.compile_identifier('cons')
self.current_function.emit_instruction(OP_CALL)
self.current_function.emit_instruction(OP_CLEANUP, 2)
def compile_assign(self, expr):
"""Emit code to write to a variable (assign variable value).
This will leave the value of the expression on the stack, since all
expressions do that.
Args:
expr (List): List containing the S-expr
"""
variable = self.lookup_symbol(expr[1])
if variable.type == Symbol.LOCAL_VARIABLE:
self.compile_expression(expr[2])
self.current_function.emit_instruction(OP_SETLOCAL, variable.index)
elif variable.type == Symbol.GLOBAL_VARIABLE:
self.compile_expression(expr[2])
self.current_function.emit_instruction(OP_PUSH, 0)
self.current_function.add_fixup(variable)
self.current_function.emit_instruction(OP_STORE)
variable.initialized = True
elif variable.type == Symbol.FUNCTION:
raise CompileError('cannot assign function {}'.format(expr))
else:
raise CompileError(
'Internal error: what kind of variable is {}?'.format(expr[1]))
def compile_boolean_expression(self, expr):
"""Compile boolean expression (and, or, not) that stores value on stack.
This is not used for conditional form like if or while. This will push
the result (1 or 0) on the stack.
Args:
expr (List): List containing the S-expr
"""
false_label = Label()
done_label = Label()
self.compile_predicate(expr, false_label)
self.current_function.emit_instruction(OP_PUSH, 1)
self.current_function.emit_branch_instruction(OP_GOTO, done_label)
self.current_function.emit_label(false_label)
self.current_function.emit_instruction(OP_PUSH, 0)
self.current_function.emit_label(done_label)
def compile_predicate(self, expr, false_label):
"""Compile boolean expression that is part of a control flow expression.
If the expression is false, this will jump to the label 'false_label',
otherwise it will fall through. This performs short circuit evaluation
where possible.
Args:
expr (List): List containing the S-expr
false_label (Label): Where to jump to if express is false.
"""
if isinstance(expr, list):
if expr[0] == 'and':
if len(expr) < 2:
raise CompileError('two few arguments for and')
# Short circuit if any condition is false
for cond in expr[1:]:
self.compile_predicate(cond, false_label)
return
elif expr[0] == 'or':
if len(expr) < 2:
raise CompileError('two few arguments for or')
true_target = Label()
for cond in expr[1:-1]:
test_next = Label()
self.compile_predicate(cond, test_next)
self.current_function.emit_branch_instruction(
OP_GOTO, true_target)
self.current_function.emit_label(test_next)
# Final test short circuits to false target
self.compile_predicate(expr[-1], false_label)
self.current_function.emit_label(true_target)
return
elif expr[0] == 'not':
if len(expr) != 2:
raise CompileError('two few arguments for not')
skip_to = Label()
self.compile_predicate(expr[1], skip_to)
self.current_function.emit_branch_instruction(
OP_GOTO, false_label)
self.current_function.emit_label(skip_to)
return
self.compile_expression(expr)
self.current_function.emit_branch_instruction(OP_BFALSE, false_label)
def compile_if(self, expr, is_tail_call=False):
"""Compile conditional branch.
(if expr true [false])
Args:
expr (List): List containing the S-expr
is_tail_call (bool): True if there are no other expressions evaluated
after this one before returning from the function.
"""
false_label = Label()
done_label = Label()
self.compile_predicate(expr[1], false_label)
self.compile_expression(expr[2], is_tail_call) # True clause
self.current_function.emit_branch_instruction(OP_GOTO, done_label)
self.current_function.emit_label(false_label)
# False clause
if len(expr) > 3:
self.compile_expression(expr[3], is_tail_call)
else:
self.current_function.emit_instruction(OP_PUSH, 0)
self.current_function.emit_label(done_label)
def compile_while(self, expr):
"""Compile loop.
That body is implicitly a (begin... and can use a sequence
If the loop terminates normally (the condition is false), the
result is zero. If (break val) is called, 'val' will be the result.
(while condition body)
Args:
expr (List): List containing the S-expr
"""
top_of_loop = Label()
bottom_of_loop = Label()
break_loop = Label()
self.break_stack.append(break_loop)
self.current_function.emit_label(top_of_loop)
self.compile_predicate(expr[1], bottom_of_loop)
self.compile_sequence(expr[2:])
self.current_function.emit_instruction(OP_POP) # Clean up stack
self.current_function.emit_branch_instruction(OP_GOTO, top_of_loop)
self.current_function.emit_label(bottom_of_loop)
self.break_stack.pop()
self.current_function.emit_instruction(OP_PUSH, 0) # Default value
self.current_function.emit_label(break_loop)
def compile_break(self, expr):
"""Emit instruction to jump past the end of the current loop body.
(break [value])
Args:
expr (List): List containing the S-expr
"""
label = self.break_stack[-1]
if len(expr) > 1:
self.compile_expression(expr[1]) # Push value on stack
else:
self.current_function.emit_instruction(OP_PUSH, 0)
self.current_function.emit_branch_instruction(OP_GOTO, label)
# Primitives look like function calls, but map to machine
# instructions
PRIMITIVES = {
'+': (OP_ADD, 2),
'-': (OP_SUB, 2),
'>': (OP_GTR, 2),
'>=': (OP_GTE, 2),
'<': (-1, 2), # These are handled by switching operand order
'<=': (-1, 2),
'=': (OP_EQ, 2),
'<>': (OP_NEQ, 2),
'load': (OP_LOAD, 1),
'store': (OP_STORE, 2),
'first': (OP_LOAD, 1),
'rest': (OP_REST, 1),
'second': (OP_REST, 1), # Alias for rest
'settag': (OP_SETTAG, 2),
'gettag': (OP_GETTAG, 1),
'bitwise-and': (OP_AND, 2),
'bitwise-or': (OP_OR, 2),
'bitwise-xor': (OP_XOR, 2),
'rshift': (OP_RSHIFT, 2),
'lshift': (OP_LSHIFT, 2)
}
def compile_primitive(self, expr):
opcode, nargs = self.PRIMITIVES[expr[0]]
if len(expr) - 1 != nargs:
raise CompileError('wrong number of arguments for {}'.format(expr[0]))
# Synthesize lt and lte operators by switching order and using the
# opposite operators
if expr[0] == '<':
self.compile_expression(expr[1])
self.compile_expression(expr[2])
self.current_function.emit_instruction(OP_GTR)
elif expr[0] == '<=':
self.compile_expression(expr[1])
self.compile_expression(expr[2])
self.current_function.emit_instruction(OP_GTE)
else:
if len(expr) > 2:
self.compile_expression(expr[2])
self.compile_expression(expr[1])
self.current_function.emit_instruction(opcode)
def compile_function_call(self, expr, is_tail_call=False):
"""Emit code to call a function.
Args:
expr (List): List containing the S-expr
is_tail_call (bool): True if there are no other expressions evaluated
after this one before returning from the function.
"""
if isinstance(expr[0], int):
raise CompileError('Cannot use integer as function')
# Push arguments
for param_expr in reversed(expr[1:]):
self.compile_expression(param_expr)
if self.current_function.name == expr[0] and is_tail_call:
# This is a recursive tail call. Copy parameters back into
# frame and then jump to entry
for opnum in range(len(expr) - 1):
self.current_function.emit_instruction(OP_SETLOCAL, opnum + 1)
self.current_function.emit_instruction(OP_POP)
self.current_function.emit_branch_instruction(
OP_GOTO, self.current_function.entry)
else:
may_be_closure = True
if isinstance(expr[0], str):
func = self.lookup_symbol(expr[0])