| 1 | import Program |
|---|
| 2 | A = " " |
|---|
| 3 | B = "\t" |
|---|
| 4 | C = "\n" |
|---|
| 5 | """ |
|---|
| 6 | Input to the whitespace VM. |
|---|
| 7 | For convenience, three input characters |
|---|
| 8 | A => space, B => tab, C => either of CR/LF |
|---|
| 9 | |
|---|
| 10 | Numbers are binary sign-and-modulus (A=0, B=1, C=terminator) |
|---|
| 11 | Strings are sequences of binary characters, terminated by C. |
|---|
| 12 | |
|---|
| 13 | We have: |
|---|
| 14 | |
|---|
| 15 | * Stack instructions (Preceded by A) |
|---|
| 16 | Push (Integer) A |
|---|
| 17 | Dup CA |
|---|
| 18 | Swap CB |
|---|
| 19 | Discard CC |
|---|
| 20 | |
|---|
| 21 | * Arithmetic (Preceded by BA) |
|---|
| 22 | Plus AA |
|---|
| 23 | Minus AB |
|---|
| 24 | Times AC |
|---|
| 25 | Divide BA |
|---|
| 26 | Modulo BB |
|---|
| 27 | |
|---|
| 28 | * Heap access (Preceded by BB) |
|---|
| 29 | Store A |
|---|
| 30 | Retrieve B |
|---|
| 31 | |
|---|
| 32 | * Control (Preceded by C) |
|---|
| 33 | Label String AA |
|---|
| 34 | Call Label AB |
|---|
| 35 | Jump Label AC |
|---|
| 36 | If Zero Label BA |
|---|
| 37 | If Neg Label BB |
|---|
| 38 | Return BC |
|---|
| 39 | End CC |
|---|
| 40 | |
|---|
| 41 | * IO instructions (Preceded by BC) |
|---|
| 42 | OutputChar AA |
|---|
| 43 | OutputNum AB |
|---|
| 44 | ReadChar BA |
|---|
| 45 | ReadNum BB |
|---|
| 46 | """ |
|---|
| 47 | |
|---|
| 48 | def load(fname): |
|---|
| 49 | fp = open(fname,"r") |
|---|
| 50 | src = fp.read() |
|---|
| 51 | fp.close() |
|---|
| 52 | execute(src) |
|---|
| 53 | |
|---|
| 54 | def execute(src): |
|---|
| 55 | prog = parse(src) |
|---|
| 56 | if prog != None: |
|---|
| 57 | #print repr(prog) # uncomment to dump the contents of the program to stdout |
|---|
| 58 | Program.vm (prog) |
|---|
| 59 | |
|---|
| 60 | # DFA for the opcode tree |
|---|
| 61 | # format: DFA[(state,letter)] = state or (opcodeclass, None|int|str) or "tokenlist for error msg" |
|---|
| 62 | DFA = {(0,A): 1, (0,B): 4, (0,C): 12, |
|---|
| 63 | (1,A): (Program.Push, int), (1,B): 2, (1,C): 3, |
|---|
| 64 | (2,A): (Program.Ref, int), (2,B): "[SP][TB][TB]", (2,C): (Program.Slide, int), |
|---|
| 65 | (3,A): (Program.Dup, None), (3,B): (Program.Swap, None), (3,C): (Program.Discard, None), |
|---|
| 66 | (4,A): 5, (4,B): 8, (4,C): 9, |
|---|
| 67 | (5,A): 6, (5,B): 7, (5,C): "[TB][SP][LF]", |
|---|
| 68 | (6,A): (Program.Plus, None), (6,B): (Program.Minus, None), (6,C): (Program.Times, None), |
|---|
| 69 | (7,A): (Program.Divide, None), (7,B): (Program.Modulo, None), (7,C): "[TB][SP][TB][LF]", |
|---|
| 70 | (8,A): (Program.Store, None), (8,B): (Program.Retrieve, None), (8,C): "[TB][TB][LF]", |
|---|
| 71 | (9,A): 10, (9,B): 11, (9,C): "[TB][LF][LF]", |
|---|
| 72 | (10,A): (Program.OutputChar, None), (10,B): (Program.OutputNum, None), (10,C): "[TB][LF][SP][LF]", |
|---|
| 73 | (11,A): (Program.InputChar, None), (11,B): (Program.InputNum, None), (11,C): "[TB][LF][TB][LF]", |
|---|
| 74 | (12,A): 13, (12,B): 14, (12,C): 15, |
|---|
| 75 | (13,A): (Program.Label, str), (13,B): (Program.Call, str), (13,C): (Program.Jump, str), |
|---|
| 76 | (14,A): (Program.JumpZero, str), (14,B): (Program.JumpNeg, str), (14,C): (Program.Return, None), |
|---|
| 77 | (15,A): "[LF][LF][SP]", (15,B): (Program.Trace, None), (15,C): (Program.End, None)} |
|---|
| 78 | |
|---|
| 79 | def parse(src): |
|---|
| 80 | i = 0; |
|---|
| 81 | prog = Program.Program(); |
|---|
| 82 | lines,lastnewline = 1,-1 # so can give error locations as line:char |
|---|
| 83 | tokenstart = 0,1,1 # byte, line, char |
|---|
| 84 | state = 0 |
|---|
| 85 | while i < len(src): |
|---|
| 86 | while src[i] not in (A,B,C): |
|---|
| 87 | i += 1 |
|---|
| 88 | if state == 0: |
|---|
| 89 | tokenstart = (i,lines,i-lastnewline) |
|---|
| 90 | if src[i] == "\n": |
|---|
| 91 | lines += 1 |
|---|
| 92 | lastnewline = i |
|---|
| 93 | next = DFA[(state,src[i])] |
|---|
| 94 | action = type(next) |
|---|
| 95 | if action is int: # next state in tree |
|---|
| 96 | state = next |
|---|
| 97 | elif action is tuple: # next opcode |
|---|
| 98 | state = 0 |
|---|
| 99 | opcode,parameter = next |
|---|
| 100 | if parameter is int: |
|---|
| 101 | (x,i) = parseNumber(src, i+1) |
|---|
| 102 | lines += 1 # parseNumber will end on a \n |
|---|
| 103 | lastnewline = i |
|---|
| 104 | prog.programdata.append(opcode(tokenstart, x)) |
|---|
| 105 | elif parameter is str: |
|---|
| 106 | (x,i) = parseString(src, i+1) |
|---|
| 107 | lines += 1 |
|---|
| 108 | lastnewline = i |
|---|
| 109 | prog.programdata.append(opcode(tokenstart, x)) |
|---|
| 110 | else: |
|---|
| 111 | prog.programdata.append(opcode(tokenstart)) |
|---|
| 112 | else: # invalid state |
|---|
| 113 | print "Error parsing script: %s is not a valid command at byte 0x%X (line %d char %d)\n" % ((next,) + tokenstart) |
|---|
| 114 | print "Program so far: ", repr(prog); |
|---|
| 115 | return None |
|---|
| 116 | i += 1 |
|---|
| 117 | if next == (Program.End, None): |
|---|
| 118 | break |
|---|
| 119 | # find all the labels |
|---|
| 120 | for i in range(len(prog.programdata)): |
|---|
| 121 | if prog.programdata[i].opcode == Program.Opcodes.Label: |
|---|
| 122 | prog.labels[prog.programdata[i].label] = i; |
|---|
| 123 | return prog; |
|---|
| 124 | |
|---|
| 125 | def parseNumber(string, index): |
|---|
| 126 | a = 0L; |
|---|
| 127 | neg = None |
|---|
| 128 | while neg == None: |
|---|
| 129 | if string[index] == A: |
|---|
| 130 | neg = False |
|---|
| 131 | elif string[index] == B: |
|---|
| 132 | neg = True |
|---|
| 133 | elif string[index] == C: |
|---|
| 134 | return (0,index) |
|---|
| 135 | index += 1 |
|---|
| 136 | while string[index] != C: |
|---|
| 137 | if string[index] == A: |
|---|
| 138 | a <<= 1 |
|---|
| 139 | elif string[index] == B: |
|---|
| 140 | a <<= 1 |
|---|
| 141 | a += 1 |
|---|
| 142 | index += 1 |
|---|
| 143 | if neg: |
|---|
| 144 | return (-a,index) |
|---|
| 145 | else: |
|---|
| 146 | return (a,index) |
|---|
| 147 | |
|---|
| 148 | # storing a string (only used for labels) as eg "100101001" or "\t \t \t \t" would take up a lot |
|---|
| 149 | # of pointless space, but storing it as a number like parseNumber would mean 001 and 0001 wouldn't |
|---|
| 150 | # be unique - so store as a tuple of length and value, so they'd be (3,1) and (4,1) and hence unique |
|---|
| 151 | def parseString(string, index): |
|---|
| 152 | a = 0L; |
|---|
| 153 | length = 0; |
|---|
| 154 | while string[index] != '\n': |
|---|
| 155 | if string[index] == " ": |
|---|
| 156 | a <<= 1 |
|---|
| 157 | length += 1 |
|---|
| 158 | elif string[index] == "\t": |
|---|
| 159 | a <<= 1 |
|---|
| 160 | a += 1 |
|---|
| 161 | length += 1 |
|---|
| 162 | index += 1 |
|---|
| 163 | return ((length,a),index) |
|---|