; nut compiler in nut ; nut completion kit ; include lib.txt ; -------- header ---------- (enum 127 EOF) (enum 0 NIL) (enum 3000 MAXNAMES) (enum 3000 MAXSTR) (enum 3000 MEMMAX) (enum 1 xIF xWHILE xDO xUD1 xNEW xADD xSUB xMUL xDIV xEQ xLT xGT xCALL xGET xPUT xLIT xLDX xSTX xFUN xSYS xSET xSETV xVEC xUD2 xLD xST xLDY xSTY xUD3 xUD4 xUD5 xSTR xBAND xSHR xSHL) (enum 2 tyVAR tyFUN tyOP tyOPX tySYS tyUD tyGVAR tyXX tyENUM) ; tok -- token string ; LP RP -- string parenthesis ; mem -- data segment ; DP -- pointer to mem[] (let tok LP RP DP mem) (def init () () (do (set mem (new MEMMAX)) ; global var area (set DP 0) (set LP "(") (set RP ")"))) ; ------- symbol table -------- ; symbol table is an array ; each item has five elements: name, type, val, arity, lv ; name pointed to a string which is allocated separately (enum 5 esize) ; size of each element ; symtab -- symbol table ; symstr -- symbol string table ; symp -- pointer to symstr[] ; numNames -- number of symbol in symtab ; numLocal -- number of local var (let symtab symstr symp numNames numLocal) ; access functions (def getName idx () (vec symtab idx)) (def getType idx () (vec symtab (+ idx 1))) (def getVal idx () (vec symtab (+ idx 2))) (def getArity idx () (vec symtab (+ idx 3))) (def getLv idx () (vec symtab (+ idx 4))) (def setName (idx nm) () (setv symtab idx nm)) (def setType (idx ty) () (setv symtab (+ idx 1) ty)) (def setVal (idx v) () (setv symtab (+ idx 2) v)) (def setArity (idx v) () (setv symtab (+ idx 3) v)) (def setLv (idx v) () (setv symtab (+ idx 4) v)) ; allocate new string from symstr[] (def newName nm (k v) (do (set k (+ (strlen nm) 1)) (set v (+ symstr symp)) (set symp (+ symp k)) (if (> symp MAXSTR) (error "symbol string full")) (strcpy v nm) v)) ; search symtab for nm, ; if found, return its index, else, insert it (def install nm (i flag end) (do (set i 0) (set flag 1) (set end (* esize numNames)) (while (and flag (< i end)) (if (str= (getName i) nm) ; sequential search (set flag 0) ; else (set i (+ i esize)))) (if flag ; not found (do (if (> i MAXNAMES) (error "symtab overflow")) (setName i (newName nm)) (setType i tyUD) (set numNames (+ numNames 1)))) i)) ; install nm as local variable (def installLocal nm idx (do (set numLocal (+ numLocal 1)) (set idx (install nm)) (setType idx tyVAR) (setVal idx numLocal))) (def dumpsym () (i end) (do (print (- numNames 18)) (nl) (set i 90) ; 18 keywords (set end (* esize numNames)) (while (< i end) (do (prstr (getName i)) (space) (print (getType i)) (space) (print (getVal i)) (space) (print (getArity i)) (space) (print (getLv i)) (nl) (set i (+ i esize)))))) (def insertsym (nm ty op) (idx) (do (set idx (install nm)) (setType idx ty) (setVal idx op))) ; initially keywords are inserted into symtab ; its value is its opcode (def initsym () () (do (set symtab (new MAXNAMES)) (set symstr (new MAXSTR)) ; symbol string (set symp 0) ; symstr pointer (set numNames 0) (set numLocal 0) (insertsym "if" tyOP xIF) (insertsym "while" tyOP xWHILE) (insertsym "set" tyOPX xSET) (insertsym "setv" tyOPX xSETV) (insertsym "do" tyOP xDO) (insertsym "new" tyOP xNEW) (insertsym "+" tyOP xADD) (insertsym "-" tyOP xSUB) (insertsym "*" tyOP xMUL) (insertsym "/" tyOP xDIV) (insertsym "=" tyOP xEQ) (insertsym "<" tyOP xLT) (insertsym ">" tyOP xGT) (insertsym "vec" tyOPX xVEC) (insertsym "sys" tySYS xSYS) (insertsym "&" tyOP xBAND) (insertsym ">>" tyOP xSHR) (insertsym "<<" tyOP xSHL))) ; ---------- data --------- ; allocate n int from mem[] (def newdata n (a) (do (if (>= DP MEMMAX) (error "out of memory")) (set a DP) (set DP (+ DP n)) a)) (def isATOM e () (< e 0)) ; MSB bit 1 (def mkATOM (op arg) () (+ (<< (+ (& op 127) 128) 24) (& arg 16777215))) ; --------- parser ---------- (def tokenise () () (do (set tok (sys 3)) ; (printc 34) (pstrs tok) (space) (nop))) (def expect s () (if (not (str= tok s)) (do (prstr "expect ") (error s)))) (def prList e () (sys 10 e)) ; parse name list, in fun header ; NL can be singleton or list (def parseNL () () (do (tokenise) (if (str= tok LP) (do (tokenise) (while (not (str= tok RP)) (do (installLocal tok) (tokenise)))) ; else (installLocal tok)))) ; parse local var (def doVar (op arg) (v) (do (if (= op xSET) (set v (mkATOM xPUT arg)) (if (= op xSETV) (set v (mkATOM xSTX arg)) (if (= op xVEC) (set v (mkATOM xLDX arg)) (error "unknown op")))) v)) ; parse global var (def doGvar (op arg) () 0) ; parse each enum sym (def doEnum n () 0) ; parse a name (def parseName () (idx n n2 ty v) (do (set idx (install tok)) (set n (getVal idx)) (set ty (getType idx)) (if (= ty tyOP) (set v (mkATOM n 0)) (if (= ty tyVAR) (set v (mkATOM xGET n)) (if (= ty tyGVAR) (set v (mkATOM xLD n)) (if (= ty tyFUN) (set v (mkATOM xCALL idx)) (if (= ty tyOPX) (do (tokenise) ; get var name (set idx (install tok)) (set n2 (getVal idx)) (if (= (getType idx) tyVAR) (set v (doVar n n2)) ; else it is Gvar (set v (doGvar n n2)))) (if (= ty tySYS) (do (tokenise) ; get sys num (set v (mkATOM xSYS (atoi tok)))) (if (= ty tyENUM) (set v (doEnum n))))))))) v)) (def parseExp () () 0) ; forward declaration ; parse expression list (def parseEL () v (do (tokenise) (if (str= tok RP) (set v NIL) ; else (set v (cons (parseExp) (parseEL)))) v)) (def doString s () 0) ; parse expression (def parseExp () v (do (if (str= tok LP) ; it is a list (do (tokenise) (set v (cons (parseName) (parseEL)))) (if (isNumber tok) ; it is a number (set v (mkATOM xLIT (atoi tok))) (if (isString tok) (set v (doString (+ tok 1))) (set v (parseName))))) v)) ; parse function definition (def parseDef () (idx arity e k) (do (tokenise) (set idx (install tok)) (setType idx tyFUN) (set numLocal 0) (parseNL) (set arity numLocal) (parseNL) (tokenise) (set e (parseExp)) (tokenise) ; skip RP (if (isATOM e) (set e (cons e NIL))) ; body must be list (setArity idx arity) (setLv idx numLocal) (set k (+ (* arity 256) (& numLocal 255))) (setVal idx (cons (mkATOM xFUN k) (cons e NIL))) idx)) (def prName idx () (prstr (getName idx))) ; parse "let" expression (def parseLet () (idx x) (do (tokenise) (while (not (str= tok RP)) (do (set idx (install tok)) (if (!= (getType idx) tyUD) (error "redefine global var")) (setType idx tyGVAR) (setVal idx (newdata 1)) (prstr tok) (nl) (tokenise))))) ; parse "enum" expression (def parseEnum () () 0) ; the main parser (def parse () (idx) (do (tokenise) (while (!= (vec tok 0) EOF) (do (expect LP) (tokenise) (if (str= tok "def") (do (set idx (parseDef)) (prName idx) (nl) (prList (getVal idx)) (nl)) (if (str= tok "let") (parseLet) (if (str= tok "enum") (parseEnum) ; else (error "unknown keyword")))) (tokenise))))) ; ------- rename --------- ; resolve scans symtab to find fun def and rename it ; rename traverses n-code and rename local var from 1..n to n..1 ; and instantiates call.idx to the actual reference (let LV) ; number of local var in a fun def (def de_op x () (& (>> x 24) 127)) (def de_arg x () (& x 16777215)) ; x & 0x0ffffff ; get/put/ldx/stx rename 1..n to n..1 ; call instantiate ref (def reATOM e (op arg) (do (set op (de_op (head e))) (set arg (de_arg (head e))) (if (= op xFUN) (set LV (& arg 255)) (if (or (= op xGET) (= op xPUT)) (sethead e (mkATOM op (+ (- LV arg) 1))) (if (or (= op xLDX) (= op xSTX)) (sethead e (mkATOM op (+ (- LV arg) 1))) (if (= op xCALL) (sethead e (mkATOM op (getVal arg))))))))) ; traverse n-code with one lookahead (def reName e () (if (!= e NIL) (if (isATOM (head e)) (do (reATOM e) (reName (tail e))) ; else (do (reName (head e)) (reName (tail e)))))) ; scan symtab for fundef and reName its body (def resolve () (i end e) (do (set i 90) (set end (* esize numNames)) (while (< i end) ; scan symtab (do (if (= (getType i) tyFUN) (do (set e (getVal i)) ; body (reName e))) (set i (+ i esize)))))) ; relocate arg in atom call.arg by disp (def reloc (a disp) (v op arg) (do (set op (de_op a)) (set arg (de_arg a)) (if (= op xCALL) (set v (mkATOM op (+ (- arg disp) 2))) ; else (set v a)) v)) ; shift a dot-pair by disp, except NIL (def shift (a disp) () (if a (+ (- a disp) 2) 0)) ; relocate start to 2 (def outobj2 (start end) (i a b ty) (do (set a (getVal (install "main"))) (print (shift a start)) (space) (print (- (shift end start) 2)) (nl) (set i start) (while (< i end) ; code segment (do (set a (head i)) (set b (tail i)) (if (isATOM a) (do (set ty 1) (set a (reloc a start))) ; else dot-pair (do (set ty 0) (set a (shift a start)))) (print (shift i start)) (space) (print ty) (space) (print (de_op a)) (space) (print (de_arg a)) (space) (print (shift b start)) (nl) (set i (+ i 2)))) (print DP) (nl) ; data segment (set i 0) (while (< i DP) (do (print (vec mem i)) (space) (set i (+ i 1)) (if (= (& i 7) 0) (nl)))) (nl) (dumpsym))) (def main () (start end) (do (init) (initsym) (set start (sys 9)) (sys 11) ; readinfile (parse) (resolve) (set end (sys 9)) (outobj2 start end) (nop))) ; End