; nut compiler in nut 18 June 2006 ; nut completion kit 4 July 2006 ; ------- lib ------------ (def != (a b) () (if (= a b) 0 1)) (def >= (a b) () (if (< a b) 0 1)) (def and (a b) ()(if a b 0)) (def or (a b) () (if a 1 b)) (def not a () (if a 0 1)) (def nop () () 0) (def print a () (sys 1 a)) (def printc c () (sys 2 c)) (def space () () (sys 2 32)) (def nl () () (sys 2 10)) (def exit () () (sys 13)) ; -------- header ---------- (enum 127 EOF) (enum 0 NIL) (enum 3000 MAXNAMES) (enum 3000 MAXSTR) (enum 3000 MEMMAX) (enum 1 xIF xWHILE xDO xUD1 xNEW xADD xSUB xMUL xDIV xEQ xLT xGT xCALL xGET xPUT xLIT xLDX xSTX xFUN xSYS xSET xSETV xVEC xUD2 xLD xST xLDY xSTY xUD3 xUD4 xUD5 xSTR xBAND xSHR xSHL) (enum 2 tyVAR tyFUN tyOP tyOPX tySYS tyUD tyGVAR tyXX tyENUM) ; tok -- token string ; LP RP -- string parenthesis ; mem -- data segment ; DP -- pointer to mem[] (let tok LP RP DP mem) (def init () () (do (set mem (new MEMMAX)) ; global var area (set DP 0) (set LP "(") (set RP ")"))) ; --------- string ---------- ; input output of string functions are pointers ; to dereference it, use (vec s 0) ; print string (def prstr s () (while (vec s 0) (do (printc (vec s 0)) (set s (+ s 1))))) ; test string equal (def str= (s1 s2) (flag i c1 c2) (do (set flag 1) (set i 0) (while flag (do (set c1 (vec s1 i)) (set c2 (vec s2 i)) (if (!= c1 c2) (set flag 0) (if (= c1 0) (set flag 0) (if (= c2 0) (set flag 0)))) (set i (+ i 1)))) (and (= c1 0) (= c2 0)))) ; find string length, recursive version (def strlen s () (if (vec s 0) (+ 1 (strlen (+ s 1))) 0)) ; copy string (def strcpy (s1 s2) a (do (set a (vec s2 0)) (while a (do (setv s1 0 a) (set s1 (+ s1 1)) (set s2 (+ s2 1)) (set a (vec s2 0)))) (setv s1 0 0))) ; check is string a number, no sign (def isNumber s a (do (set a (vec s 0)) (while (and (> a 47) (< a 58)) ; isdigit a (do (set s (+ s 1)) (set a (vec s 0)))) (= a 0))) ; true if reach end (def isString s () (= (vec s 0) 34)) ; start with quote ; check is string a number, no sign (def atoi s (a v) (do (set v 0) (set a (vec s 0)) (while a (do (set v (- (+ (* v 10) a) 48)) (set s (+ s 1)) (set a (vec s 0)))) v)) ; ------- symbol table -------- (def error s () (do (prstr s) (sys 12) ; line no (sys 13))) ; exit ; symbol table is an array ; each item has five elements: name, type, val, arity, lv ; name pointed to a string which is allocated separately (enum 5 esize) ; size of each element ; symtab -- symbol table ; symstr -- symbol string table ; symp -- pointer to symstr[] ; numNames -- number of symbol in symtab ; numLocal -- number of local var (let symtab symstr symp numNames numLocal) ; access functions (def getName idx () (vec symtab idx)) (def getType idx () (vec symtab (+ idx 1))) (def getVal idx () (vec symtab (+ idx 2))) (def getArity idx () (vec symtab (+ idx 3))) (def getLv idx () (vec symtab (+ idx 4))) (def setName (idx nm) () (setv symtab idx nm)) (def setType (idx ty) () (setv symtab (+ idx 1) ty)) (def setVal (idx v) () (setv symtab (+ idx 2) v)) (def setArity (idx v) () (setv symtab (+ idx 3) v)) (def setLv (idx v) () (setv symtab (+ idx 4) v)) ; allocate new string from symstr[] (def newName nm (k v) (do (set k (+ (strlen nm) 1)) (set v (+ symstr symp)) (set symp (+ symp k)) (if (> symp MAXSTR) (error "symbol string full")) (strcpy v nm) v)) ; search symtab for nm ; if found, return its index ; if not found, insert it (def install nm (i flag end) (do (set i 0) (set flag 1) (set end (* esize numNames)) (while (and flag (< i end)) (if (str= (getName i) nm) ; sequential search (set flag 0) ; else (set i (+ i esize)))) (if flag ; not found (do (if (> i MAXNAMES) (error "symtab overflow")) (setName i (newName nm)) (setType i tyUD) (set numNames (+ numNames 1)))) i)) ; install nm as local variable (def installLocal nm idx (do (set numLocal (+ numLocal 1)) (set idx (install nm)) (setType idx tyVAR) (setVal idx numLocal))) (def dumpsym () (i end) (do (print (- numNames 18)) (nl) (set i 90) ; 18 keywords (set end (* esize numNames)) (while (< i end) (do (prstr (getName i)) (space) (print (getType i)) (space) (print (getVal i)) (space) (print (getArity i)) (space) (print (getLv i)) (nl) (set i (+ i esize)))))) (def insertsym (nm ty op) (idx) (do (set idx (install nm)) (setType idx ty) (setVal idx op))) ; initially keywords are inserted into symtab ; its value is its opcode (def initsym () () (do (set symtab (new MAXNAMES)) (set symstr (new MAXSTR)) ; symbol string (set symp 0) ; symstr pointer (set numNames 0) (set numLocal 0) (insertsym "if" tyOP xIF) (insertsym "while" tyOP xWHILE) (insertsym "set" tyOPX xSET) (insertsym "setv" tyOPX xSETV) (insertsym "do" tyOP xDO) (insertsym "new" tyOP xNEW) (insertsym "+" tyOP xADD) (insertsym "-" tyOP xSUB) (insertsym "*" tyOP xMUL) (insertsym "/" tyOP xDIV) (insertsym "=" tyOP xEQ) (insertsym "<" tyOP xLT) (insertsym ">" tyOP xGT) (insertsym "vec" tyOPX xVEC) (insertsym "sys" tySYS xSYS) (insertsym "&" tyOP xBAND) (insertsym ">>" tyOP xSHR) (insertsym "<<" tyOP xSHL))) ; ---------- data --------- (def cons (e list) h (do (set h (new 2)) (setv h 0 e) ; set head (setv h 1 list) ; set tail h)) ; allocate n int from mem[] (def newdata n (a) (do (if (>= DP MEMMAX) (error "out of memory")) (set a DP) (set DP (+ DP n)) a)) ; allocate string space from mem[] ; copy s to there (def mkSTR s (s2) (do (set s2 (newdata (+ (strlen s) 1))) (strcpy (+ mem s2) s) s2)) (def isATOM e () (< e 0)) ; MSB bit 1 (def mkATOM (op arg) () (+ (<< (+ (& op 127) 128) 24) (& arg 16777215))) ; --------- parser ---------- (def tokenise () () (do (set tok (sys 3)) ; (printc 34) (pstrs tok) (space) (nop))) (def expect s () (if (not (str= tok s)) (do (prstr "expect ") (error s)))) (def prList e () (sys 10 e)) ; parse name list, in fun header ; NL can be singleton or list (def parseNL () () (do (tokenise) (if (str= tok LP) (do (tokenise) (while (not (str= tok RP)) (do (installLocal tok) (tokenise)))) ; else (installLocal tok)))) ; parse local var (def doVar (op arg) (v) (do (if (= op xSET) (set v (mkATOM xPUT arg)) (if (= op xSETV) (set v (mkATOM xSTX arg)) (if (= op xVEC) (set v (mkATOM xLDX arg)) (error "unknown op")))) v)) ; parse global var (def doGvar (op arg) () 0) (def doEnum n () 0) ; parse a name (def parseName () (idx n n2 ty v) (do (set idx (install tok)) (set n (getVal idx)) (set ty (getType idx)) (if (= ty tyOP) (set v (mkATOM n 0)) (if (= ty tyVAR) (set v (mkATOM xGET n)) (if (= ty tyGVAR) (set v (mkATOM xLD n)) (if (= ty tyFUN) (set v (mkATOM xCALL idx)) (if (= ty tyOPX) (do (tokenise) ; get var name (set idx (install tok)) (set n2 (getVal idx)) (if (= (getType idx) tyVAR) (set v (doVar n n2)) ; else it is Gvar (set v (doGvar n n2)))) (if (= ty tySYS) (do (tokenise) ; get sys num (set v (mkATOM xSYS (atoi tok)))) (if (= ty tyENUM) (set v (doEnum n))))))))) v)) (def parseExp () () 0) ; forward declaration ; parse expression list (def parseEL () v (do (tokenise) (if (str= tok RP) (set v NIL) ; else (set v (cons (parseExp) (parseEL)))) v)) (def doString s () 0) ; parse expression (def parseExp () v (do (if (str= tok LP) ; it is a list (do (tokenise) (set v (cons (parseName) (parseEL)))) (if (isNumber tok) ; it is a number (set v (mkATOM xLIT (atoi tok))) (if (isString tok) (set v (doString (+ tok 1))) (set v (parseName))))) v)) ; parse function definition (def parseDef () (idx arity e k) (do (tokenise) (set idx (install tok)) (setType idx tyFUN) (set numLocal 0) (parseNL) (set arity numLocal) (parseNL) (tokenise) (set e (parseExp)) (tokenise) ; skip RP (if (isATOM e) (set e (cons e NIL))) ; body must be list (setArity idx arity) (setLv idx numLocal) (set k (+ (* arity 256) (& numLocal 255))) (setVal idx (cons (mkATOM xFUN k) (cons e NIL))) idx)) (def prName idx () (prstr (getName idx))) ; parse "let" expression (def parseLet () (idx x) (do (tokenise) (while (not (str= tok RP)) (do (set idx (install tok)) (if (!= (getType idx) tyUD) (error "redefine global var")) (setType idx tyGVAR) (setVal idx (newdata 1)) (prstr tok) (nl) (tokenise))))) ; parse "enum" expression (def parseEnum () () 0) ; the main parser (def parse () (idx) (do (tokenise) (while (!= (vec tok 0) EOF) (do (expect LP) (tokenise) (if (str= tok "def") (do (set idx (parseDef)) (prName idx) (nl) (prList (getVal idx)) (nl)) (if (str= tok "let") (parseLet) (if (str= tok "enum") (parseEnum) ; else (error "unknown keyword")))) (tokenise))))) ; ------- rename --------- ; resolve scans symtab to find fun def and rename it ; rename traverses n-code and rename local var from 1..n to n..1 ; and instantiates call.idx to the actual reference (let LV) ; number of local var in a fun def (def head e () (vec e 0)) (def tail e () (vec e 1)) (def sethead (e v) () (setv e 0 v)) (def de_op x () (& (>> x 24) 127)) (def de_arg x () (& x 16777215)) ; x & 0x0ffffff ; get/put/ldx/stx rename 1..n to n..1 ; call instantiate ref (def reATOM e (op arg) (do (set op (de_op (head e))) (set arg (de_arg (head e))) (if (= op xFUN) (set LV (& arg 255)) (if (or (= op xGET) (= op xPUT)) (sethead e (mkATOM op (+ (- LV arg) 1))) (if (or (= op xLDX) (= op xSTX)) (sethead e (mkATOM op (+ (- LV arg) 1))) (if (= op xCALL) (sethead e (mkATOM op (getVal arg))))))))) ; traverse n-code with one lookahead (def reName e () (if (!= e NIL) (if (isATOM (head e)) (do (reATOM e) (reName (tail e))) ; else (do (reName (head e)) (reName (tail e)))))) ; scan symtab for fundef and reName its body (def resolve () (i end e) (do (set i 90) (set end (* esize numNames)) (while (< i end) ; scan symtab (do (if (= (getType i) tyFUN) (do (set e (getVal i)) ; body (reName e))) (set i (+ i esize)))))) ; relocate arg in atom call.arg by disp (def reloc (a disp) (v op arg) (do (set op (de_op a)) (set arg (de_arg a)) (if (= op xCALL) (set v (mkATOM op (+ (- arg disp) 2))) ; else (set v a)) v)) ; shift a dot-pair by disp, except NIL (def shift (a disp) () (if a (+ (- a disp) 2) 0)) ; out n-code from start (not include) end ; out data segment and symtab ;(def outobj (start end) (i a ty) ; (do ; (set i (install "main")) ; (set a (getVal i)) ; (print a) (space) ; (print (- end 2)) (nl) ; (set i start) ; (while (< i end) ; code segment ; (do ; (set a (head i)) ; (set ty 0) ; (if (isATOM a) (set ty 1)) ; (print i) (space) ; (print ty) (space) ; (print (de_op a)) (space) ; (print (de_arg a)) (space) ; (print (tail i)) (nl) ; (set i (+ i 2)))) ; (print 0) (nl) ; data segment ; (dumpsym))) ; relocate start to 2 (def outobj2 (start end) (i a b ty) (do (set a (getVal (install "main"))) (print (shift a start)) (space) (print (- (shift end start) 2)) (nl) (set i start) (while (< i end) ; code segment (do (set a (head i)) (set b (tail i)) (if (isATOM a) (do (set ty 1) (set a (reloc a start))) ; else dot-pair (do (set ty 0) (set a (shift a start)))) (print (shift i start)) (space) (print ty) (space) (print (de_op a)) (space) (print (de_arg a)) (space) (print (shift b start)) (nl) (set i (+ i 2)))) (print DP) (nl) ; data segment (set i 0) (while (< i DP) (do (print (vec mem i)) (space) (set i (+ i 1)) (if (= (& i 7) 0) (nl)))) (nl) (dumpsym))) ; ---------- test --------- ;(def testsym () (idx) ; (do ; (install "abc") ; (install "def") ; (install "123") ; (set idx (install "abc")) ; (prstr (getName idx)) (nl) ; (dumpsym))) ;(def testparsedef () (idx) ; (do ; (tokenise) ; LP ; (tokenise) ; def ; (set idx (parseDef)) ; (nl) ; (prList (getVal idx)))) ;(def testtok () () ; (do ; (tokenise) ; (while (!= (vec tok 0) EOF) ; (do ; (prstr tok) ; (space) ; (tokenise))))) ;(def testlist () (one two three a b) ; (do ; (set one (mkATOM xLIT 1)) ; (set two (mkATOM xLIT 2)) ; (set three (mkATOM xLIT 3)) ; (set b (cons two (cons three NIL))) ; (set a (cons b (cons one NIL))) ; (prList a) (nl) ; (prList (head a)) (nl) ; (prList (tail a)) (nl) ; (prList (tail (tail a))) (nl) ; (sethead a two) ; (reName a) ; (prList a) (nl))) (def main () (start end) (do (init) (initsym) (set start (sys 9)) (sys 11) ; readinfile (parse) (resolve) (set end (sys 9)) (outobj2 start end) (nop))) ; End