; nut compiler in nut 18 June 2006 ; extend to full set 24 June 2006 ; modify beautify 22 June 2010 ; ------- lib ------------ (def != (a b) () (if (= a b) 0 1)) (def >= (a b) () (if (< a b) 0 1)) (def and (a b) ()(if a b 0)) (def or (a b) () (if a 1 b)) (def not a () (if a 0 1)) (def nop () () 0) (def print a () (sys 1 a)) (def printc c () (sys 2 c)) (def space () () (sys 2 32)) (def nl () () (sys 2 10)) (def exit () () (sys 13)) ; -------- header ---------- (enum 127 EOF) (enum 0 NIL) (enum 3000 MAXNAMES) (enum 5000 MAXSTR) (enum 5000 MEMMAX) (enum 1 xIF xWHILE xDO xUD1 xNEW xADD xSUB xMUL xDIV xEQ xLT xGT xCALL xGET xPUT xLIT xLDX xSTX xFUN xSYS xSET xSETV xVEC xUD2 xLD xST xLDY xSTY xUD3 xUD4 xUD5 xSTR xBAND xSHR xSHL) (enum 2 tyVAR tyFUN tyOP tyOPX tySYS tyUD tyGVAR tyXX tyENUM) ; tok -- token string ; LP RP -- string parenthesis ; mem -- data segment ; DP -- pointer to mem[] ; Start -- start of user code segment (let tok LP RP DP mem Start) (def init () () (do (set mem (new MEMMAX)) ; global var area (set DP 0) (set LP "(") (set RP ")"))) ; --------- string ---------- ; input output of string functions are pointers ; to dereference it, use (vec s 0) ; print string (def prstr s () (while (vec s 0) (do (printc (vec s 0)) (set s (+ s 1))))) ; test string equal (def str= (s1 s2) (i) (do (set i 0) (while (and (vec s1 i) (= (vec s1 i) (vec s2 i))) (set i (+ i 1))) (= (vec s1 i) (vec s2 i)))) (def strlen s (n i) (do (set n 0) (set i 0) (while (vec s i) (do (set n (+ n 1)) (set i (+ i 1)))) n)) ; find string length, recursive version ;(def strlen s () ; (if (vec s 0) ; (+ 1 (strlen (+ s 1))) ; 0)) ; copy string (def strcpy (s1 s2) (i) (do (set i 0) (while (vec s2 i) (do (setv s1 i (vec s2 i)) (set i (+ i 1)))) (setv s1 i 0))) ; check is string a number, no sign (def isNumber s a (do (set a (vec s 0)) (while (and (> a 47) (< a 58)) ; isdigit a (do (set s (+ s 1)) (set a (vec s 0)))) (= a 0))) ; true if reach end (def isString s () (= (vec s 0) 34)) ; start with quote ; convert string to number, no sign (def atoi s (a v) (do (set v 0) (set a (vec s 0)) (while a (do (set v (- (+ (* v 10) a) 48)) (set s (+ s 1)) (set a (vec s 0)))) v)) ; ------- symbol table -------- (def error s () (do (prstr s) (sys 12) ; line no (sys 13))) ; exit ; shift down a dot-pair by disp, except NIL (def shift (a disp) () (if a (+ (- a disp) 2) 0)) ; symbol table is an array ; each item has five elements: name, type, val, arity, lv ; name pointed to a string which is allocated separately (enum 5 esize) ; size of each element ; symtab -- symbol table ; symstr -- symbol string table ; symp -- pointer to symstr[] ; numNames -- number of symbol in symtab ; numLocal -- number of local var (let symtab symstr symp numNames numLocal) ; access functions (def getName idx () (vec symtab idx)) (def getType idx () (vec symtab (+ idx 1))) (def getVal idx () (vec symtab (+ idx 2))) (def getArity idx () (vec symtab (+ idx 3))) (def getLv idx () (vec symtab (+ idx 4))) (def setName (idx nm) () (setv symtab idx nm)) (def setType (idx ty) () (setv symtab (+ idx 1) ty)) (def setVal (idx v) () (setv symtab (+ idx 2) v)) (def setArity (idx v) () (setv symtab (+ idx 3) v)) (def setLv (idx v) () (setv symtab (+ idx 4) v)) ; allocate new string from symstr[] (def newName nm (k v) (do (set k (+ (strlen nm) 1)) (set v (+ symstr symp)) (set symp (+ symp k)) (if (> symp MAXSTR) (error "symbol string full")) (strcpy v nm) v)) ; search symtab for nm ; if found, return its index ; if not found, insert it (def install nm (i flag end) (do (set i 0) (set flag 1) (set end (* esize numNames)) (while (and flag (< i end)) (if (str= (getName i) nm) ; sequential search (set flag 0) ; else (set i (+ i esize)))) (if flag ; not found (do (if (> i MAXNAMES) (error "symtab overflow")) (setName i (newName nm)) (setType i tyUD) (set numNames (+ numNames 1)))) i)) ; install nm as local variable (def installLocal nm idx (do (set numLocal (+ numLocal 1)) (set idx (install nm)) (setType idx tyVAR) (setVal idx numLocal))) ; shift ref to start at 2 (def dumpsym () (i end n ty) (do ; (print (- numNames 18)) (nl) (set i 90) ; 18 keywords (set end (* esize numNames)) (set n 0) ; count export sym (while (< i end) (do (set ty (getType i)) (if (or (= ty tyFUN) (= ty tyGVAR)) (set n (+ n 1))) (set i (+ i esize)))) (print n) (nl) (set i 90) (while (< i end) (do (set ty (getType i)) (if (or (= ty tyFUN) (= ty tyGVAR)) (do (prstr (getName i)) (space) (print (getType i)) (space) (if (= ty tyFUN) (set n (shift (getVal i) Start)) ; else (set n (getVal i))) (print n) (space) (print (getArity i)) (space) (print (getLv i)) (nl))) (set i (+ i esize)))))) (def insertsym (nm ty op) (idx) (do (set idx (install nm)) (setType idx ty) (setVal idx op))) ; initially keywords are inserted into symtab ; its value is its opcode (def initsym () () (do (set symtab (new MAXNAMES)) (set symstr (new MAXSTR)) ; symbol string (set symp 0) ; symstr pointer (set numNames 0) (set numLocal 0) (insertsym "if" tyOP xIF) (insertsym "while" tyOP xWHILE) (insertsym "set" tyOPX xSET) (insertsym "setv" tyOPX xSETV) (insertsym "do" tyOP xDO) (insertsym "new" tyOP xNEW) (insertsym "+" tyOP xADD) (insertsym "-" tyOP xSUB) (insertsym "*" tyOP xMUL) (insertsym "/" tyOP xDIV) (insertsym "=" tyOP xEQ) (insertsym "<" tyOP xLT) (insertsym ">" tyOP xGT) (insertsym "vec" tyOPX xVEC) (insertsym "sys" tySYS xSYS) (insertsym "&" tyOP xBAND) (insertsym ">>" tyOP xSHR) (insertsym "<<" tyOP xSHL))) ; ---------- data --------- (def cons (e list) h (do (set h (new 2)) (setv h 0 e) ; set head (setv h 1 list) ; set tail h)) ; allocate n int from mem[] (def newdata n a (do (if (>= DP MEMMAX) (error "out of memory")) (set a DP) (set DP (+ DP n)) a)) (def isATOM e () (< e 0)) ; MSB bit 1 (def mkATOM (op arg) () (+ (<< (+ (& op 127) 128) 24) (& arg 16777215))) ; allocate string space from mem[] ; copy s to there (def mkSTR s s2 (do (set s2 (newdata (+ (strlen s) 1))) (strcpy (+ mem s2) s) s2)) ; --------- parser ---------- (def tokenise () () (do (set tok (sys 3)) ; (prstr tok) ; (space) (nop))) (def expect s () (if (not (str= tok s)) (do (prstr "expect ") (error s)))) (def prList e () (sys 10 e)) ; parse name list, in fun header ; NL can be singleton or list (def parseNL () () (do (tokenise) (if (str= tok LP) (do (tokenise) (while (not (str= tok RP)) (do (installLocal tok) (tokenise)))) ; else (installLocal tok)))) ; parse a name (def parseName () (idx n n2 ty) (do (set idx (install tok)) (set n (getVal idx)) (set ty (getType idx)) (if (= ty tyOP) (mkATOM n 0) (if (= ty tyVAR) (mkATOM xGET n) (if (= ty tyGVAR) (mkATOM xLD n) (if (= ty tyFUN) (mkATOM xCALL idx) (if (= ty tyOPX) (do (tokenise) ; get var name (set idx (install tok)) (set n2 (getVal idx)) (if (= (getType idx) tyVAR) (if (= n xSET) (mkATOM xPUT n2) (if (= n xSETV) (mkATOM xSTX n2) (if (= n xVEC) (mkATOM xLDX n2) (error "unknown op")))) ; else, it is gvar (if (= n xSET) (mkATOM xST n2) (if (= n xSETV) (mkATOM xSTY n2) (if (= n xVEC) (mkATOM xLDY n2) (error "unknown op")))))) (if (= ty tySYS) (do (tokenise) ; get sys num (mkATOM xSYS (atoi tok))) (if (= ty tyENUM) (mkATOM xLIT n)))))))))) (def parseExp () () 0) ; forward declaration ; parse expression list (def parseEL () () (do (tokenise) (if (str= tok RP) NIL ; else (cons (parseExp) (parseEL))))) ; parse expression (def parseExp () () (if (str= tok LP) ; it is a list (do (tokenise) (cons (parseName) (parseEL))) (if (isNumber tok) ; it is a number (mkATOM xLIT (atoi tok)) (if (isString tok) (mkATOM xSTR (mkSTR (+ tok 1))) ; else (parseName))))) ; parse function definition (def parseDef () (idx arity e k) (do (tokenise) (set idx (install tok)) (setType idx tyFUN) (set numLocal 0) (parseNL) (set arity numLocal) (parseNL) (tokenise) (set e (parseExp)) (tokenise) ; skip RP (if (isATOM e) (set e (cons e NIL))) ; body must be list (setArity idx arity) (setLv idx numLocal) (set k (+ (* arity 256) (& numLocal 255))) (setVal idx (cons (mkATOM xFUN k) (cons e NIL))) idx)) ; parse "let" expression (def parseLet () (idx) (do (tokenise) (while (not (str= tok RP)) (do (set idx (install tok)) (if (!= (getType idx) tyUD) (error "redefine global var")) (setType idx tyGVAR) (setVal idx (newdata 1)) ; (prstr tok) (nl) (tokenise))))) ; parse "enum" expression (def parseEnum () (idx k) (do (tokenise) (if (not (isNumber tok)) (error "expect number")) (set k (atoi tok)) (tokenise) (while (not (str= tok RP)) (do (set idx (install tok)) (if (!= (getType idx) tyUD) (error "redefine enum name")) (setType idx tyENUM) (setVal idx k) (set k (+ k 1)) (tokenise))))) (def prName idx () (prstr (getName idx))) ; the main parser (def parse () (idx) (do (tokenise) (while (!= (vec tok 0) EOF) (do (expect LP) (tokenise) (if (str= tok "def") (parseDef) ; to print out the fun name ; (do ; (set idx (parseDef))) ; (prName idx) (nl) ; (prList (getVal idx)) (nl)) (if (str= tok "let") (parseLet) (if (str= tok "enum") (parseEnum) ; else (error "unknown keyword")))) (tokenise))))) ; ------- rename --------- ; resolve scans symtab to find fun def and rename it ; rename traverses n-code and rename local var from 1..n to n..1 ; and instantiates call.idx to the actual reference (let LV) ; number of local var in a fun def (def head e () (vec e 0)) (def tail e () (vec e 1)) (def sethead (e v) () (setv e 0 v)) (def de_op x () (& (>> x 24) 127)) (def de_arg x () (& x 8388607)) ; x & 0x07fffff ; be careful sign extension of bit 23 at s-code ! ; get/put/ldx/stx rename 1..n to n..1 ; call instantiate ref (def reATOM e (op arg) (do (set op (de_op (head e))) (set arg (de_arg (head e))) (if (= op xFUN) (set LV (& arg 255)) (if (or (= op xGET) (= op xPUT)) (sethead e (mkATOM op (+ (- LV arg) 1))) (if (or (= op xLDX) (= op xSTX)) (sethead e (mkATOM op (+ (- LV arg) 1))) (if (= op xCALL) (sethead e (mkATOM op (getVal arg))))))))) ; traverse n-code with one lookahead (def reName e () (if (!= e NIL) (if (isATOM (head e)) (do (reATOM e) (reName (tail e))) ; else (do (reName (head e)) (reName (tail e)))))) ; scan symtab for fundef and reName its body (def resolve () (i end e) (do (set i 90) (set end (* esize numNames)) (while (< i end) ; scan symtab (do (if (= (getType i) tyFUN) (do (set e (getVal i)) ; body (reName e))) (set i (+ i esize)))))) ; relocate arg in atom call.arg by disp (def reloc (a disp) (op arg) (do (set op (de_op a)) (set arg (de_arg a)) (if (= op xCALL) (mkATOM op (shift arg disp)) ; else a))) ; out n-code from start (not include) end ; out data segment and symtab ; relocate start to 2 (def outobj2 (start end) (i a b ty) (do (set a (getVal (install "main"))) (print (shift a start)) (space) (print (- (shift end start) 2)) (nl) (set i start) (while (< i end) ; code segment (do (set a (head i)) (set b (tail i)) (if (isATOM a) (do (set ty 1) (set a (reloc a start))) ; else dot-pair (do (set ty 0) (set a (shift a start)))) (print (shift i start)) (space) (print ty) (space) (print (de_op a)) (space) (print (de_arg a)) (space) (print (shift b start)) (nl) (set i (+ i 2)))) (print DP) (nl) ; data segment (set i 0) (while (< i DP) (do (print (vec mem i)) (space) (set i (+ i 1)) (if (= (& i 7) 0) (nl)))) (nl) (dumpsym))) ; ---------- test --------- ;(def testsym () (idx) ; (do ; (install "abc") ; (install "def") ; (install "123") ; (set idx (install "abc")) ; (prstr (getName idx)) (nl) ; (dumpsym))) ;(def testparsedef () (idx) ; (do ; (tokenise) ; LP ; (tokenise) ; def ; (set idx (parseDef)) ; (nl) ; (prList (getVal idx)))) ;(def testtok () () ; (do ; (tokenise) ; (while (!= (vec tok 0) EOF) ; (do ; (prstr tok) ; (space) ; (tokenise))))) ;(def testlist () (one two three a b) ; (do ; (set one (mkATOM xLIT 1)) ; (set two (mkATOM xLIT 2)) ; (set three (mkATOM xLIT 3)) ; (set b (cons two (cons three NIL))) ; (set a (cons b (cons one NIL))) ; (prList a) (nl) ; (prList (head a)) (nl) ; (prList (tail a)) (nl) ; (prList (tail (tail a))) (nl) ; (sethead a two) ; (reName a) ; (prList a) (nl))) ; ------- main ----------- ; area for user program begins at "start" ; area before that is used by the compiler itself ; must "readinfile" first to get input stream (def main () (end) (do (init) (initsym) (set Start (sys 9)) (sys 11) ; readinfile (parse) (resolve) (set end (sys 9)) (outobj2 Start end))) ; End