// benmark compiler som v.2.0

// lib.txt   21 Oct 2002
//   public release som-v2  30 Dec 2004

//to neg a = 0 - a
//to mod m n = m - (n * (m / n))  // mod is already primitive

to print x = syscall {1 x}
to printc x = syscall {2 x}
to space = syscall {2 32}
to nl = syscall {2 10}
to getchar = syscall {3}
to gets buf = syscall {4 buf}

// string-s.txt   string functions
//    som-string is an array of int
//    an int contains at most 4 char, right pad with 0
//    terminate by int 0

//    public release som-v2    30 Dec 2004

// copy s1 = s2
to strcpy s1 s2 | i =
    i = 0
    while s2[i] != 0
        s1[i] = s2[i]
        i = i + 1
    s1[i] = 0

to strlen s | k =
    k = 0
    while s[k] != 0
        k = k + 1
    k

to streq s1 s2 | flag i c1 c2 c3 =
    flag = 1
    i = 0
    while flag
        c1 = s1[i] == 0
        c2 = s2[i] == 0
        c3 = s1[i] == s2[i]
        if c1 & c2
            break
        if c1 | c2 | (!c3)
            flag = 0
        else
            i = i + 1
    flag

// s1 is som-string (packed string)
// s1 contains only number (no sign)
to atoi s1 | a i c1 c2 c3 c4 v =
    v = 0
    i = 0
    a = s1[i]
    while a != 0
        c4 = a & 255
        c3 = (a >> 8) & 255
        c2 = (a >> 16) & 255
        c1 = (a >> 24) & 255
        if c1 != 0 v = v*10 + c1 - 48
        if c2 != 0 v = v*10 + c2 - 48
        if c3 != 0 v = v*10 + c3 - 48
        if c4 != 0 v = v*10 + c4 - 48
        i = i + 1
        a = s1[i]
    v

// print string s1, s1 is som-string
to prints s1 | a i c1 c2 c3 c4 =
    i = 0
    a = s1[i]
    while a != 0
        c4 = a & 255
        c3 = (a >> 8) & 255
        c2 = (a >> 16) & 255
        c1 = (a >> 24) & 255
        if c1 != 0 printc c1
        if c2 != 0 printc c2
        if c3 != 0 printc c3
        if c4 != 0 printc c4
        i = i + 1
        a = s1[i]

// pack array of char to som-string s1
to strpack s1 ar start len | a k i e =
    k = 0
    i = start
    e = start + len
    while i < e
        a = ar[i] << 24
        i = i + 1
        if i < e
            a = a | (ar[i] << 16)
            i = i + 1
        if i < e
            a = a | (ar[i] << 8)
            i = i + 1
        if i < e
            a = a | ar[i]
            i = i + 1
        s1[k] = a
        k = k + 1
    s1[k] = 0

// End

// compile-h-s.txt
// header file for som-in-som project
//	public release som-v2 30 December 2004

// sequence of loading files
//
// lib.txt
// string-s.txt
// compile-h-s.txt
// list-s.txt
// symtab5-s.txt
// token-s.txt
// parse-h-s.txt
// stmt-s.txt
// parse.som
// icode-s.txt
// gencode-s.txt
// eval-s.txt		not loaded, use eval-c instead
// main-s.txt

enum
    0 NIL MARK BMARK					   // marker
enum
    0 SP OPER NUM GNAME LNAME CNAME STRING // atom
enum
    1 icAdd icSub icMul icDiv icBand icBor icBxor
    icNot icEq icNe icLt icLe icGe icGt icShl icShr icMod
    icLdx icStx icRet icR1 icArray icEnd icGet icPut
    icLd icSt icJmp icJt icJf icLit icCall icR2
    icInc icDec icSys icCase icFun icCalli icBreak
enum
    14 tkIDEN tkNUMBER tkSTRING tkEOF	   // tokentype
enum
    50 tkSTAR tkSLASH tkMINUS tkPLUS tkEQ tkEQEQ
    tkAND tkBAR tkCARET tkMOD tkNOT tkNE tkLT tkLE
    tkLTLT tkGT tkGE tkGTGT tkCOLON tkLPAREN tkRPAREN
    tkLBRACKET tkRBRACKET tkBB tkBE tkTO tkIF tkELSE
    tkWHILE tkFOR tkBREAK tkARRAY tkCASE tkENUM tkSYSCALL
enum
    6 tyFUNC tyLOCAL tyGVAR tyENUM tyKEY   // symbol type

enum
    5678920 OBJ_SOMv2

//  memory model
//  M [ MAXMEM  ]		 declared in C
//
//  M[1..MAXSYS]  		 system area (in eval-s)
//
//  CS[MAXCS]    		 code segment
//       ^
//       CP
//
//  SS[MAXSS]	         stack segment  (in eval-c)
//      ^  ^			 not used, as we use SS in C
//      fp sp
//
//  ystack[MAXYSTK]		 parser stack
//           ^
//          ysp
//
//  cell[MAXCELL]		 store list (parse-tree)

//enum
//	100 MAXSYS		// size of system area
//enum
//	5000 MAXSS		// size of user stack
//enum
//	2000 MAXDS		// size of data segment

enum
    10000 MAXCS		// size of code segment
enum
    1000 MAXYSTK	// size of parser stack

to initSom =
    M = 0				// base of M, alias with C M[]
    CS = array MAXCS	// code segment
    CP = 3  			// code pointer
//	DS = array MAXDS	// data segment use eval-C
//	DP = 1				// free data pointer
//	SS = array MAXSS	// use eval-C and its SS
//	sp = 1
//	fp = 1
//	ip = 1
    ystack = array MAXYSTK  // parser stack
    ysp = 0
    currentf = 0		// current fn idx
    Lo = 0				// range of case label
    Hi = 0				//
    nenum = 0			// enum number
    userDS = 0			// beginning of user DS
    verbose = 1			// control compiler message
    noeval = 0			// control eval
	line = 0

// when no line number is available (gencode, eval, etc.)
to error x =
    prints "error: " prints x nl
    syscall {7}			// exit

// error during parsing
to seterror s =
    prints "line " print line space
    prints s nl
    syscall {7}

to warning nm mess =
    if verbose
        prints "Warning: "
        if nm != 0 prints nm space
        prints mess nl

// End

// list-s.txt    list processing
//   implement list.c  for som-som project  14 Jan 2004
//   P. Chongstitvatana

//   public release som-v2  30 Dec 2004

enum
    10 CELLPTR			// min pointer to acell
enum
    100000 MAXCELL

to init_list =
    cell = array MAXCELL
    freecell = CELLPTR
    endcell = MAXCELL / 2

to setcar a value = cell[a] = value
to setcdr a value = cell[a+1] = value
to car a = if a == NIL a else cell[a]
to cdr a = if a == NIL a else cell[a+1]

to newcell | a =
    a = freecell
    freecell = freecell + 2
    if freecell >= endcell
        seterror "out of memory cell"
    else
        setcar a NIL
        setcdr a NIL
    a

to islist x = (car x) >= CELLPTR
to isatom x = (car x) < CELLPTR

to newatom type value | a =
    a = newcell
    setcar a type
    setcdr a value
    a

// switch the half space
to initcell =
    if endcell == MAXCELL
        freecell = CELLPTR
        endcell = MAXCELL / 2
    else
        freecell = endcell + 1
        endcell = MAXCELL

to list a | b =
    b = newcell
    setcar b a
    b

to cons a l | b =
    b = newcell
    setcdr b l
    setcar b a
    b

to append lst x | a b =
    if x == NIL lst
    a = lst
    b = cdr a
    while b != NIL
        a = b
        b = cdr a
    setcdr a (list x)
    lst

to item2 x = car cdr x
to item3 x = car cdr cdr x

// can cons2 be used instead of cons ?  31 Mar 2004

// cons2 x, y = {NIL, atom, list}
to cons2 x y | z =
    if x == NIL
        y break
    if y == NIL
        list x break
    // it is the same whether x is atom of list
    // a new cell is required to build a dot-pair
    // only y must be inspected, if it is not a list
    // a new dot-pair to make y a list is needed
    if isatom y
        z = newatom y NIL
    else
        z = y
    newatom x z

// clone a copy of list t
to copylist t =
    if t == NIL
        NIL
        break
    if isatom t
        newatom (car t) (cdr t)
    else
        cons2 (copylist car t) (copylist cdr t)

to countcell | k =
    k = (MAXCELL - freecell) / 2
    prints "+freecell = " print k nl

// End

// symtab5-s.txt
//    symbol table implementation with one hash table
//    read som3-j-explain for detail


// structure of symbol table is two arrays of (name,attr)
//    name points to string, never be deleted
//    attr points to (type,ref,arity,lv)

// interface:
//    install string  -- search and insert string in symname
//                    -- return index to table, if new attr=0
//
//    enterLocal idx  -- error if duplicate
//    enterGlobal idx type ref
//    clearLocal
//    init_symtab
//    searchref ref   -- search by ref, ret idx, 0 not found

// pass the test  27 December 2004

enum
    1009 tablesize	  // a prime number
enum
    100 localsize

to init_symtab =
    symname = array tablesize
    symattr = array tablesize
    localvar = array localsize
    lv = 0
    freetuple = NIL

// access function by hash key

to getName a = symname[a]
to getAttr a = symattr[a]
to getType a = M[symattr[a]]
to getRef a = M[symattr[a]+1]
to getArity a = M[symattr[a]+2]
to getLv a = M[symattr[a]+3]

to setName a nm = symname[a] = nm
to setAttr a v = symattr[a] = v
to setType a v = M[symattr[a]] = v
to setRef a v = M[symattr[a]+1] = v
to setArity a v = M[symattr[a]+2] = v
to setLv a v = M[symattr[a]+3] = v

// access to tuple, use type as next field

to getNext a = M[a]
to setNext a v = M[a] = v

to newtuple | a =
    if freetuple == NIL
        a = array 4		// tuple (type,ref,arity,lv)
    else
        a = freetuple
        freetuple = getNext a
    M[a] = 0			// init to all 0
    M[a+1] = 0
    M[a+2] = 0
    M[a+3] = 0
    a

to hash s1 | i v a =
    i = 0
    v = 0
    a = s1[i]
    while a != 0
        v = v + a	// add all int
        i = i + 1
        a = s1[i]
    if (v < 0)	v = 0 - v
    v % tablesize

to dumpSym | i nm =
    prints "symbol table" nl
    for i 0 tablesize-1
        nm = getName i
        if nm != 0
            prints nm space
            if (getAttr i) != 0
                print (getType i) space
                print (getRef i)
            nl

to dumpLocal | i nm =
    prints "local symbol" nl
    for i 0 tablesize-1
        nm = getName i
        if nm != 0
            if (getAttr i) == 0
                prints nm
            else if (getType i) == tyLOCAL
                prints nm space
                print (getRef i)
            nl

// hash with linear probe
//   if new, insert name, return index
to install s1 | key i nm =
    key = hash s1
    i = key
    while 1
        nm = getName i
        if nm == 0		// not found, insert name
            nm = array ((strlen s1) + 1)
            strcpy nm s1
            setName i nm
            setAttr i 0
            i break
        if streq  nm s1
            i break		// return idx, found
        i = (i+1) % tablesize
        if i == key		// wrap around
            seterror "symbol table full" // impossible case

// must install name-string first to get idx
to enterLocal idx | nm =
    localvar[lv] = idx		// record localvar
    lv = lv + 1
    if lv >= localsize
        seterror "local sym overflow"

    if (getAttr idx) == 0		// new local
        setAttr idx newtuple
    else
        if (getType idx) == tyGVAR	// shadow gvar
            ref = getRef idx
            setArity idx tyGVAR		// move (type,ref) ->
            setLv idx ref
        else
            dumpLocal
            seterror "duplicate local"
    setType idx tyLOCAL
    setRef idx lv

// free pure local
to clearLocal | i idx type a =
    for i 0 lv-1
        idx = localvar[i]
        if (getType idx) != tyLOCAL  // impossible case
            seterror "wrong type in local sym"
        type = getArity idx
        if type == 0			// pure local
            a = getAttr idx
            setAttr idx 0
            setNext a freetuple	// free tuple
            freetuple = a
        else
            setType idx type	// move <- (arity,lv)
            setRef idx (getLv idx)
            setArity idx 0		// clear to 0
            setLv idx 0
    lv = 0

// must install name-string first to get idx
to enterGlobal idx type ref =
    if (getAttr idx) == 0
        setAttr idx newtuple
    setType idx type
    setRef idx ref

// search by ref, ret index, -1 not found
to searchref ref | i =
    for i 0 tablesize-1
        if (getAttr i) != 0
            if (getRef i) == ref
                break
    if (i < tablesize) i else 0-1

// End

// token-s.txt
//   lex in som    		    18 Dec 2004
//   public relese som-v2   30 Dec 2004

enum
    80 maxcol

to initlex =
    cLetter = array 8		// bitvec 256 bits
    cSpace = array 8
    cSep = array 8

    inbuf = array 300		// input line buffer
    line = 0				// current line number
    TP = 0					// current token pointer
    CLEN = 0				// current token length
    tok = 0					// current token
    tokvalue = 0			// token value
    tokstring = array 100	// token string

    // column stack for block indentation
    tokcol = 0				// current token column
    colstk = array maxcol	// column stack
    colsp = 0				// stack pointer

    lexstate = 1			// state of lex
    oldtok = 0				// old token

// use bitvector for char type

// set bitvector to 1
to setbitvec bitv c | a =
    a = bitv[ c / 32 ] | (1 << (c % 32))
    bitv[ c / 32 ] = a

// test bitvector
to is1 bitv c =
    bitv[ c / 32 ] & (1 << ( c % 32))

to setbits bitv a b | i =
    for i a b setbitvec bitv i

to initctype | i =
    for i 0 7			// clear bitvec
        cLetter[i] = 0
        cSpace[i] = 0
        cSep[i] = 0

    setbits cLetter 48 57	// 0..9
    setbits cLetter 65 90	// A..Z
    setbits cLetter 97 122	// a..z
//	setbitvec cLetter 36 	// $
//	setbitvec cLetter 63 	// ?
    setbitvec cLetter 95 	// _

    setbits cSpace 1 32		// blank
    setbits cSpace 128 254

    setbitvec cSep 0
    setbits cSep 33 34		// ! "
//	setbitvec cSep 35  		// #
    setbits cSep 37 38		// % &
    setbits cSep 40 43		// ( ) * +
    setbitvec cSep 45  		// -
    setbitvec cSep 47  		// /
    setbitvec cSep 58  		// :
    setbits cSep 60 62		// < = >
    setbitvec cSep 91  		// [
    setbits cSep 93 94		// ] ^
    setbits cSep 123 125	// { | }
    setbitvec cSep 255 		// EOF_CHAR

// return pointer to begin of token and CLEN
// if "//" is found set CLEN = 0
to token | pos a =
    pos = TP
    if (CLEN == 0) | (inbuf[pos] == 0)	// end buffer
        syscall {4 inbuf}	    // get a line
//		nl
        line = line + 1
        pos = 0
    while is1 cSpace inbuf[pos]	// skip space
        pos = pos + 1
    tokcol = pos
    if is1 cSep inbuf[pos]
        CLEN = 1
        if (inbuf[pos]==47) & (inbuf[pos+1]==47) // "//"
            CLEN = 0			// denote blank line
        if inbuf[pos] == 0
            CLEN = 0
    else
        a = pos
        while is1 cLetter inbuf[a]
            a = a + 1
        CLEN = a - pos
    pos

to isNum c =
    (c >= 48) & (c <= 57) // 0..9

// do atoi in an array ar[start] with length len
to aatoi ar start len | i c v =
    v = 0
    for i start start+len-1
        c = ar[i]
        if ! isNum c
            seterror "invalid integer"
        v = v*10 + c - 48
    v

// read until " or end of line, CLEN not count "
to collectstring | a c =
    a = TP+1
    c = inbuf[a]
    while (c != 34) & (c != 0) // until " or eol
        a = a + 1
        c = inbuf[a]
    CLEN = a - TP - 1

// affect CLEN, tok, tokvalue, tokstring. change TP
to lex1 | c d =
    TP = token
    while CLEN == 0		// read until get a token
        TP = token
    c = inbuf[TP]
    if c == 255			// EOF
        tok = tkEOF
        break
    if c == 34   		// " string
        collectstring
        tok = tkSTRING
        strpack tokstring inbuf TP+1 CLEN
        TP = TP + CLEN + 2
        break
    else if isNum c
        tokvalue = aatoi inbuf TP CLEN
        tok = tkNUMBER
    else
        d = inbuf[TP+1]	// lookahead
        case c
            33: if d == 61		// !=
                    CLEN = 2
                    tok = tkNE
                else
                    tok = tkNOT
            37: tok = tkMOD
            38: tok = tkAND
            40: tok = tkLPAREN
            41: tok = tkRPAREN
            42: tok = tkSTAR
            43: tok = tkPLUS
            45: tok = tkMINUS
            47: tok = tkSLASH
            58: tok = tkCOLON
            60: if d == 60		// <<
                    CLEN = 2
                    tok = tkLTLT
                else if d == 61 // <=
                    CLEN = 2
                    tok = tkLE
                else
                    tok = tkLT
            61: if d == 61		// ==
                    CLEN = 2
                    tok = tkEQEQ
                else
                    tok = tkEQ
            62: if d == 62		// >>
                    CLEN = 2
                    tok = tkGTGT
                else if d == 61 // >=
                    CLEN = 2
                    tok = tkGE
                else
                    tok = tkGT
            91: tok = tkLBRACKET // [
            93: tok = tkRBRACKET // ]
            94: tok = tkCARET	 // ^
            123: tok = tkBB		 // {
            124: tok = tkBAR	 // |
            125: tok = tkBE		 // }

            else:		// it is a string
                strpack tokstring inbuf TP CLEN
                tokvalue = install tokstring
                tok = tkIDEN
                if (getAttr tokvalue) != 0
                    if (getType tokvalue) == tyKEY // key word
                        tok = getRef tokvalue
        // end case
    TP = TP + CLEN

to pushcol col =
    if colsp >= maxcol
        seterror "lex error col stack overflow"
    else
        colsp = colsp + 1
        colstk[colsp] = col

to popcol =
    if colsp <= 0
        seterror "lex error col stack underflow"
    else
        colsp = colsp - 1

//to prtoken tk = {}

// lexstate N=1, F=2, B=3
// state description   condition  action    next
//   N  normal  lex     oldline   out tok     N
//   N                  nl & ==   out tok     N
//   N                  nl & >    out { push  F
//   N                  nl & <    out } pop   B
//   F  forward         -         out oldtok  N
//   B  backward        ==        out oldtok  N
//   B                  <         out } pop   B
//   transition occurs on call lex
//   eof is a special case

to lex | oldline =
    case lexstate
        1:	// normal
            oldline = line
            lex1
//			printc 34 prtoken tok printc 34 space
            oldtok = tok
            if tok == tkEOF			// out BE til match
                if colstk[colsp] != 0
                    tok = tkBE
                    popcol
                break
            if line != oldline		// newline
                if tokcol > colstk[colsp]
                    tok = tkBB
                    pushcol tokcol
                    lexstate = 2	// F
                else if tokcol < colstk[colsp]
                    tok = tkBE
                    popcol
                    lexstate = 3	// B
                // tokcol == colstk, just return tok
        2:	// forward
            tok = oldtok
            lexstate = 1			// N
        3:	// backward
            if tokcol >= colstk[colsp]
                tok = oldtok
                lexstate = 1		// N
            else if tokcol < colstk[colsp]
                tok = tkBE
                popcol

to startlex =
    initctype
    TP = 0
    CLEN = 0
    pushcol 0

to prtoken tk =
    case tk
        tkSTAR: printc 42 	// "*"
        tkSLASH: printc 47 	// "/"
        tkMINUS: printc 45	// "-"
        tkPLUS: printc 43	// "+"
        tkEQ: printc 61		// "="
        tkEQEQ: prints "=="
        tkAND: printc 38	// "&"
        tkBAR: printc 124	// "|"
        tkCARET: printc 94	// "^"
        tkMOD: printc 37	// "%"
        tkNOT: printc 33	// "!"
        tkNE: prints "!="
        tkLT: printc 60		// "<"
        tkLE: prints "<="
        tkLTLT: prints "<<"
        tkGT: printc 62		// ">"
        tkGE: prints ">="
        tkGTGT: prints ">>"
        tkCOLON: printc 58	// ":"
        tkLPAREN: printc 40 // "("
        tkRPAREN: printc 41 // ")"
        tkLBRACKET: printc 91 // "["
        tkRBRACKET: printc 93 // "]"
        tkBB: printc 123 	// "{"
        tkBE: printc 125	// "}"
        tkTO: prints "to"
        tkIF: prints "if"
        tkELSE: prints "else"
        tkWHILE: prints "while"
        tkFOR: prints "for"
        tkBREAK: prints "break"
        tkARRAY: prints "array"
        tkCASE: prints "case"
        tkENUM: prints "enum"
        tkSYSCALL: prints "syscall"
        tkIDEN: prints tokstring
        tkNUMBER: print tokvalue
        tkSTRING:
            printc 34 prints tokstring printc 34
        tkEOF: prints "eof"

// print atom
to pratom type ref =
    case type
        SP:
            prints "NIL"
        OPER:
            prtoken ref
        NUM:
            print ref
        GNAME:
            prints getName ref
        LNAME:
            printc 35 print ref // #
        CNAME:
            prints "classname"
        STRING:
            printc 34 prints ref printc 34

// print list
to prlist a =
    if a == NIL break
    if isatom a
        pratom (car a) (cdr a)
        space
    else
        printc 40   // "("
        while a != NIL
            prlist car a
            a = cdr a
        printc 41   // ")"

// End

to top = {}
to fundef = {}
to args = {}
to local = {}
to ex = {}
to exs = {}
to ex1 = {}
to exelse = {}
to elist = {}
to elist2 = {}
to caselist = {}
to label = {}
to ex0 = {}
to terms = {}
to term | a = {}
to tuple = {}
to tuples = {}
to mod = {}
to bop = {}

// stmt-s.txt
//    public release som-v2 30 December 2004

// parser stack  operators

to ypush x =
    ysp = ysp + 1
    if ysp >= MAXYSTK
        seterror "parser stack overflow"
    ystack[ysp] = x

to ypop | x =
    if ysp <= 0
        seterror "parser stack underflow"
    x = ystack[ysp]
    ysp = ysp - 1
    x

to ytos = ystack[ysp]

// -------------------------------

to expect tk mess =
    if tok != tk seterror mess

to commit status =
    if status == 0 seterror "syntax error"

// [ idx arity -- idx ]
to setfname | arity idx =
    arity = ypop
    idx = ytos
    if (getAttr idx) == 0
        enterGlobal idx tyFUNC 0
        setArity idx arity
        setLv idx lv
    else
        warning getName idx "redefine function"

// [idx %ex -- %to]
to dofun | idx e a =
    e = ypop
    idx = ypop
    if verbose
        printc 43 prints getName idx nl
    a = cons (newatom GNAME idx) (list e)
    ypush cons (newatom OPER tkTO) a
    clearLocal

//  [ -- %var ]
to dovar idx | type =
    type = getType idx
    if type == tyGVAR
        ypush newatom GNAME idx
    else if type == tyLOCAL
        ypush newatom LNAME (getRef idx)
    else
        seterror "dovar: expect variable"

// [lvidx %ex0 %ex0 %ex -- %for] (for lv ex0 ex0 ex)
to dofor | e idx =
    e = ypop
    e = cons ypop (list e)
    e = cons ypop e		// (ex0 ex0 ex)
    idx = ypop			// lvidx
    if (getAttr idx) == 0
        seterror "undefined index variable"
    if (getType idx) != tyLOCAL
        seterror "index variable must be local"
    dovar idx			// [ -- %var]
    ypush cons (newatom OPER tkFOR) (cons ypop e)

// [%ex0 %ex -- %while] (while ex0 ex)
to dowhile | e =
    e = ypop
    e = cons ypop (list e)
    ypush cons (newatom OPER tkWHILE) e

// [%ex0 %ex [%ex MARK] -- %if/ifelse ]
to doif | e =
    if ytos == MARK
        e = ypop		// throw away MARK
        e = ypop
        e = cons ypop (list e)
        e = cons ypop e	// (ex0 ex ex)
        ypush cons (newatom OPER tkELSE) e
    else
        e = ypop
        e = cons ypop (list e)
        ypush cons (newatom OPER tkIF) e

// [%var %ex -- %vec]
to dovec | e v a =
    e = ypop
    v = ypop
    if isatom v
        a = newatom OPER tkLBRACKET
        ypush cons a (cons v (list e))
    else
        seterror "dovec: expect variable"

// left variable
// if IDEN: NEW, GVAR, LOCAL then dovar mod else 0
to lval | idx type =
    if tok != tkIDEN
        0 break
    idx = tokvalue
    if (getAttr idx) == 0 	// new global, be careful !!
        enterGlobal idx tyGVAR (array 1)
        warning getName idx "new global"
    else
        type = getType idx
        if !( (type == tyGVAR) | (type == tyLOCAL) )
            0 break
    dovar idx
    lex
    commit mod
    1

to doset | var e a = {}

// assignment, [ -- %var/%vec]
to exas =
    if lval == 1
        if tok == tkEQ
            lex		// skip =
            commit ex0
            doset
        else
            // lval has alread done
            // half of the job, term
            commit terms
        1
    else
        ex0

to doenum =
    if (getAttr tokvalue) == 0
        enterGlobal tokvalue tyENUM nenum
        nenum = nenum + 1
    else
        seterror "enum: expect unique label"

// function call, parse ex0^arity
// [ -- %call]  (fun name ex .. ex)
to docall idx | i arity e =
    arity = getArity idx
    for i 1 arity
        commit ex0
    e = NIL
    for i 1 arity
        e = cons ypop e
    e = cons (newatom GNAME idx) e
    ypush cons (newatom OPER tkBE) e

// [ -- %num]
to donum v = ypush newatom NUM v

// already lex the next token
// ID is: var, fun-call, enum
to doiden idx =
    if (getAttr idx) == 0	// new global, be careful !!
        enterGlobal idx tyGVAR (array 1)
        warning getName idx "new global"
    case getType idx
        tyGVAR:
            dovar idx
            commit mod
        tyLOCAL:
            dovar idx
            commit mod
        tyFUNC:
            docall idx
        tyENUM:
            donum getRef idx

// [ %var %ex -- %set ]  (= var ex)
to doset | var e a =
    e = ypop
    var = ypop
    a = newatom OPER tkEQ
    ypush cons a (cons var (list e))

// [%ex1 bop %ex2 -- %bop] (bop ex1 ex2)
to dobop | e1 op e2 a =
    e2 = ypop
    op = ypop
    e1 = ypop
    a = newatom OPER op
    ypush cons a (cons e1 (list e2))

// [%ex -- %uop]
to douop uop | e =
    e = ypop
    ypush cons (newatom OPER uop) (list e)

// block = tkBB, simplify block size 0 and 1
to makeblock a =
    if a == NIL
        NIL break		// block size 0 {} => NIL
    else if (cdr a) == NIL
        car a break		// block size 1 {a} => a
    else
        cons (newatom OPER tkBB) a

// [MARK %e1 .. %en -- %block] (block e1 .. en)
to doblock | e a =
    a = NIL
    e = ypop
    while e != MARK
        a = cons e a
        e = ypop
    ypush makeblock a

// convert label to number [ -- %num]
to dolabel =
    if (getAttr tokvalue) == 0
        seterror "undefined label"
    if (getType tokvalue) != tyENUM
        seterror "expect label"
    donum getRef tokvalue

// [%ex0 MARK %num %ex .. %-1 %ex -- %case]
to docase | e a =
    a = NIL
    e = ypop
    while e != MARK
        e = cons ypop (list e)	// e = (num ex)
        a = cons e a
        e = ypop
    a = makeblock a
    e = cons ypop (list a)
    ypush cons (newatom OPER tkCASE) e

// [MARK %num %ex0 .. %ex0 -- %sys]
// (sys ex0 .. ex0)
to dosys | a e =
    e = NIL
    a = ypop
    while a != MARK
        e = cons a e
        a = ypop
    ypush cons (newatom OPER tkSYSCALL) e

// [ -- %string]
to dostring | s2 =
    s2 = array ((strlen tokstring) + 1)
    strcpy s2 tokstring
    ypush (newatom STRING s2)

// End

// parser Som-som generated from parse.txt 27 Jan 2004
// extern int lv, tok, tokvalue, nenum;
//to exas = {}
to top = {
if( tok == tkTO ) {
lex
commit fundef
1 break }
if( ex ) {
1 break }
0
}

to fundef = {
if( tok == tkIDEN ) {
ypush tokvalue lex
commit args
setfname
expect tkEQ "missing tkEQ"
lex
commit ex
dofun
1 break }
0
}

to args = {
while tok == tkIDEN {
enterLocal tokvalue lex
}
if( tok == tkBAR ) {
ypush lv lex
commit local
1 break }
ypush lv
1
}

to local = {
while tok == tkIDEN {
enterLocal tokvalue lex
}
1
}

to ex = {
if( tok == tkBB ) {
ypush MARK lex
commit exs
expect tkBE "missing tkBE"
doblock lex
1 break }
if( ex1 ) {
1 break }
0
}

to exs = {
while ex1 {
}
1
}

to ex1 = {
if( tok == tkIF ) {
lex
commit ex0
commit ex
commit exelse
doif
1 break }
if( tok == tkWHILE ) {
lex
commit ex0
commit ex
dowhile
1 break }
if( tok == tkFOR ) {
lex
expect tkIDEN "missing tkIDEN"
ypush tokvalue lex
commit ex0
commit ex0
commit ex
dofor
1 break }
if( tok == tkCASE ) {
lex
commit ex0
expect tkBB "missing tkBB"
ypush MARK lex
commit caselist
expect tkBE "missing tkBE"
docase lex
1 break }
if( tok == tkENUM ) {
lex
expect tkBB "missing tkBB"
lex
commit elist
expect tkBE "missing tkBE"
ypush NIL lex
1 break }
if( tok == tkBREAK ) {
ypush newatom OPER tkBREAK lex
1 break }
if( exas ) {
// commit ex0  // change 17 Dec 2004
// doset
// 1 break }
// if( ex0 ) {
1 break }
0
}

to exelse = {
if( tok == tkELSE ) {
lex
commit ex
ypush MARK
1 break }
1
}

to elist = {
if( tok == tkNUMBER ) {
nenum = tokvalue lex
commit elist2
1 break }
if( elist2 ) {
1 break }
0
}

to elist2 = {
while tok == tkIDEN {
doenum lex
}
1
}

to caselist = {
while label {
expect tkCOLON "missing tkCOLON"
lex
commit ex
}
if( tok == tkELSE ) {
donum 0-1 lex
expect tkCOLON "missing tkCOLON"
lex
commit ex
1 break }
1
}

to label = {
if( tok == tkNUMBER ) {
donum tokvalue lex
1 break }
if( tok == tkIDEN ) {
dolabel lex
1 break }
0
}

to ex0 = {
if( term ) {
commit terms
1 break }
0
}

to terms = {
while bop {
commit term
dobop
}
1
}

to term | a = {
if( tok == tkNUMBER ) {
donum tokvalue lex
1 break }
if( tok == tkSTRING ) {
dostring lex
1 break }
if( tok == tkIDEN ) {
a = tokvalue lex
doiden a
1 break }
if( tok == tkNOT ) {
lex
commit ex0
douop tkNOT
1 break }
if( tok == tkARRAY ) {
lex
commit ex0
douop tkARRAY
1 break }
if( tok == tkSYSCALL ) {
lex
commit tuple
dosys
1 break }
if( tok == tkLPAREN ) {
lex
commit ex0
expect tkRPAREN "missing tkRPAREN"
lex
1 break }
0
}

to tuple = {
if( tok == tkBB ) {
lex
expect tkNUMBER "missing tkNUMBER"
ypush MARK donum tokvalue lex
commit tuples
expect tkBE "missing tkBE"
lex
1 break }
0
}

to tuples = {
while ex0 {
}
1
}

to mod = {
if( tok == tkLBRACKET ) {
lex
commit ex0
expect tkRBRACKET "missing tkRBRACKET"
dovec lex
1 break }
1
}

to bop = {
if( tok == tkPLUS ) {
ypush tok lex
1 break }
if( tok == tkMINUS ) {
ypush tok lex
1 break }
if( tok == tkSTAR ) {
ypush tok lex
1 break }
if( tok == tkSLASH ) {
ypush tok lex
1 break }
if( tok == tkAND ) {
ypush tok lex
1 break }
if( tok == tkBAR ) {
ypush tok lex
1 break }
if( tok == tkCARET ) {
ypush tok lex
1 break }
if( tok == tkEQEQ ) {
ypush tok lex
1 break }
if( tok == tkNE ) {
ypush tok lex
1 break }
if( tok == tkLT ) {
ypush tok lex
1 break }
if( tok == tkLE ) {
ypush tok lex
1 break }
if( tok == tkGE ) {
ypush tok lex
1 break }
if( tok == tkGT ) {
ypush tok lex
1 break }
if( tok == tkMOD ) {
ypush tok lex
1 break }
if( tok == tkGTGT ) {
ypush tok lex
1 break }
if( tok == tkLTLT ) {
ypush tok lex
1 break }
0
}


// icode-s.txt
//   public release som-v2  30 December 2004

to outa code arg =
    CS[CP] = (arg << 8) | (code & 255)
    CP = CP + 1

to outs code =
    CS[CP] = code
    CP = CP + 1

// change arg, perserve code
to patch ads v =
    CS[ads] = (v << 8) | (CS[ads] & 255)

to putcode ip code data =
    CS[ip] = (data << 8) | (code & 255)

to isJmp op =
    (op == icJmp) | (op == icJt) | (op == icJf)

to reName fn | i c d lvar ref =
    lvar = getLv fn
    ref = getRef fn
    for i ref+1 CP-1
        c = CS[i] & 255
        d = CS[i] >> 8
        if (c==icGet) | (c==icPut) | (c==icInc) | (c==icDec)
            patch i lvar-d+1

to improve2 ref | i c1 d1 c2 d2 =
    // replace jmp to ret, jmp to jmp
    for i ref+1 CP-1
        c1 = CS[i] & 255
        d1 = CS[i] >> 8
        if isJmp c1
            c2 = CS[i+d1] & 255
            d2 = CS[i+d1] >> 8
            if (c1==icJmp) & (c2==icRet)
                putcode i icRet d2
            else
                if c2 == icJmp
                    patch i d1+d2

// ref at "ret"
to tailCall2 ref | i ref2 c d c1 d1 arg =
    ref2 = getRef currentf
    d = CS[ref] >> 8		// ret d
    c1 = CS[ref-1] & 255	// call
    d1 = CS[ref-1] >> 8
    if (c1 == icCall) & (d1 == ref2)
//		prints "tail call" nl
        arg = getArity currentf
        CP = ref - 1
        for i 1 arg
            outa icPut (arg-i+1)
        outa icJmp (ref2-CP+1)
        outa icRet d		// necessary !
        // correct jump to ref
        for i ref2 ref-1
            c = CS[i] & 255
            d = CS[i] >> 8
            if (isJmp c) & ((d+i) == ref)
                patch i (CP-1-i)

to pr2 s1 a =
    prints s1 space print a

// search symbol table for name with ref
to prNm s1 ref | idx =
    prints s1 space
    idx = searchref ref
    if idx < 0
        prints "undef"
    else
        prints getName idx

to prCode a | op arg =
    op = CS[a] & 255
    arg = CS[a] >> 8
    case op
        icAdd: prints "Add"
        icSub: prints "Sub"
        icMul: prints "Mul"
        icDiv: prints "Div"
        icBand: prints "Band"
        icBor: prints "Bor"
        icBxor: prints "Bxor"
        icNot: prints "Not"
        icEq: prints "Eq"
        icNe: prints "Ne"
        icLt: prints "Lt"
        icLe: prints "Le"
        icGe: prints "Ge"
        icGt: prints "Gt"
        icShl: prints "Shl"
        icShr: prints "Shr"
        icMod: prints "Mod"
        icLdx: prints "Ldx"
        icStx: prints "Stx"
        icRet: pr2 "Ret" arg
        icArray: prints "Array"
        icEnd: prints "End"
        icGet: pr2 "Get" arg
        icPut: pr2 "Put" arg
        icLd: prNm "Ld" arg
        icSt: prNm "St" arg
        icJmp: pr2 "Jmp" a+arg
        icJt: pr2 "Jt" a+arg
        icJf: pr2 "Jf" a+arg
        icLit: pr2 "Lit" arg
        icCall: prNm "Call" arg
        icInc: pr2 "Inc" arg
        icDec: pr2 "Dec" arg
        icSys: pr2 "Sys" arg
        icCase: prints "Case"
        icFun: prNm "Fun" a
//		icCalli: pr2 "Calli" arg
        else: prints "Undef"

to showCode a b | i =
    for i a b
        print i space
        prCode i nl

to outM mem start end | i =
    print start space print end nl
    for i start end
        print mem[i] space
        if (i % 8) == 0 nl
    nl

// End

// gencode-s.txt
//   public release som-v2  30 December 2004

to atomeq e type value =
    ((car e) == type) & ((cdr e) == value)

// check e = (lv (+ lv 1))
to isinc e | a ref op =
    a = car e
    if (car a) != LNAME
        0 break
    ref = cdr a
    e = item2 e		// e = (+ lv 1)
    op = car e
    if ! ((atomeq op OPER tkPLUS) | (atomeq op OPER tkMINUS))
        0 break
    a = item2 e
    if ! (atomeq a LNAME ref)
        0 break
    if ! (atomeq (item3 e) NUM 1)
        0 break
    1

// if there is any break, patch jump to CP
to patchbreak | ads a =
    if ytos == MARK		// no break
        a = ypop
        break
    while ytos == BMARK
        a = ypop		// throw away BMARK
        ads = ypop
        patch ads CP-ads
    if ytos != MARK
        error "patch break: no matching mark"
    a = ypop			// throw away MARK

// call only when fn is not empty
to clean fn | ref a =
    ref = getRef fn
    patchbreak
    a = (getLv fn) + 1
    outa icRet a
    tailCall2 CP-1
    // update fun head, new lv from for
    a = (getLv fn) - (getArity fn) + 1
    putcode ref icFun a
    // post processing
    reName fn
    improve2 ref
    improve2 ref

to genex l | a e e1 ads ads2 v1 v2 idx type ref = {}

to genbop op l =
    genex car l
    genex item2 l
    outs op

to genuop op l =
    genex car l
    outs op

//  gen vec ads: (name ex) => ld/get, ex, ldx
to genvec e | nm type ref =
    nm = car e
    type = car nm
    ref = cdr nm
    case type
        GNAME: outa icLd getRef ref
        LNAME: outa icGet ref
    genex item2 e

// return patch-address, e = (e1 e)
to genif e | ads =
    genex car e
    outa icJf 0
    ads = CP - 1
    genex item2 e
    ads

// get Lo Hi from caselist
// Lo Hi are globals
to getcaselist e | n e2 =
    Lo = 0
    Hi = 0
    if e == NIL break
    e = cdr e
    e2 = car e				// a = (num ex)
    n = cdr car e2			// n = num.ref
    Lo = n
    Hi = n
    while e != NIL
        e2 = car e
        n = cdr car e2
        if n < 0 break		// else case
        if n < Lo Lo = n
        if n > Hi Hi = n
        e = cdr e

// e = (ex0 block)
to gencase e | ads ads2 i v1 v2 a type ref =
    genex car e				// gen ex0
    outs icCase
    e = item2 e				// block
    getcaselist e			// set Lo Hi globals
    outa icLit Lo
    outa icLit Hi
    ads = CP
    outa icJmp 0			// else case
    for i Lo Hi
        outa icJmp 0		// empty jmp table
    v1 = 0
    e = cdr e				// e = (ex .. ex)
    while e != NIL
        a = car e			// a = (num ex)
        ref = cdr car a 	// ref = num label
        if ref > 0			// skip else (ref -1)
            ads2 = ref-Lo+ads+1	// entry in jmp table
            patch ads2 CP-ads2	// to jmp here
            genex item2 a
            outa icJmp v1		// jmp to eloc, backchain
            v1 = CP-1
        e = cdr e
    // here is exelse
    // patch empty entry to exelse
    for i ads+1 ads+1+Hi-Lo
        v2 = CS[i] >> 8
        if v2 == 0 patch i CP-i
    patch ads CP-ads			// jmp to else
    if ref == (0-1)				// gen else case
        genex item2 a
    // here is eloc
    // update backchain jmp to eloc
    while v1 != 0
        v2 = CS[v1] >> 8
        patch v1 CP-v1
        v1 = v2
    patch v1 CP-v1				// the first one

// (ex .. ex )
to genlist e =
    while e != NIL
        genex car e
        e = cdr e

to genex x | a e e1 ads ads2 v1 v2 idx type ref =
    if x == NIL break
    if isatom x
        type = car x
        ref = cdr x
        case type
            STRING: outa icLit ref
            NUM: outa icLit ref
            GNAME: outa icLd getRef ref
            LNAME: outa icGet ref
            OPER:
                if ref == tkBREAK
                    ypush CP
                    ypush BMARK
                    outa icJmp 0
                else
                    error "genex: unknown atomic operator"
    else
        a = car x
        e = cdr x
        if (car a) != OPER
            error "genex: expect operator"
        case cdr a
            tkPLUS: genbop icAdd e
            tkMINUS: genbop icSub e
            tkSTAR: genbop icMul e
            tkSLASH: genbop icDiv e
            tkAND: genbop icBand e
            tkBAR: genbop icBor e
            tkCARET: genbop icBxor e
            tkEQEQ: genbop icEq e
            tkNE: genbop icNe e
            tkLT: genbop icLt e
            tkLE: genbop icLe e
            tkGE: genbop icGe e
            tkGT: genbop icGt e
            tkMOD: genbop icMod e
            tkGTGT: genbop icShr e
            tkLTLT: genbop icShl e
            tkNOT: genuop icNot e
            tkARRAY: genuop icArray e
            tkSYSCALL:				// (num ex .. ex)
                genlist cdr e
                outa icSys (cdr car e)
            tkBB:
                genlist e
            tkBE:					// (call name ex ... ex)
                idx = cdr car e		// e = (name ex .. ex)
                genlist cdr e
                a = getRef idx
                if a == 0			// forward ref
                    outa icCalli idx
                else
                    outa icCall a
            tkTO:					// (to name ex)
                idx = cdr car e		// e = (name ex)
                currentf = idx
                e = item2 e
                if e != NIL
                    setRef idx CP
                    outa icFun 0	// update at clean
                    ypush MARK
                    genex e
                    clean idx
            tkEQ:					// (= var/vec ex)
                a = car e			// var/vec
                e1 = item2 e
                if isinc e			// e = (lv (+ lv 1))
                    ref = cdr a 	// lvar ref
                    if atomeq (car e1) OPER tkPLUS
                        outa icInc ref
                    else
                        outa icDec ref
                else
                    if isatom a			// var
                        genex e1		// RHS
                        type = car a
                        ref = cdr a
                        case type
                            GNAME: outa icSt getRef ref
                            LNAME: outa icPut ref
                            else: error "genex: unknown atom type"
                    else
                        genvec cdr a	// get address LHS
                        genex e1		// RHS
                        outs icStx
            tkLBRACKET:				// vec RHS
                genvec e
                outs icLdx
            tkIF:					// (if e1 e)
                ads = genif e		// e = (e1 e)
                patch ads CP-ads
            tkELSE:					// (ifelse e1 et ef)
                ads = genif e		// e = (e1 et ef)
                outa icJmp 0
                patch ads CP-ads
                ads = CP - 1
                genex item3 e
                patch ads CP-ads
            tkWHILE:				// (while e1 e)
                ypush MARK
                ads = CP
                outa icJmp 0		// branch to cond
                genex item2 e		// body of while
                ads2 = CP
                genex car e			// cond
                outa icJt ads+1-CP	// jmp to body
                patch ads ads2-ads
                patchbreak
            tkFOR:					// (for lv ex0 ex0 ex)
                ypush MARK			// e = (lv ex0 ex0 ex)
                v2 = (getLv currentf) + 1
                setLv currentf v2	// update header when clean
                v1 = cdr car e
                e = cdr e			// e = (ex0 ex0 ex)
                genex car e			// initial
                outa icPut v1
                genex item2 e		// end
                outa icPut v2
                ads = CP
                outa icJmp 0		// jmp to test
                genex item3 e		// body of for
                outa icInc v1
                patch ads CP-ads	// test
                outa icGet v1
                outa icGet v2
                outs icLe
                outa icJt ads+1-CP
                patchbreak
            tkCASE:
                gencase e
            else: error "genex: unknown operator"

// change Calli to Call from CS[a]..CS[b]
to patchCalli a b | i c d =
    for i a b
        c = CS[i] & 255
        d = CS[i] >> 8
        if c == icCalli
            putcode i icCall (getRef d)

// End

// main-s.txt
//   public release som-v2   30 Dec 2004

to eval ref =
    syscall {5 ref}		// eval in C

to parse | f e a =
    lex
    while tok != tkEOF
        if tok == tkTO
            f = top		// throw away flag
            e = ypop
            def = append def e
            genex e
        else
            f = top
            e = ypop
            a = CP
            genex e
            outs icEnd
            patchCalli 3 CP-1
            eval a		// immediately eval it
            CP = a		// and throw away code

to showGlobal | i nm =
    prints "Global: "
    for i 0 tablesize-1
        nm = getName i
        if (nm != 0) & ((getAttr i) != 0)
            if (getType i) == tyGVAR
                prints nm space
    nl

to epilog | id =
    patchCalli 3 CP-1
    id = install "main"
    if (getAttr id) != 0	// if found call main
        putcode 1 icCall getRef id
        putcode 2 icEnd 0
        if noeval == 0
            eval 1

to initkeysym =
    enterGlobal install "to" tyKEY tkTO
    enterGlobal install "if" tyKEY tkIF
    enterGlobal install "else" tyKEY tkELSE
    enterGlobal install "while" tyKEY tkWHILE
    enterGlobal install "for" tyKEY tkFOR
    enterGlobal install "break" tyKEY tkBREAK
    enterGlobal install "array" tyKEY tkARRAY
    enterGlobal install "case" tyKEY tkCASE
    enterGlobal install "enum" tyKEY tkENUM
    enterGlobal install "syscall" tyKEY tkSYSCALL

// code segment 1..CP-1, data segment userDS..DP
to outobj | dp =
    print OBJ_SOMv2 nl
    outM CS 1 CP-1
    dp = array 0	// find where is DP
    outM M userDS dp-1

to init_all =
    initSom
    init_list
    initlex
    init_symtab
    initkeysym
    initcell
    def = list newatom OPER tkBB
    startlex
    userDS = array 0   // find where is DS

to doObject =
    init_all
    verbose = 0
    noeval = 1
    parse
    epilog
    outobj

to doListing =
    init_all
    verbose = 0
    noeval = 1
    parse
    epilog
    showCode 1 CP-1

to main =
    init_all
    parse
//	prlist def nl
    epilog
//	if verbose
//		showGlobal
//		dumpSym

// End

