// main-s.txt parser generator in Som // to be used with som 4.2 (lex in vm) // 22 Sept 2009 // P. Chongstitvatana // print string of token, len 3 each tok_str = array "*" 0 "/" 0 "-" 0 "+" 0 "=" 0 "==" 0 "&" 0 "|" 0 "^" 0 "%" 0 "!" 0 "!=" 0 "<" 0 "<=" 0 "<<" 0 ">" 0 ">=" 0 ">>" 0 ":" 0 "(" 0 ")" 0 "[" 0 "]" 0 "{" 0 "}" 0 "to" 0 "if" 0 "else" 0 "while" "for" 0 "break" "array" "case" 0 "enum" 0 "syscall" to prtoken tk | a = if tk > tkSYSCALL seterror "unknown token" if tk >= tkSTAR a = tk - tkSTAR // a 0..34 prints tok_str+(a*3) break case tk tkIDEN: prints tokstring // tkNUMBER: print tokvalue tkSTRING: printc 34 prints tokstring printc 34 tkEOF: prints "eof" // ---- parser stack operators ----- to ypush x = ysp = ysp + 1 if ysp >= MAXYSTK seterror "parser stack overflow" ystack[ysp] = x to ypop | x = if ysp <= 0 seterror "parser stack underflow" x = ystack[ysp] ysp = ysp - 1 x to ytos = ystack[ysp] // ------------------------------- // create a new string to copystring s1 | s2 = s2 = array ((strlen s1) + 1) strcpy s2 s1 s2 to doiden | s ty = s = copystring tokstring strunpack sbuf s if (sbuf[0] == 116) & (sbuf[1] == 107) // tk ty = TERM else if streq tokstring "nil" ty = NIL else ty = NONTERM ypush (newatom ty s) to dostring = ypush (newatom STRING (copystring tokstring)) //[%mark %atom ... %atom -> %alt] to doalt | a b = b = NIL while ytos != MARK a = ypop b = cons a b a = ypop // throw mark away ypush b // print atom to pratom type val = case type NIL: prints "NIL" SP: prints "SP" TERM: printc 39 prints val NONTERM: prints val STRING: printc 34 prints val printc 34 // print list to prlist a = if a == NIL break if isatom a pratom (car a) (cdr a) space else printc 40 // "(" while a != NIL prlist car a a = cdr a printc 41 // ")" // ------------------------------ to expect tk mess = if tok != tk seterror mess to commit status = if status == 0 seterror "syntax error" to mylex = tok = syslex FI // printc 39 prtoken tok space // lexgen grammar // grammar -> 'string | rule | 'eof // rule -> 'id rule2 // rule2 -> '= es | '[ var // var -> 'id var | '] '= es // es -> e1 es | '| es | '% // e1 -> 'id | 'string to e1 = if tok == tkIDEN doiden mylex 1 else if tok == tkSTRING dostring mylex 1 else 0 to es = while e1 {} if tok == tkBAR doalt ypush MARK mylex es else if tok == tkMOD doalt doalt 1 else 0 to var = while tok == tkIDEN doiden mylex if tok == tkRBRACKET doalt mylex expect tkEQ "expect =" ypush MARK mylex es else 0 to rule2 = if tok == tkEQ ypush (newatom NIL 0) ypush MARK mylex es else if tok == tkLBRACKET ypush MARK mylex var else 0 to rule = if tok == tkIDEN ypush MARK doiden mylex rule2 else 0 to grammar | a = mylex ypush MARK while tok != tkEOF if tok == tkSTRING a = copystring tokstring header = append header (newatom STRING a) else commit rule // prlist ytos nl mylex doalt // collect all rules // ------------------ enum 125 RBRACE enum 123 LBRACE to lenlist m | k = k = 0 while m != NIL k = k + 1 m = cdr m k to last m = while (cdr m) != NIL m = cdr m car m to atomeq a1 a2 = ((car a1) == (car a2)) & (streq (cdr a1) (cdr a2)) // reverse m1 to m2 to reverselist m1 | m2 = m2 = NIL while m1 != NIL m2 = cons (car m1) m2 m1 = cdr m1 m2 to trylex = if lexflag prints "lex" nl lexflag = 0 // for the first match item to genone1 a | ty val = ty = car a val = cdr a case ty NIL: nilflag = 1 TERM: trylex if loopflag prints "while tok == " else prints "if tok == " prints val space printc LBRACE nl lexflag = 1 NONTERM: trylex if loopflag prints "while " else prints "if " prints val space printc LBRACE nl STRING: prints val nl trylex // for one match item to genone a | ty val = ty = car a val = cdr a case ty NIL: nilflag = 1 TERM: trylex prints "expect " prints val space printc 34 prints "missing " prints val printc 34 nl lexflag = 1 NONTERM: trylex prints "commit " prints val nl STRING: prints val nl trylex // for each match in an alternative to genalt2 e level len = if e == NIL if nilflag == 0 trylex prints "1 break }" nl else if loopflag & (level == len) // don't do the last one printc RBRACE nl else if level == 1 genone1 (car e) // first match else genone (car e) // the rest genalt2 (cdr e) (level + 1) len // for each alternative to genalt e | e2 = if e != NIL e2 = car e if atomeq lhs (last e2) loopflag = 1 else loopflag = 0 genalt2 e2 1 (lenlist e2) genalt (cdr e) to gencase e = if e != NIL genone car e gencase cdr e to genonecase a = prints (cdr car a) prints ": {" nl lexflag = 1 gencase (cdr a) trylex prints "1 break }" nl to genmulti e | k i a = prints "case tok {" nl k = lenlist e for i 1 k-1 genonecase car e e = cdr e // the last one if (car car car e) == TERM // is tkXX genonecase car e else prints "else: {" nl genalt2 (car e) 1 (lenlist car e) printc RBRACE nl // close last alt printc RBRACE nl // check if there is a recursive rule to chkRecursive e = if e == NIL 0 else if atomeq lhs (last car e) 1 break chkRecursive (cdr e) // to be multi, no recursion, alt > 2, // all alts except last must be tkXX to ismulti e | k i e2 f ty = if loopflag 0 break k = lenlist e if k < 3 0 break f = 1 for i 1 k-1 ty = car car car e // type of first match if ty != TERM f = 0 break e = cdr e f // list of local variable to prnames e = if e != NIL prints (cdr car e) space prnames cdr e to endrule = trylex if nilflag print 1 nl else print 0 nl nilflag = 0 // for each rule to genarule a | b nm = lhs = car a prints "to " prints (cdr lhs) nm = car cdr a if (car nm) != NIL prints " | " prnames nm prints " = {" nl b = cdr cdr a loopflag = chkRecursive b if ismulti b genmulti b else genalt b endrule printc RBRACE nl nl to gen e = if e != NIL genarule car e gen cdr e to prheader h = if h != NIL prints cdr car h nl prheader cdr h // gen forward fun def to genforward e | e2 = if e != NIL e2 = car e // a rule prints "to " prints (cdr car e2) prints " = {}" nl genforward cdr e to parse fn = FI = fopen fn 0 grammar fclose FI to main | e e2 = initPgen init_list header = list (newatom SP 0) parse "parse-grammar.txt" e = ypop prheader cdr header nl // prlist e nl // genforward e nl // e2 = reverselist e gen e // End