// main-s.txt  parser generator in Som
// to be used with som 4.2  (lex in vm)
//   22 Sept 2009

//  system area  1 .. MAXSYS-1

//  250..299 tokstring
//  102  tokvalue
//  103  tokcol
//  104  line

//: tokvalue = M[102]
//: settokvalue x = M[102] = x

// print string of token, len 3 each
tok_str = array
	"*" 0 "/" 0 "-" 0 "+" 0 "=" 0 "==" 0 "&" 0 "|" 0
	"^" 0 "%" 0 "!" 0 "!=" 0 "<" 0 "<=" 0 "<<" 0 ">" 0
	">=" 0 ">>" 0 ":" 0 "(" 0 ")" 0 "[" 0 "]" 0 "{" 0
	"}" 0 "to" 0 "if" 0 "else" 0 "while" "for" 0 "break" "array"
	"case" 0 "enum" 0 "syscall"

to prtoken tk | a =
	if tk > tkSYSCALL
		seterror "unknown token"
	if tk >= tkSTAR
		a = tk - tkSTAR		// a 0..34
		prints tok_str+(a*3)
		break
	case tk
		tkIDEN: prints tokstring
//        tkNUMBER: print tokvalue
		tkSTRING:
			printc 34 prints tokstring printc 34
		tkEOF: prints "eof"

// ---- parser stack  operators -----

to ypush x =
	ysp = ysp + 1
	if ysp >= MAXYSTK
		seterror "parser stack overflow"
	ystack[ysp] = x

to ypop | x =
	if ysp <= 0
		seterror "parser stack underflow"
	x = ystack[ysp]
	ysp = ysp - 1
	x

to ytos = ystack[ysp]

// -------------------------------

// create a new string
to copystring s1 | s2 =
	s2 = array ((strlen s1) + 1)
	strcpy s2 s1
	s2

to doiden | s ty =
	s = copystring tokstring
	strunpack sbuf s
	if (sbuf[0] == 116) & (sbuf[1] == 107)  // tk
		ty = TERM
	else if streq tokstring "nil"
		ty = NIL
	else
		ty = NONTERM
	ypush (newatom ty s)

to dostring =
	ypush (newatom STRING (copystring tokstring))

//[%mark %atom ... %atom -> %alt]
to doalt | a b =
	b = NIL
	while ytos != MARK
		a = ypop
		b = cons a b
	a = ypop			// throw mark away
	ypush b

// print atom
to pratom type val =
	case type
		NIL:
			prints "NIL"
		SP:
			prints "SP"
		TERM:
			printc 39 prints val
		NONTERM:
			prints val
		STRING:
			printc 34 prints val printc 34

// print list
to prlist a =
	if a == NIL break
	if isatom a
		pratom (car a) (cdr a)
		space
	else
		printc 40   // "("
		while a != NIL
			prlist car a
			a = cdr a
		printc 41   // ")"

// ------------------------------

to expect tk mess =
	if tok != tk seterror mess

to commit status =
	if status == 0 seterror "syntax error"

to mylex =
	tok = syslex FI
//	printc 39  prtoken tok space

//  lexgen grammar

// grammar -> 'string | rule | 'eof
// rule -> 'id rule2
// rule2 ->  '= es | '[ var
// var -> 'id var | '] '= es
// es -> e1 es | '| es | '%
// e1 -> 'id | 'string

to e1 =
	if tok == tkIDEN
		doiden
		mylex
		1
	else if tok == tkSTRING
		dostring
		mylex
		1
	else
		0

to es =
	while e1 {}
	if tok == tkBAR
		doalt
		ypush MARK
		mylex
		es
	else if tok == tkMOD
		doalt
		doalt
		1
	else
		0

to var =
	while tok == tkIDEN
		doiden
		mylex
	if tok == tkRBRACKET
		doalt
		mylex
		expect tkEQ "expect ="
		ypush MARK
		mylex
		es
	else
		0

to rule2 =
	if tok == tkEQ
		ypush (newatom NIL 0)
		ypush MARK
		mylex
		es
	else if tok == tkLBRACKET
		ypush MARK
		mylex
		var
	else
		0

to rule =
	if tok == tkIDEN
		ypush MARK
		doiden
		mylex
		rule2
	else
		0

to grammar | a =
	mylex
	ypush MARK
	while tok != tkEOF
		if tok == tkSTRING
			a = copystring tokstring
			header =  append header (newatom STRING a)
		else
			commit rule
//			prlist ytos nl
		mylex
	doalt				// collect all rules

// ------------------

enum
	125 RBRACE
enum
	123 LBRACE

to lenlist m | k =
	k = 0
	while m != NIL
		k = k + 1
		m = cdr m
	k

to last m =
	while (cdr m) != NIL
		m = cdr m
	car m

to atomeq a1 a2 =
	((car a1) == (car a2)) & (streq (cdr a1) (cdr a2))

// reverse m1 to m2
to reverselist m1 | m2 =
	m2 = NIL
	while m1 != NIL
		m2 = cons (car m1) m2
		m1 = cdr m1
	m2

to trylex =
	if lexflag
		prints "lex" nl
		lexflag = 0

// for the first match item
to genone1 a | ty val =
	ty = car a
	val = cdr a
	case ty
		NIL:
			nilflag = 1
		TERM:
			trylex
			if loopflag
				prints "while tok == "
			else
				prints "if tok == "
			prints val  space printc LBRACE nl
			lexflag = 1
		NONTERM:
			trylex
			if loopflag
				prints "while "
			else
				prints "if "
			prints val  space printc LBRACE nl
		STRING:
			prints val nl
			trylex

// for one match item
to genone a | ty val =
	ty = car a
	val = cdr a
	case ty
		NIL:
			nilflag = 1
		TERM:
			trylex
			prints "expect "  prints val space
			printc 34 prints "missing " prints val printc 34 nl
			lexflag = 1
		NONTERM:
			trylex
			prints "commit " prints val nl
		STRING:
			prints val nl
			trylex

// for each match in an alternative
to genalt2 e level len =
	if e == NIL
		if nilflag == 0
			trylex
			prints "1 break }" nl
	else if loopflag & (level == len)	// don't do the last one
		printc RBRACE nl
	else
		if level == 1
			genone1 (car e)				// first match
		else
			genone (car e)				// the rest
		genalt2 (cdr e) (level + 1) len

// for each alternative
to genalt e | e2 =
	if e != NIL
		e2 = car e
		if atomeq lhs (last e2)
			loopflag = 1
		else
			loopflag = 0
		genalt2 e2 1 (lenlist e2)
		genalt (cdr e)

to gencase e =
	if e != NIL
		genone car e
		gencase cdr e

to genonecase a =
	prints (cdr car a) prints ": {" nl
	lexflag = 1
	gencase (cdr a)
	trylex
	prints "1 break }" nl

to genmulti e | k i a =
	prints "case tok {" nl
	k = lenlist e
	for i 1 k-1
		genonecase car e
		e = cdr e
	// the last one
	if (car car car e) == TERM 	// is tkXX
		genonecase car e
	else
		prints "else: {" nl
		genalt2 (car e) 1 (lenlist car e)
		printc RBRACE nl		// close last alt
	printc RBRACE nl

// check if there is a recursive rule
to chkRecursive e =
	if e == NIL
		0
	else
		if atomeq lhs (last car e)
			1 break
		chkRecursive (cdr e)

// to be multi, no recursion, alt > 2,
// all alts except last must be tkXX
to ismulti e | k i e2 f ty =
	if loopflag
		0 break
	k = lenlist e
	if k < 3
		0 break
	f = 1
	for i 1 k-1
		ty = car car car e	// type of first match
		if ty != TERM
			f = 0
			break
		e = cdr e
	f

// list of local variable
to prnames e =
	if e != NIL
		prints (cdr car e) space
		prnames cdr e

to endrule =
	trylex
	if nilflag
		print 1 nl
	else
		print 0 nl
	nilflag = 0

// for each rule
to genarule a | b nm =
	lhs = car a
	prints "to " prints (cdr lhs)
	nm = car cdr a
	if (car nm) != NIL
   		prints " | " prnames nm
	prints " = {" nl
	b = cdr cdr a
	loopflag = chkRecursive b
	if ismulti b
		genmulti b
	else
		genalt b
	endrule
	printc RBRACE nl nl

to gen e =
	if e != NIL
		genarule car e
		gen cdr e

to prheader h =
	if h != NIL
		prints cdr car h  nl
		prheader cdr h

// gen forward fun def
to genforward e | e2 =
	if e != NIL
		e2 = car e		// a rule
		prints "to " prints (cdr car e2)
		prints " = {}" nl
		genforward cdr e

//to testlex fn | tok =
//    FI = fopen fn 0
//    tok = syslex FI
//    while tok != tkEOF
//        prtoken tok space
//        tok = syslex FI
//    fclose FI

//to testunpack | i s1 s2 =
//	strunpack sbuf "I am a good boy"
//	i = 0
//	while sbuf[i] != 0
//		printc sbuf[i]
//		i = i + 1

to parse fn =
	FI = fopen fn 0
	grammar
	fclose FI

to main | e e2 =
	initPgen
	init_list
	header = list (newatom SP 0)
	parse "parse-grammar.txt"
	e = ypop
	prheader cdr header  nl
//	prlist e nl
//	genforward e  nl
//	e2 = reverselist e
	gen e

// End
