//  pgen.txt
//    a parser generator in one file   28 Oct 2009

//  load "string-s.txt"
//  load "pgen-h-s.txt"
//  load "list-s.txt"
//  load "main-s.txt"

// string-s.txt   string functions
//    som-string is an array of int
//    an int contains at most 4 char, right pad with 0
//    terminate by int 0

//    public release som v3.0   5 March 2007 (Maka-bucha day)
//    public release som v3.1 	19 Aug 2007 (Draft vote day)
//    public release som v4.0   2 July 2008
//    public release som v4.1   9 Aug 2008 (Birthday)

//    improve efficiency by nested-if  29 July 2008
//    fprints now move to syscall 11   20 sept 2009

// copy s1 = s2
to strcpy s1 s2 | i =
	i = 0
	while s2[i] != 0
		s1[i] = s2[i]
		i = i + 1
	s1[i] = 0

to strlen s | k =
	k = 0
	while s[k] != 0
		k = k + 1
	k

// debug per "som-string-bug.txt, 7/9/2007" 16 Mar 2008
to streq s1 s2 | i =
	i = 0
	while and (s1[i] == s2[i]) s1[i]
		i = i + 1
	s1[i] == s2[i]

// s1 is som-string (packed string)
// s1 contains only number (no sign)
to atoi s1 | a i c1 c2 c3 c4 v =
	v = 0
	i = 0
	a = s1[i]
	while a != 0
		c1 = (a >> 24) & 255
		if c1 != 0
			v = v*10 + c1 - 48
			c2 = (a >> 16) & 255
			if c2 != 0
				v = v*10 + c2 - 48
				c3 = (a >> 8) & 255
				if c3 != 0
					v = v*10 + c3 - 48
					c4 = a & 255
					if c4 != 0
						v = v*10 + c4 - 48
		i = i + 1
		a = s1[i]
	v

: prints s1 = fprints 1 s1		// prints to stdout

// pack array of char to som-string s1
// use nested if
to strpack s1 ar start len | a k i e =
	k = 0
	i = start
	e = start + len
	while i < e
		a = ar[i] << 24
		i = i + 1
		if i < e
			a = a | (ar[i] << 16)
			i = i + 1
			if i < e
				a = a | (ar[i] << 8)
				i = i + 1
				if i < e
					a = a | ar[i]
					i = i + 1
		s1[k] = a
		k = k + 1
	s1[k] = 0

// unpack som-string s2 to array of char s1
to strunpack s1 s2 | i j a c1 c2 c3 c4 =
	i = 0
	j = 0
	while s2[i] != 0
		a = s2[i]
		c1 = (a >> 24) & 255
		c2 = (a >> 16) & 255
		c3 = (a >> 8) & 255
		c4 = a & 255
		if c1 != 0
			s1[j] = c1
			j = j + 1
			if c2 != 0
				s1[j] = c2
				j = j + 1
				if c3 != 0
					s1[j] = c3
					j = j + 1
					if c4 != 0
						s1[j] = c4
						j = j + 1
		i = i + 1
	s1[j] = 0

// End

// pgen-h-s.txt

//  public release som v4.0   2 July 2008
//  public release som v4.1   9 Aug 2008 (Birthday)
//  public release som v4.2a  23 Sept 2009

// sequence of loading files
//
// lib.som
// string-s.txt
// pgen-h-s.txt
// list-s.txt
// main-s.txt

enum	// marker
	1 NIL MARK BMARK
enum	// atom
	4 SP TERM NONTERM STRING

enum	// token type
	14 tkIDEN tkNUMBER tkSTRING tkEOF tkERROR
enum	// token
	50 tkSTAR tkSLASH tkMINUS tkPLUS tkEQ tkEQEQ
	tkAND tkBAR tkCARET tkMOD tkNOT tkNE tkLT tkLE
	tkLTLT tkGT tkGE tkGTGT tkCOLON tkLPAREN tkRPAREN
	tkLBRACKET tkRBRACKET tkBB tkBE tkTO tkIF tkELSE
	tkWHILE tkFOR tkBREAK tkARRAY tkCASE tkENUM tkSYSCALL

enum
	500 MAXYSTK		// size of parser stack

//  system area  1 .. MAXSYS-1

//  250..299 tokstring
//  101  mode
//  102  tokvalue
//  103  tokcol
//  104  line

: line = M[104]
: setline n = M[104] = n

to initPgen | a =
	M = 0
	verbose = 1
	setline 0			// line
	FI = 0				// input file
	FO = 1				// output file
	tokstring = 250
	tok = 0
	ystack = array MAXYSTK  // parser stack
	ysp = 0
	sbuf = array 80			// string buffer
	lexflag = 0
	lhs = 0
	loopflag = 0
	nilflag = 0

: fspace = fprintc FO 32
: fnl = fprintc FO 10

// error during parsing
to seterror s =
	prints "line " print line space
	prints s nl
	exit

to warning nm mess =
	if verbose
		prints "Warning: "
		if nm != 0
			prints nm space
		prints mess nl

// End

// list-s.txt    list processing
//   implement list.c  for som-som project  14 Jan 2004
//   P. Chongstitvatana

//   public release som v3.0   5 March 2007 (Maka-bucha day)
//   public release som v3.1   19 Aug 2007 (Draft vote day)
//   public release som v4.0   2 July 2008
//   public release som v4.1   9 Aug 2008 (Birthday)

enum
	10 CELLPTR			// min pointer to acell
enum
	10000 MAXCELL

to init_list =
	cell = array MAXCELL
	freecell = CELLPTR
	endcell = MAXCELL - 2

: setcar a value =
	cell[a] = value

: setcdr a value =
	cell[a+1] = value

//: car a = if a == NIL a else cell[a]
//: cdr a = if a == NIL a else cell[a+1]
: car a = cell[a]
: cdr a = cell[a+1]
to item2 x = car cdr x
to item3 x = car cdr cdr x

to newcell | a =
	a = freecell
	freecell = freecell + 2
	if freecell >= endcell
		seterror "out of memory cell"
	else
		setcar a NIL
		setcdr a NIL
	a

to islist x = (car x) >= CELLPTR
to isatom x = (car x) < CELLPTR

to newatom type value | a =
	a = newcell
	setcar a type
	setcdr a value
	a

to list a | b =
	b = newcell
	setcar b a
	b

to cons a l | b =
	b = newcell
	setcdr b l
	setcar b a
	b

to append lst x | a b =
	if x != NIL
		a = lst
		b = cdr a
		while b != NIL
			a = b
			b = cdr a
		setcdr a (list x)
	lst


// can cons2 be used instead of cons ?  31 Mar 2004

// cons2 x, y = {NIL, atom, list}
to cons2 x y | z =
	if x == NIL
		y break
	if y == NIL
		list x break
	// it is the same whether x is atom of list
	// a new cell is required to build a dot-pair
	// only y must be inspected, if it is not a list
	// a new dot-pair to make y a list is needed
	if isatom y
		z = newatom y NIL
	else
		z = y
	newatom x z

// clone a copy of list t
to copylist t =
	if t == NIL
		NIL
		break
	if isatom t
		newatom (car t) (cdr t)
	else
		cons2 (copylist car t) (copylist cdr t)

to countcell | k =
	k = (MAXCELL - freecell) / 2
	prints "+freecell = " print k nl

// data structure of type lis
// is an one-dimension vector with element 0 storing its size
// total space of vector is size+1

to addlis lis x | n =
	n = lis[0] + 1
	lis[n] = x
	lis[0] = n

: clearlis lis = lis[0] = 0
: sizeoflis lis = lis[0]

to countlis lis | i n =
	n = 0
	for i 1 (sizeoflis lis)
		if lis[i] != 0
			n = n + 1
	n

// end

// main-s.txt  parser generator in Som
// to be used with som 4.2  (lex in vm)
//   22 Sept 2009

//  system area  1 .. MAXSYS-1

//  250..299 tokstring
//  102  tokvalue
//  103  tokcol
//  104  line

//: tokvalue = M[102]
//: settokvalue x = M[102] = x

// print string of token, len 3 each
tok_str = array
	"*" 0 "/" 0 "-" 0 "+" 0 "=" 0 "==" 0 "&" 0 "|" 0
	"^" 0 "%" 0 "!" 0 "!=" 0 "<" 0 "<=" 0 "<<" 0 ">" 0
	">=" 0 ">>" 0 ":" 0 "(" 0 ")" 0 "[" 0 "]" 0 "{" 0
	"}" 0 "to" 0 "if" 0 "else" 0 "while" "for" 0 "break" "array"
	"case" 0 "enum" 0 "syscall"

to prtoken tk | a =
	if tk > tkSYSCALL
		seterror "unknown token"
	if tk >= tkSTAR
		a = tk - tkSTAR		// a 0..34
		prints tok_str+(a*3)
		break
	case tk
		tkIDEN: prints tokstring
//        tkNUMBER: print tokvalue
		tkSTRING:
			printc 34 prints tokstring printc 34
		tkEOF: prints "eof"

// ---- parser stack  operators -----

to ypush x =
	ysp = ysp + 1
	if ysp >= MAXYSTK
		seterror "parser stack overflow"
	ystack[ysp] = x

to ypop | x =
	if ysp <= 0
		seterror "parser stack underflow"
	x = ystack[ysp]
	ysp = ysp - 1
	x

to ytos = ystack[ysp]

// -------------------------------

// create a new string
to copystring s1 | s2 =
	s2 = array ((strlen s1) + 1)
	strcpy s2 s1
	s2

to doiden | s ty =
	s = copystring tokstring
	strunpack sbuf s
	if (sbuf[0] == 116) & (sbuf[1] == 107)  // tk
		ty = TERM
	else if streq tokstring "nil"
		ty = NIL
	else
		ty = NONTERM
	ypush (newatom ty s)

to dostring =
	ypush (newatom STRING (copystring tokstring))

//[%mark %atom ... %atom -> %alt]
to doalt | a b =
	b = NIL
	while ytos != MARK
		a = ypop
		b = cons a b
	a = ypop			// throw mark away
	ypush b

// print atom
to pratom type val =
	case type
		NIL:
			prints "NIL"
		SP:
			prints "SP"
		TERM:
			printc 39 prints val
		NONTERM:
			prints val
		STRING:
			printc 34 prints val printc 34

// print list
to prlist a =
	if a == NIL break
	if isatom a
		pratom (car a) (cdr a)
		space
	else
		printc 40   // "("
		while a != NIL
			prlist car a
			a = cdr a
		printc 41   // ")"

// ------------------------------

to expect tk mess =
	if tok != tk seterror mess

to commit status =
	if status == 0 seterror "syntax error"

to mylex =
	tok = syslex FI
//	printc 39  prtoken tok space

//  lexgen grammar

// grammar -> 'string | rule | 'eof
// rule -> 'id rule2
// rule2 ->  '= es | '[ var
// var -> 'id var | '] '= es
// es -> e1 es | '| es | '%
// e1 -> 'id | 'string

to e1 =
	if tok == tkIDEN
		doiden
		mylex
		1
	else if tok == tkSTRING
		dostring
		mylex
		1
	else
		0

to es =
	while e1 {}
	if tok == tkBAR
		doalt
		ypush MARK
		mylex
		es
	else if tok == tkMOD
		doalt
		doalt
		1
	else
		0

to var =
	while tok == tkIDEN
		doiden
		mylex
	if tok == tkRBRACKET
		doalt
		mylex
		expect tkEQ "expect ="
		ypush MARK
		mylex
		es
	else
		0

to rule2 =
	if tok == tkEQ
		ypush (newatom NIL 0)
		ypush MARK
		mylex
		es
	else if tok == tkLBRACKET
		ypush MARK
		mylex
		var
	else
		0

to rule =
	if tok == tkIDEN
		ypush MARK
		doiden
		mylex
		rule2
	else
		0

to grammar | a =
	mylex
	ypush MARK
	while tok != tkEOF
		if tok == tkSTRING
			a = copystring tokstring
			header =  append header (newatom STRING a)
		else
			commit rule
//			prlist ytos nl
		mylex
	doalt				// collect all rules

// ------------------

enum
	125 RBRACE
enum
	123 LBRACE

to lenlist m | k =
	k = 0
	while m != NIL
		k = k + 1
		m = cdr m
	k

to last m =
	while (cdr m) != NIL
		m = cdr m
	car m

to atomeq a1 a2 =
	((car a1) == (car a2)) & (streq (cdr a1) (cdr a2))

// reverse m1 to m2
to reverselist m1 | m2 =
	m2 = NIL
	while m1 != NIL
		m2 = cons (car m1) m2
		m1 = cdr m1
	m2

to trylex =
	if lexflag
		prints "lex" nl
		lexflag = 0

// for the first match item
to genone1 a | ty val =
	ty = car a
	val = cdr a
	case ty
		NIL:
			nilflag = 1
		TERM:
			trylex
			if loopflag
				prints "while tok == "
			else
				prints "if tok == "
			prints val  space printc LBRACE nl
			lexflag = 1
		NONTERM:
			trylex
			if loopflag
				prints "while "
			else
				prints "if "
			prints val  space printc LBRACE nl
		STRING:
			prints val nl
			trylex

// for one match item
to genone a | ty val =
	ty = car a
	val = cdr a
	case ty
		NIL:
			nilflag = 1
		TERM:
			trylex
			prints "expect "  prints val space
			printc 34 prints "missing " prints val printc 34 nl
			lexflag = 1
		NONTERM:
			trylex
			prints "commit " prints val nl
		STRING:
			prints val nl
			trylex

// for each match in an alternative
to genalt2 e level len =
	if e == NIL
		if nilflag == 0
			trylex
			prints "1 break }" nl
	else if loopflag & (level == len)	// don't do the last one
		printc RBRACE nl
	else
		if level == 1
			genone1 (car e)				// first match
		else
			genone (car e)				// the rest
		genalt2 (cdr e) (level + 1) len

// for each alternative
to genalt e | e2 =
	if e != NIL
		e2 = car e
		if atomeq lhs (last e2)
			loopflag = 1
		else
			loopflag = 0
		genalt2 e2 1 (lenlist e2)
		genalt (cdr e)

to gencase e =
	if e != NIL
		genone car e
		gencase cdr e

to genonecase a =
	prints (cdr car a) prints ": {" nl
	lexflag = 1
	gencase (cdr a)
	trylex
	prints "1 break }" nl

to genmulti e | k i a =
	prints "case tok {" nl
	k = lenlist e
	for i 1 k-1
		genonecase car e
		e = cdr e
	// the last one
	if (car car car e) == TERM 	// is tkXX
		genonecase car e
	else
		prints "else: {" nl
		genalt2 (car e) 1 (lenlist car e)
		printc RBRACE nl		// close last alt
	printc RBRACE nl

// check if there is a recursive rule
to chkRecursive e =
	if e == NIL
		0
	else
		if atomeq lhs (last car e)
			1 break
		chkRecursive (cdr e)

// to be multi, no recursion, alt > 2,
// all alts except last must be tkXX
to ismulti e | k i e2 f ty =
	if loopflag
		0 break
	k = lenlist e
	if k < 3
		0 break
	f = 1
	for i 1 k-1
		ty = car car car e	// type of first match
		if ty != TERM
			f = 0
			break
		e = cdr e
	f

// list of local variable
to prnames e =
	if e != NIL
		prints (cdr car e) space
		prnames cdr e

to endrule =
	trylex
	if nilflag
		print 1 nl
	else
		print 0 nl
	nilflag = 0

// for each rule
to genarule a | b nm =
	lhs = car a
	prints "to " prints (cdr lhs)
	nm = car cdr a
	if (car nm) != NIL
   		prints " | " prnames nm
	prints " = {" nl
	b = cdr cdr a
	loopflag = chkRecursive b
	if ismulti b
		genmulti b
	else
		genalt b
	endrule
	printc RBRACE nl nl

to gen e =
	if e != NIL
		genarule car e
		gen cdr e

to prheader h =
	if h != NIL
		prints cdr car h  nl
		prheader cdr h

// gen forward fun def
to genforward e | e2 =
	if e != NIL
		e2 = car e		// a rule
		prints "to " prints (cdr car e2)
		prints " = {}" nl
		genforward cdr e

//to testlex fn | tok =
//    FI = fopen fn 0
//    tok = syslex FI
//    while tok != tkEOF
//        prtoken tok space
//        tok = syslex FI
//    fclose FI

//to testunpack | i s1 s2 =
//	strunpack sbuf "I am a good boy"
//	i = 0
//	while sbuf[i] != 0
//		printc sbuf[i]
//		i = i + 1

to parse fn =
	FI = fopen fn 0
	grammar
	fclose FI

to main | e e2 =
	initPgen
	init_list
	header = list (newatom SP 0)
	parse "parse-grammar.txt"
	e = ypop
	prheader cdr header  nl
//	prlist e nl
//	genforward e  nl
//	e2 = reverselist e
	gen e

// End

