/* as23.c  a simple assembler for s2 version 3

structure of code (case insensitive)

	.symbol                   define symbol
	symbol value
	 ...
	.code ads                 code segment at ads
	[:label] opcode operand
	 ...
	.data ads				  data segment at ads
	sym sym ...               define words
	.end

addressing mode
  absolute:  ld r1 ads
  indirect:  ld r1 @disp r2
  index:     ld r1 +r2 r3
  register:  add r1 r2 r3
  immediate: add r1 r2 #sym

operand = sym @sym +sym #sym

modify from cpu3 assembler A3  version 1.0  7th September 1999
fix bug : undef op, add listing 			2 Oct 1999
add extended instructions  					28 November 2001
modify from as2								16 Jan 2007
improve for s2.3 (for embed sys)			1 Jan 2013

Prabhas Chongstitvatana
*/

#include "as23.h"

//char 	lbuf[MXBUF];				// copy of input
char 	ibuf[MXBUF], *cp = NULL;   	// input line buffer; cp = current token in ibuf (NULL until a line is read)
char	ob[MXBUF];					// output buffer, one object-file line at a time
//char	cbuf[MXBUF];				// output buffer for listing
char 	fout[80], flis[80];			// object / listing file names

// Token separators.  '\r' is included so that a source file with CRLF line
// endings does not leave a stray carriage return glued to the last token
// of every line.
static 	char sep[] = " \t\r\n";
char    *w;							// current input word
int 	lineno,loc;		   			// line num, current data ads
FILE 	*fi, *fo, *fl;				// input, object and listing streams
token	mem[MXDAT];					// store data tokens for pass2
int 	tp = 0;						// token index into mem[.]
int		pass;						// current pass (1 or 2), used by error()
int 	ipzero, datzero;			// start of code, data
int 	ip = 0;						// index to code[.]
int 	npatch = 0;					// size of patch list

coderec code[MAXCODE];				// code segment
patchrec patchlist[MAXPATCH];		// forward references to resolve in pass2

// Reserved words preloaded into the symbol table by initsymbol():
// the instruction mnemonics followed by the register names R0..R31.
// The entry with an empty name terminates the table.
struct { char name[8]; int value; }
initsym[] = {
	// data movement
	{"NOP",NOP}, {"LD",LD}, {"ST",ST}, {"MOV",MOV},
	// control transfer
	{"JMP",JMP}, {"JAL",JAL}, {"RET",RET}, {"JT",JT}, {"JF",JF},
	// arithmetic
	{"ADD",ADD}, {"SUB",SUB}, {"MUL",MUL}, {"DIV",DIV},
	// bitwise logic
	{"AND",AND}, {"OR",OR}, {"XOR",XOR},
	// comparison
	{"EQ",EQ}, {"NE",NE}, {"LT",LT}, {"LE",LE},
	{"GT",GT}, {"GE",GE},
	// shift and modulo
	{"SHL",SHL}, {"SHR",SHR}, {"MOD",MOD},
	// system and stack
	{"TRAP",TRAP}, {"PUSH",PUSH}, {"POP",POP},
//	{"XL",XL}, {"XD",XD}, {"XX",XX},
	// registers
	{"R0",0},   {"R1",1},   {"R2",2},   {"R3",3},
	{"R4",4},   {"R5",5},   {"R6",6},   {"R7",7},
	{"R8",8},   {"R9",9},   {"R10",10}, {"R11",11},
	{"R12",12}, {"R13",13}, {"R14",14}, {"R15",15},
	{"R16",16}, {"R17",17}, {"R18",18}, {"R19",19},
	{"R20",20}, {"R21",21}, {"R22",22}, {"R23",23},
	{"R24",24}, {"R25",25}, {"R26",26}, {"R27",27},
	{"R28",28}, {"R29",29}, {"R30",30}, {"R31",31},
	// end marker
	{"",0}
};

void error(char *s){
	if(pass == 1)
		printf("line %d error: %s symbol %s\n",lineno,s,w);
	else
		printf("error: %s\n",s);
	exit(0);
}

// Fetch the next token from the input file.
// Continues splitting the current line; when the line is exhausted, or the
// token is exactly ";" (the rest of that line is then dropped), a new line
// is read and lineno is advanced.  The token is converted to upper case in
// place.  Returns a pointer into ibuf, or NULL when the input is exhausted.
char *tok(void){
	if( cp != NULL )
		cp = strtok(NULL,sep);		// next word of the line in progress
	for(;;){
		if( cp != NULL && !eqs(cp,";") )
			break;					// got a real token
		if( fgets(ibuf,MXBUF,fi) == NULL )
			return NULL;			// end of input
		lineno++;
		cp = strtok(ibuf, sep);
	}
	cp = strupr(cp);   // convert to uppercase
	return cp;
}

// store data token
void store(char type, int ref){
	if( tp >= MXDAT ) error("out of memory");
	mem[tp].type = type;
	mem[tp].ref = ref;
	tp++;
}

// Check whether s is a decimal integer literal: an optional leading '-'
// followed by one or more digits and nothing else.
//   s : NUL-terminated string (NULL is treated as "not a number")
// Returns 1 if s is a number, 0 otherwise.
int isnumber(char *s){
	int i = 0;
	if( s == NULL ) return 0;
	if( s[0] == '-' ) i = 1;
	// at least one digit is required, so "" and "-" are rejected;
	// the cast keeps isdigit() defined for chars with the high bit set
	if( !isdigit((unsigned char)s[i]) ) return 0;
	while( isdigit((unsigned char)s[i]) ) i++;
	return s[i] == 0;
}

int getNum(void){
	w = tok();
	if(isnumber(w))	return atoi(w);
	error("expect number");
	return 0;
}

// list of patch, at ip, value idx to symbol table
void add_patchlist(int ip, int idx){
	patchlist[npatch].ads = ip;
	patchlist[npatch].idx = idx;
	npatch++;
}

// Evaluate an operand word.
//   s    : operand text (number or symbol name)
//   type : out, set to NUM, SYM or UD
// A number yields its value (NUM).  A symbol that already has a value
// yields that value (SYM).  A symbol without a value is a forward
// reference: it is added to the patch list for the current instruction,
// type is set to UD and the symbol table index is returned instead.
int valueof(char *s, int *type){
	int sym, val;
	if( !isnumber(s) ){
		sym = putsym(s);
		val = getValue(sym);
		if( val != UNDEF ){
			*type = SYM;
			return val;
		}
		add_patchlist(ip,sym);
		*type = UD;
		return sym;
	}
	*type = NUM;
	return atoi(s);
}

void psR(int k){
	int v, type;
	w = tok();
	if(w[0]==':' || w[0]=='@' || w[0]=='+' || w[0]=='#')
		error("expect register");
	code[ip].arg[k] = valueof(w,&type);
	if( type == UD )
		error("undefine symbol");
}

// store argument at arg[2]
void psAds(void){
	int v,type;
	w = tok();
	if(w[0]==':' || w[0]=='@' || w[0]=='+' || w[0]=='#')
		error("expect ads");
	code[ip].arg[2] = valueof(w,&type);
}

void psIm(void){
	int v,type;
	w = tok();
	if(w[0] != '#')
		error("expect constant");
	code[ip].arg[2] = valueof(w+1,&type);
}

//  arg of ld/st  {@n r, +r r, ads}
void psRM(void){
	int v, type;
	w = tok();
	if(w[0] == '@'){
		code[ip].arg[2] = valueof(w+1,&type);
		psR(1);
		code[ip].op += 1;			// adjust op, indirect
	}else if(w[0] == '+'){
		code[ip].arg[1] = valueof(w+1,&type);
		if( type == UD )
			error("undefine symbol");
		psR(2);
		code[ip].op = (code[ip].op == LD) ? LDX : STX;
	}else{
		code[ip].arg[2] = valueof(w,&type);
	}
}

void psRI(void){
	int v,type;
	w = tok();
	if(w[0] == '#'){
		code[ip].arg[2] = valueof(w+1,&type);
		code[ip].op -= 22;			// adjust op, im
	}else{
		code[ip].arg[2] = valueof(w,&type);
		if( type == UD )
			error("undefine symbol");
	}
}

// read argument according to OP
void readArg(int op){
	int v, type;
	code[ip].op = op;
	switch( op ) {
	case LD:
	case ST: psR(0); psRM(); break;
	case JMP: psAds(); break;
	case RET: psR(0);  break;
	case TRAP: psR(0); psIm(); break;
	case JAL:
	case JT:
	case JF: psR(0); psAds(); break;
	case ADD:			// three arg
	case SUB:
	case MUL:
	case DIV:
	case AND:
	case OR:
	case XOR:
	case EQ:
	case NE:
	case LT:
	case LE:
	case GT:
	case GE:
	case MOD:
	case SHL:
	case SHR: psR(0); psR(1); psRI(); break;
	case MOV: psR(0); psRI(); break;
	case PUSH:
	case POP:  psR(0); psR(1); break;
	default: error("undefine op");
	}
}

void doSymbol(void){
	int idx;
	if( !eqs(w,".SYMBOL"))
		error("expect .symbol");
	w = tok();
	while(!eqs(w,".CODE") ){	// until code section
		idx = putsym(w);
		if(getValue(idx) != UNDEF)
			error("duplicate symbol");
		setsym(idx,getNum(),SYM);
		w = tok();
	}
}

void doCode(void){
	int idx;
	ip = getNum();
	ipzero = ip;
	w = tok();
	while(!eqs(w,".DATA") ){	// until data section
		if( w[0] == ':' ){		// insert symbol
			idx = putsym(w+1);
			if(getValue(idx) != UNDEF)
				error("duplicate label");
			setsym(idx,ip,SYM);
		}else{
			idx = searchsym(w);
			if(idx < 0 || getType(idx) != OP)
				error("undefined op");
			readArg(getValue(idx));
			ip++;
		}
		w = tok();
	}
}

void doData(void){
	loc = getNum();
	datzero = loc;
	w = tok();
	while(!eqs(w,".END")){
		if( isnumber(w) )
			store(NUM,atoi(w));
		else
			store(SYM,putsym(w));
		loc++;
		w = tok();
	}
	store(DEND,loc);
}

// Pass 1: read the whole source file, section by section.
void pass1(void){
	pass = 1;

	w = tok();			// first token, examined by doSymbol()
	doSymbol();			// .symbol section
	doCode();			// .code section
	doData();			// .data section
}

/*  opcode encoding (flat encode)

op:6 r1:5 r2:5 d:16   d ={ads,disp,r3}

1   ld r1 ads
2   ld r1 @d r2
3 	st r1 ads
4   st r1 @d r2
5   -
6	jmp ads        // pseudo
7   jal r1 ads
8   jt r1 ads
9   jf r1 ads
10  add r1 r2 #n
11  sub r1 r2 #n
12  mul r1 r2 #n
13  div ...
14  and ...
15  or ...
16  xor ...
17  eq ...
18  ne ...
19  lt ...
20  le ...
21  gt ...
22  ge ...
23  shl ...
24  shr ...
25  mod ...
26..31  undef

32  add r1 r2 r3
33  sub r1 r2 r3
...
45  shl r1 r2 r3
46  shr r1 r2 r3
47  mod r1 r2 r3

48  ld r1 +r2 r3
49  st r1 +r2 r3
50  ret r1
51	trap r1 #n
52  push r1 r2
53  pop  r1 r2
54  mov r1 #n	// pseudo
76  mov r1 r2

*/

// Return the value of the data token at mem[tp].
// A NUM token yields its literal value as is; previously a literal that
// happened to equal UNDEF was misreported as an undefined symbol.
// A SYM token yields the value of the symbol it refers to.
// Reports "undefined symbol" for a symbol without a value and for any
// other token type.
int rdtokval(void){
	int v;
	if( mem[tp].type == NUM )
		return mem[tp].ref;		// literal: every value is legal
	v = UNDEF;
	if( mem[tp].type == SYM )
		v = getValue(mem[tp].ref);
	if(v == UNDEF) error("undefined symbol");
	return v;
}

void outobj(void);

// Emit one instruction in the single s2.3 object format:
// four comma-terminated decimal fields "op, a1, a2, a3," on one line.
PRIVATE void prZ(int op, int a1, int a2, int a3){
	sprintf(ob, "%d, %d, %d, %d,", op, a1, a2, a3);	// format into the output buffer
	outobj();										// then write that line
}

void gencode(void){
	int i, op, a1, a2, a3;
	
	for(i = 0; i < ip; i++){
		op = code[i].op;
		a1 = code[i].arg[0];
		a2 = code[i].arg[1];
		a3 = code[i].arg[2];

		switch( op ){
		case LD:
		case ST:  prZ(op,a1,0,a3); break;
		case LDD:
		case LDX:
		case STD:
		case STX: prZ(op,a1,a2,a3); break;
		case JAL:
		case JT:
		case JF:  prZ(op,a1,0,a3); break;
		case RET: prZ(op,a1,0,0); break;
		case ADD:
		case SUB:
		case MUL:
		case DIV:
		case AND:
		case OR:
		case XOR:
		case EQ:
		case NE:
		case LT:
		case LE:
		case GT:
		case GE:
		case SHL:
		case SHR:
		case MOD:
		case ADDI:
		case SUBI:
		case MULI:
		case DIVI:
		case ANDI:
		case ORI:
		case XORI:
		case EQI:
		case NEI:
		case LTI:
		case LEI:
		case GTI:
		case GEI:
		case SHLI:
		case SHRI:
		case MODI: prZ(op,a1,a2,a3); break;
		case TRAP: prZ(op,a1,0,a3); break;
		case PUSH:
		case POP:  prZ(op,a1,a2,0); break;
		// pseudo
		case JMP: prZ(JF,0,0,a3); break;
		case MOV: prZ(OR,a1,0,a3); break;
		case MOVI: prZ(ORI,a1,0,a3); break;

		default:  error("undefine op");
		}
	}
}

// Write the contents of the output buffer ob to the object file fo,
// followed by a newline.
void outobj(void){
	fputs(ob, fo);
	fputc('\n', fo);
}

/*
void dumpAlist(void){
	int i;
	for(i = 0; i < npatch; i++){
		printf("ads %d idx %d\n",patchlist[i].ads,patchlist[i].idx );
	}
}
*/
// Resolve the forward references collected in pass 1: for every entry of
// the patch list, store the symbol's value into arg[2] of the instruction
// that used it.  A symbol that still has no value is fatal; its name is
// printed in front of the error message.
void backpatch(void){
	int k;
	for(k = 0; k < npatch; k++){
		int sym = patchlist[k].idx;
		int val = getValue(sym);
		if( val == UNDEF ){
			printf("%s ",getName(sym));
			error("undefine symbol");
		}
		code[patchlist[k].ads].arg[2] = val;
	}
}

// Pass 2: resolve forward references and write the object file.
// Output order: the magic number; a code header line (ipzero, ip) followed
// by the instructions; a data header line (datzero, number of data words)
// followed by one line per data word; and a final "0" line.
void pass2(void){
	pass = 2;
	backpatch();

	sprintf(ob,"%d,",MAGIC);
	outobj();

	// code segment
	sprintf(ob,"%d, %d,",ipzero,ip);
	outobj();
	gencode();

	// data segment
	sprintf(ob,"%d, %d,",datzero,loc-datzero);  // data len
	outobj();
	for(tp = 0; mem[tp].type != DEND; tp++){
		sprintf(ob,"%d, ",rdtokval());
		outobj();
	}

	sprintf(ob,"0");
	outobj();
}

// Enter every reserved word of initsym[.] into the symbol table with
// type OP.  The table ends at the entry whose name is empty.
void initsymbol(void){
	int k = 0;
	while( initsym[k].name[0] != 0 ){
		int slot = putsym(initsym[k].name);
		setsym(slot, initsym[k].value, OP);
		k++;
	}
}

// Make an object file name from the source file name.
//   source : source file name, may include a directory part
//   obj    : out, receives the source name with everything from the first
//            '.' of its file-name part replaced by ".obj"
//   lis    : unused (listing name generation is disabled)
// Only the part after the last '/' or '\' is searched for the '.', so a
// dot in the directory part ("./prog.s", "../a.b/prog") is kept intact.
// obj must have room for strlen(source) + 5 characters.
void makename( char *source, char *obj, char *lis ){
	size_t i, base = 0, n;
	for(i = 0; source[i] != 0; i++)
		if( source[i] == '/' || source[i] == '\\' )
			base = i + 1;			// start of the file-name part
	n = base + strcspn(source + base,".");
	memcpy(obj,source,n);
	strcpy(obj+n,".obj");
	(void)lis;
//	strncpy(lis,source,n);
//	strcpy(lis+n,".lis");
}

// Entry point:  as23 inputfile
// Assembles inputfile and writes the object code to a file whose name is
// derived from the input name by makename().
// Returns 0 on success; exits with status 1 on a usage or file error.
int main(int argc, char *argv[]){

	if( argc < 2 ) {
		printf("usage : as23 inputfile\n");
		exit(1);
	}
	// makename() writes at most strlen(source) + sizeof(".obj") bytes
	if( strlen(argv[1]) + sizeof(".obj") > sizeof(fout) ){
		printf("input file name too long\n");
		exit(1);
	}
	fi = fopen(argv[1],"r");
	if( fi == NULL ){
		printf("input file not found\n");
		exit(1);
	}
	makename(argv[1], fout, flis);
	initsymbol();
	lineno = 0;
	loc = 0;
	pass1();
	fclose(fi);
//	fl = fopen(flis,"w");
	fo = fopen(fout,"w");
	if( fo == NULL ){			// pass2() writes to fo unconditionally
		printf("cannot create %s\n",fout);
		exit(1);
	}
	pass2();
	fclose(fo);
//	fclose(fl);
	return 0;
}

