// npasm.c    assembler for npu
//   one pass assembler
//   P. Chongstitvatana   22 Mar 2012
//   improve: add label in data segment   7 Dec 2012

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "npu.h"			// opcode encoding

// FI is the stream the assembler reads its source from (asm2 sets it to
// stdin and tokenise passes it to lex).
// FO is declared next to it but is not referenced in the code shown here;
// all output in this file goes through printf.
FILE *FI, *FO;				// input output files

// token types returned by tokenise()
//   tkEOF    returned by lex at end of input and passed through unchanged
//   tkEND    the directive ".end"
//   tkOP     a mnemonic found in opsym (tokvalue holds its opcode number)
//   tkLABEL  token with prefix ':'
//   tkNUM    token whose first character is a digit
//   tkADS    token with prefix '@'
//   tkIM     token with prefix '#'
#define  tkEOF		20
#define  tkEND		21
#define  tkOP		22
#define  tkLABEL	23
#define  tkNUM		24
#define  tkADS		25
#define  tkIM	    26

// value stored in a symbol table record until the symbol is defined
#define  UNDEF		-1
// capacities of the statically allocated tables
#define  maxsym		100		// max size of symbol table
#define  maxcode	200		// max size of code
#define  maxpatch	200		// max no. of patch
#define  maxmem		1000	// max size of data segment (mem[])

// true when the two C strings are equal
#define  eqs(s1,s2)   (strcmp(s1,s2) == 0)

// fixed size name buffer: at most 31 characters plus the terminating 0
typedef char string[32];

// one symbol: its name and its value
//   value is UNDEF until the symbol is defined, then it holds a code
//   address (set by doLabel) or a data address (set by doData)
typedef struct{
	string key;
	int value;
} symrec;					// symbol table record

// one instruction being assembled
//   op    opcode number as returned by isOp
//   arg   up to three plain arguments, converted with atoi
//   ads   value of an '@' or '#' argument: a number or a symbol value
typedef struct{
	int op;
	int arg[3];
	int ads;
} coderec;					// code record

// one forward reference waiting to be resolved by backPatch
//   ads   index into code[] of the instruction whose ads field is patched
//   idx   index into symtab[] of the symbol that supplies the value
typedef struct{
	int ads;
	int idx;
} patchrec;					// patch list record

symrec symtab[maxsym];				// symbol table, nsym entries in use
coderec code[maxcode];				// code segment, indexed by ip
patchrec patchlist[maxpatch];		// forward references, npatch entries in use

// the following four are defined in another file (presumably the lexer
// that provides lex) and are only declared here
extern char buf[];	            	// input buffer, one line
extern int TP;                      // current token pointer
extern int line;					// current line
extern char tokstring[];			// current token
int tok, tokvalue;					// type of current token; opcode number when tok is tkOP
int ip, nsym, state, npatch;		// code index being assembled, symbols used,
									// parser state of doCode, patches used
int mem[maxmem];					// data segment
int dstart, dp;						// first data address written out, next data address to fill

// lexer entry point, defined elsewhere.  tokenise relies on it to return
// tkEOF at end of input and to leave the token text in tokstring.
int lex(FILE *fi);

void error(void){
	printf("error at line %d symbol '%s'\n",line,tokstring);
	exit(0);
}

// reset the lexer position and the assembler counters before a run
void init(void){
	// lexer side: no line read yet, empty buffer, token pointer at start
	buf[0] = '\0';
	line = TP = 0;

	// assembler side: empty code segment, patch list and data segment
	ip = npatch = 0;
	dp = dstart = 0;
}

// mnemonic table
//   the position of a mnemonic in this table plus 10 is the opcode number
//   that isOp returns for it
char opsym[][8] = {
  "ld", "st", "ldr", "str", "ldx", "stx", "ldw", "bc", "add", "sub",
  "mul", "ashr", "addi", "and", "or", "xor", "lt","le","eq", "jmp",
  "jz", "jnz", "rnd", "mv_t", "sys", "inc", "dec", "clr", "mov"
};

// number of op in opsym table, computed from the table itself so that the
// count can never be larger than the number of rows actually present
#define  numOp  ((int)(sizeof opsym / sizeof opsym[0]))

// check is opcode
//   return the opcode number (table index + 10) when s is a mnemonic,
//   return 0 when it is not
int isOp(char *s){
	int i;
	for(i = 0; i < numOp; i++ )
		if( strcmp(s,opsym[i]) == 0 ) return i+10;
	return 0;
}

// read a token and check prefix, return type of token
//   tkEOF from lex is passed through unchanged
//   ':' '@' '#' prefixes give tkLABEL, tkADS, tkIM
//   a leading digit gives tkNUM, ".end" gives tkEND
//   a mnemonic gives tkOP and leaves its opcode number in tokvalue
//   anything else returns 0
int tokenise(void){
	int t, c;
	t = lex(FI);
	if( t == tkEOF ) return t;
	// convert through unsigned char: isdigit is only defined for values
	// representable as unsigned char (or EOF), and plain char may be
	// negative for bytes above 127
	c = (unsigned char)tokstring[0];	// prefix
	if( c == ':' ) return tkLABEL;
	if( c == '@' ) return tkADS;
	if( c == '#' ) return tkIM;
	if( isdigit(c) ) return tkNUM;
	if( eqs(tokstring,".end") ) return tkEND;
	tokvalue = isOp(tokstring);
	if( tokvalue > 0 ) return tkOP;
	return 0;
}

// search key in symtab, if not found insert at end with value UNDEF
// return index to symtab
//   A key that does not fit in a symbol record, or an insert into a table
//   that is already full, stops the assembler with a message instead of
//   writing past the end of the arrays.
int searchSym(char *key){
	int i;
	for(i = 0; i < nsym; i++)
		if( eqs(key,symtab[i].key) ) return i;
	if( strlen(key) >= sizeof symtab[0].key ){
		fprintf(stderr,"symbol '%s' too long\n",key);
		exit(EXIT_FAILURE);
	}
	if( nsym >= (int)(sizeof symtab / sizeof symtab[0]) ){
		fprintf(stderr,"symbol table full at '%s'\n",key);
		exit(EXIT_FAILURE);
	}
	strcpy(symtab[nsym].key, key);		// fits: length checked above
	symtab[nsym].value = UNDEF;
	i = nsym;
	nsym++;
	return i;
}
/*
void dumpSym(void){
	int i;
	for(i = 0; i < nsym; i++)
		printf("key %s value %d\n",symtab[i].key,symtab[i].value);
}
*/
// add address and index of key to symtab[.] to patch list
//   ads     index into code[] of the instruction to patch later
//   idxkey  index into symtab[] of the symbol that is still undefined
//   Stops the assembler with a message when the patch list is full
//   instead of writing past the end of patchlist[].
void addPatchList(int ads, int idxkey){
	if( npatch >= (int)(sizeof patchlist / sizeof patchlist[0]) ){
		fprintf(stderr,"patch list full, too many forward references\n");
		exit(EXIT_FAILURE);
	}
	patchlist[npatch].ads = ads;
	patchlist[npatch].idx = idxkey;
	npatch++;
}

// define label s at the current code address
//   the symbol is looked up (and created when new) in symtab;
//   a symbol that already has a value is reported through error()
void doLabel(char *s){
	int slot = searchSym(s);
	int defined = (symtab[slot].value != UNDEF);

	if( defined )
		error();
	symtab[slot].value = ip;
}

// check that s is a decimal integer: an optional '-' followed by one or
// more digits
//   return 1 when it is, 0 otherwise; "" and "-" contain no digit and are
//   therefore not numbers
int isnum(char *s){
	if( s[0] == '-' ) s++;
	if( *s == 0 ) return 0;			// no digit at all
	while( *s != 0 ){
		// cast: isdigit is undefined for negative char values
		if( !isdigit((unsigned char)*s) ) return 0;
		s++;
	}
	return 1;
}

// evaluate the text of an address or immediate argument (prefix removed)
//   a number is converted directly
//   a symbol is looked up in symtab (and created when new):
//     defined    -> its value is returned
//     undefined  -> the current instruction is put on the patch list
//                   and 0 is returned as a placeholder
int doAds(char *s){
	int idx;

	if( isnum(s) )
		return atoi(s);

	idx = searchSym(s);
	if( symtab[idx].value == UNDEF ){
		addPatchList(ip,idx);	// resolved later by backPatch
		return 0;
	}
	return symtab[idx].value;
}

// start a new instruction: store the opcode of the current token in
// code[ip]
//   Stops the assembler with a message when the code segment is full
//   instead of writing past the end of code[].  keepArg fills the same
//   record afterwards, so this one check covers the arguments as well.
void newInst(void){
	int capacity = (int)(sizeof code / sizeof code[0]);
	if( ip < 0 || ip >= capacity ){
		fprintf(stderr,"code segment full (max %d instructions)\n",capacity);
		exit(EXIT_FAILURE);
	}
	code[ip].op = tokvalue;
}

// store the current token as an argument of the instruction at code[ip]
//   '@' and '#' tokens go to the ads field (i is not used for them)
//   every other token is converted with atoi and goes to arg[i]
void keepArg(int i){
	int isAddress = (tok == tkADS) || (tok == tkIM);

	if( !isAddress ){			// mostly register
		code[ip].arg[i] = atoi(tokstring);
		return;
	}
	code[ip].ads = doAds(tokstring+1);	// skip the prefix character
}

// assemble code segment
//   reads tokens until .end and fills code[0..ip-1]
//
//   state 1      no instruction open
//   state 2,3,4  an instruction is open and its next argument is
//                number 0, 1, 2 respectively
//
//   .end     close the open instruction (ip++), leave
//   label    close the open instruction (ip++), define label, state 1
//   op       close the open instruction (ip++), start a new one, state 2
//   other    state 1: error
//            state 2,3: store argument, move to the next state
//            state 4: store argument, close instruction (ip++), state 1
void doCode(void){
	state = 1;
	for(;;){
		tok = tokenise();

		if( tok == tkEND || tok == tkLABEL ){
			if( state != 1 ) ip++;		// an instruction was still open
			if( tok == tkEND ) return;
			doLabel(tokstring+1);		// skip the ':' prefix
			state = 1;
			continue;
		}

		if( state == 1 ){
			// only an opcode may start an instruction
			if( tok != tkOP )
				error();
			else{
				newInst();
				state = 2;
			}
			continue;
		}

		// from here on an instruction is open (state 2, 3 or 4)
		if( tok == tkOP ){
			ip++;
			newInst();
			state = 2;
		}else if( state == 4 ){
			keepArg(2);					// third and last argument
			ip++;
			state = 1;
		}else{
			keepArg(state - 2);			// state 2 -> arg 0, state 3 -> arg 1
			state++;
		}
	}
}

// use patchlist {ads,key} search symtab for value
//   every recorded forward reference gets the value of its symbol written
//   into the ads field of the instruction that used it.
//   A symbol that is still undefined stops the assembler; the message is
//   written to stderr because stdout carries the object code, and the exit
//   status is non-zero so that the failure can be detected by the caller.
void backPatch(void){
	int i, a, a2, idx;
	for(i = 0; i < npatch; i++){
		a = patchlist[i].ads;
		idx = patchlist[i].idx;
		a2 = symtab[idx].value;
		if( a2 == UNDEF ){
			fprintf(stderr,"symbol %s undefined\n",symtab[idx].key);
			exit(EXIT_FAILURE);
		}
		code[a].ads = a2;
	}
}
/*
void dumpPatchList(void){
	int i, a, idx;
	for(i = 0; i < npatch; i++){
		a = patchlist[i].ads;
		idx = patchlist[i].idx;
		printf("at %d key %s value %d\n", i,
			symtab[idx].key, symtab[idx].value);
	}
}
*/

// write one object code line to stdout: the opcode followed by its three
// fields, separated by single spaces
void out4(int op, int a1, int a2, int a3){
	fprintf(stdout,"%d %d %d %d\n",op,a1,a2,a3);
}

// assemble data segment
//   reads tokens until .end
//     @n      set the data pointer dp to n; the first non-zero setting
//             while dstart is still 0 also becomes dstart
//     :name   define name as the current data address
//     other   converted with atoi and stored at mem[dp], dp advances
//   dp is range checked before it is used, so a bad address or too much
//   data stops the assembler instead of writing outside mem[].
//   NOTE: the end-of-input token is not tested here, so when .end is
//   missing it lands in the "other" branch; the range check then ends the
//   run once mem[] is full.
void doData(void){
	int idx;
	int memsize = (int)(sizeof mem / sizeof mem[0]);
	while(1){
		tok = tokenise();
		if( tok == tkEND ) return;
		if( tok == tkADS ){
			dp = atoi(tokstring+1);
			if( dp < 0 || dp > memsize ){
				fprintf(stderr,"data address '%s' out of range\n",tokstring);
				exit(EXIT_FAILURE);
			}
			if( dstart == 0 ) dstart = dp;
		}else if( tok == tkLABEL ){
			idx = searchSym(tokstring+1);
			symtab[idx].value = dp;
		}else{
			if( dp < 0 || dp >= memsize ){
				fprintf(stderr,"data segment overflow at '%s'\n",tokstring);
				exit(EXIT_FAILURE);
			}
			mem[dp] = atoi(tokstring);
			dp++;
		}
	}
}

// output code segment, in the correct order of fields
void outCode(void){
	int i, op;
	coderec cd;

	printf("0 %d\n",ip);		// header: start len
	for(i = 0; i < ip; i++){
		cd = code[i];
		op = cd.op;
		switch(op){
		case xLd:
		case xSt: out4(op,cd.ads,cd.arg[0],0); break;
		case xLdr:
		case xStr: out4(op,cd.arg[0],0,0); break;
		case xLdx:
		case xStx: out4(op,cd.arg[0],cd.arg[1],cd.arg[2]); break;
		case xLdw: out4(op,cd.ads,0,0); break;
		case xBc:  out4(op,cd.arg[0],cd.arg[1],0); break;
		case xAdd:
		case xSub:
		case xMul:
		case xAnd:
		case xOr:
		case xXor:
		case xLt:
		case xLe:
		case xMvt:
		case xEq: out4(op,cd.arg[0],cd.arg[1],cd.arg[2]); break;
		case xAddi:
		case xAshr: out4(op,cd.ads,cd.arg[0],cd.arg[1]); break;
		case xJmp: out4(op,cd.ads,0,0); break;
		case xJz:
		case xJnz: out4(op,cd.ads,cd.arg[0],0); break;
		case xRnd:
		case xSys: out4(op,cd.arg[0],0,0); break;
		// pseudo
		case xInc: out4(xAddi,1,cd.arg[0],cd.arg[0]); break;
		case xDec: out4(xAddi,-1,cd.arg[0],cd.arg[0]); break;
		case xClr: out4(xXor,cd.arg[0],cd.arg[0],cd.arg[0]); break;
		case xMov: out4(xAddi,0,cd.arg[0],cd.arg[1]); break;
		}
	}
}

// output data segment
//   first a header line "<start address> <length>", then the words
//   mem[dstart] .. mem[dp-1], one per line
void outData(void){
	int addr = dstart;

	printf("%d %d\n",dstart,dp-dstart);	// header: start len
	while( addr < dp ){
		printf("%d\n",mem[addr]);
		addr++;
	}
}

// write the complete object file to stdout:
// the MAGIC number, then the code segment, then the data segment
void outobj(void){
	fprintf(stdout,"%d\n",MAGIC);
	outCode();
	outData();
}

// assemble one program: source is read from stdin, object code is
// written by outobj
void asm2(void){
	FI = stdin;

	// read the source: code section up to its .end, then data section
	doCode();
	doData();
	fclose(FI);

	// data labels are known only now, so references are resolved here
	backPatch();

	outobj();
}

// program entry: reset the assembler state, then assemble stdin
int main(void){
	init();
	asm2();

	return 0;
}

// ----------- tester -------------

/*
void pre(void){
	int tok, oldline;
	FI = fopen("test.txt","r");
	oldline = line;
	tok = lex(FI);
	while( tok != tkEOF ){
		if(oldline != line){
			printf("\n");
			oldline = line;
		}
		printf("%s ",tokstring);
		tok = lex(FI);
	}
	fclose(FI);
}
*/


