;  sum the elements of an array terminated with 0
;  shows how to access an array
;  uses all four cores (4x)
;    P. Chongstitvatana  29 Dec 2012

;  i = 0; s = 0;
;  while ax[i] != 0
;     s = s + ax[i]
;     i++

;  we process four elements at once, one per core, walking
;    across the array by 4 at each step.  the array is assumed
;    to be padded with zeros out to a multiple of 4 elements
;    (the data below adds a full group of four zeros).  when
;    all cores detect zero they will stop.  the last step is to
;    sum the results of the cores together.  "bc" is used
;    to send the result of one core to the others.


; r1 s, r2 i, r3 ax[i], r5 &ax
; r6 t1, r7 t2, r8 t3


  ; setup, every core gets s = 0, the array base and its own start index
  clr 1              ; r1 (s) = 0
  addi 5 5 #105      ; r5 = address of ax[0], assumes r5 is 0 on entry
  ld 0 @100          ; core 0 fetches its start index
  ld 1 @101          ; core 1 fetches its start index
  ld 2 @102          ; core 2 fetches its start index
  ld 3 @103          ; core 3 fetches its start index
  ldr 2              ; r2 (i) = fetched value, so i = 0,1,2,3 across the cores
:scan
  ; each core fetches the word at r5 + r2, that is ax[i] for its own i
  ldx 0 5 2
  ldx 1 5 2
  ldx 2 5 2
  ldx 3 5 2
  ldr 3              ; r3 = ax[i]
  jz 3 @reduce       ; ax[i] == 0 ends the scan
  add 1 1 3          ; s = s + ax[i]
  addi 2 2 #4        ; i = i + 4, step over the elements taken by the other cores
  jmp @scan
:reduce
  ; combine the four partial sums
  str 1              ; make the partial sum s of each core available to bc
  bc 6 0             ; r6 = s of core 0
  bc 7 1             ; r7 = s of core 1
  bc 8 2             ; r8 = s of core 2
  add 1 1 6
  add 1 1 7
  add 1 1 8          ; core 3 r1 = s[3] + s[0] + s[1] + s[2], the total
  str 1              ; stage the total for the store
  st 3 @104          ; core 3 writes the total to the result word
  sys 4              ; NOTE(review) presumably stops the machine, confirm against the simulator
.end

@100		
0 1 2 3         ; words 100..103, starting index i for core 0,1,2,3
0               ; word 104, the final sum is stored here
                ; ax[.] starts at word 105
1 2 3 4 5 6 7 0 ; seven values followed by the terminating 0
0 0 0 0         ; zero padding, the next step of 4 reads 0 on every core
.end
