We start with simple examples of simple sequences of instructions
      register to register
  addi x2,x0,11          # x2 = x0 + 11 = 11  (x0 is used as the constant 0)
          add  x3,x2,x0  # x3 = x2 + x0, i.e. copy x2 into x3
      
      register to/from memory
  addi x5,x0,0x11        #
        set x5 to 0x11
          sw x5, 0x100(x0)       # store at
        address 0x100
          lw x6, 0x100(x0)       # get from
        mem
          addi
        x6,x6,1           
          sw x6, 0x104(x0)       # store to
        mem 0x104
  a = b + c
       
      local variables are stored in registers
        let x4 - a, x5 -b, x6 - c
      
        add x4, x5, x6      # a = b + c  (x4 = x5 + x6)
      
      global variables are stored in memory
        let M[0x100] - M[a], M[0x104] - M[b], M[0x108] - M[c]
        let x4 - a, x5 - b, x6 - c
      
      we must move the data from memory to registers before performing the addition
      
        lw  x5, 0x104(x0)     # get b
          lw  x6, 0x108(x0)     # get c
          add x4, x5, x6        # a = b + c, computed in registers
          sw  x4, 0x100(x0)     # store to a
using offset,  base address points to the beginning of the array
      
         ax[5]    ax starts at base address 0x100; 5 elements of 4 bytes each,
         size 20 bytes
      
         to get an element at ax[1], each element is 32-bit
         let x3 - index (byte offset), x4 - base, x5 - effective address, x6 - value
      
         addi x3,x0,4        
        # offset to index 1, 4 bytes
           addi x4,x0,0x100     # base
           add 
        x5,x3,x4        # compute effective
        address
           lw  
        x6,0(x5)        # get from memory
while condition
         body
is transformed into sequences of instructions
loop:
        test condition
        jump if false to exit
        body
        jump loop 
      exit:
example
i = 0
        while i < 10
          i = i + 1
      
      let x3 - i
      
         addi x3,x0,0    # i = 0
           addi x4,x0,10   # const 10
        loop:
           bge  x3,x4, exit   # test: leave the loop when i >= 10 (i < 10 is false)
           addi x3,x3,1       # body: i = i + 1
           j loop             # go back and test the condition again
        exit:
sum all elements of an array
pseudo code
ax[5]
        
        s = 0
        i = 0
        while i < 5
          s = s + ax[i]
          i = i + 1
array ax contains 5 integers (each 32-bits) hence has the size 20 bytes
let x3 - s, x4 - i, x5 - const 5, x6 - base, x7 - effective address, 
      x8 - offset, x9 - ax[i]
      
        addi x3,x0,0       #  s =
        0
          addi x4,x0,0       #  i = 0
          addi x5,x0,5       #  const 5
          addi x6,x0,0x100   #  base address of ax[]
          addi x8,x0,0       #  offset =
        0
        loop:
          bge x4, x5, exit
          add x7, x6, x8    # compute effective address 
          lw  x9, 0(x7)     # get ax[i]
          add x3, x3, x9    # s = s + ax[i]
          addi x8, x8, 4    # next element, offset 4 bytes
          addi x4, x4, 1    # increment index
          j loop
        exit:
      use "godbolt.org" (Compiler
      Explorer)  with "RISC_V rv32gc clang" compiler
      
      the first simple program (C)
      
      int a,b,c;      /* global variables */
        
        int main(){
          a = 11;     /* store the constant 11 into the global a */
        }
      
      risc-v assembly output, we put a at address 0x100
      
        li     a1, 0x0100     # a1 = address of a
          li     a0, 11       # a0 = 11
          sw     a0, 0(a1)    # a = 11
      
      second program: if then else
      
      int a,b,c;      /* global variables */
        
        int main(){
          if(a > 0){      /* the assignment runs only when a is positive */
            b = 22;
          }
        }
      
      risc-v assembly output, a at 0x100, b at 0x104
      
         li     a1, 0x100
           lw     a1, 0(a1)
           bge    a0, a1, LBB0_2
           li     a1, 0x104
           li     a0, 22
           sw     a0, 0(a1)
        LBB0_2:
      
      call a function
      
      to perform a "jump" to a subroutine, we need a way to "come back" to the
      place we called from.
      use a register to store the "return address"
int twice(int x){
        return( x + x);
      }
      
      int main(){
        int a;  
        a = twice(2);
      } j main
       twice:                             
      # @twice
              addi    sp, sp,
      -16          # create stack
      frame with 4 slots
             
      sw      ra,
      12(sp)           # first
      slot keeps return address
             
      sw      s0,
      8(sp)            #
      second slot keeps s0
              addi    s0, sp,
      16           # set s0 to
      this stack frame
             
      sw      a0,
      -12(s0)          # store
      passing value (x) to slot 4
             
      lw      a0,
      -12(s0)          # get x
              add     a0,
      a0, a0           # x +
      x   return value in a0
             
      lw      ra,
      12(sp)           #
      restore return address 
             
      lw      s0,
      8(sp)            
              addi    sp, sp,
      16           # delete
      stack frame
              ret
      main:                                # @main
              addi    sp, sp, -16          # stack frame has 4 slots
              sw      ra, 12(sp)           # save ra in the first slot
              sw      s0, 8(sp)            # save s0 in the second slot
              addi    s0, sp, 16           # set s0 to this stack frame
              li      a0, 2                # pass number 2 in a0
              call    twice                # call twice  ***
              sw      a0, -12(s0)          # put return value to a
              li      a0, 0                # a0 = 0
              lw      ra, 12(sp)           # restore ra
              lw      s0, 8(sp)            # restore s0
              addi    sp, sp, 16           # restore sp
              # NOTE(review): no "ret" here; the listing enters main with "j main",
              # so ra presumably holds no valid return address -- confirm.

stack frame
sp  -> (sp on entry)                 <- s0
            1) return address     12(sp')
            2) old s0              8(sp')
            3) x                   4(sp')  = -12(s0)
            4) (unused)            0(sp')  <- sp'  (sp after "addi sp, sp, -16")

Before the function ends, its stack frame is deleted by incrementing sp; then the function returns (using the return address). Please note that the main function also has its own stack frame.
*** Note: "call twice" is a pseudo-instruction that translates into two
      instructions and can reach a "large address". You can use "jal ra,twice"
      instead, which does the same as call but requires "twice" to be a
      "small address". The "ret" is actually "jalr x0,ra,0", which jumps to
      the return address stored in "ra".
end
last update 26 Feb 2022