jmp word [cs:bx]
table:
- dw emu ; 0
- dw quit
- dw xkbd@
- dw xnum
- dw xjmp
- dw xcall ; 5
- dw xinc
- dw xdec
- dw xdup
- dw xdrop
- dw xif ; 10
- dw xret
- dw xc@
- dw xc!
- dw xpush
- dw xpop ; 15
- dw 0 ; 16 - unused instruction
- dw xrot
- dw xdisk@
- dw xdisk!
- dw x@ ; 20
- dw x!
- dw xover
- dw xswap
- dw xplus
- dw xminus ; 25
- dw xmul
- dw xdiv
- dw xgreat
- dw xless ; 29
- dw xnot ; 30
- dw xi ; 31
- dw xcprt@ ; 32
- dw xcprt! ; 33
- dw xi2 ; 34
- dw xi3 ; 35
- dw xshl ; 36
- dw xshr ; 37
- dw lor ; 38
- dw lxor ; 39
- dw xvidmap ; 40
- dw xmouse@ ; 41
- dw xvidput ; 42
- dw xcmove ; 43
- dw xcfill ; 44
- dw xtvidput ; 45
- dw xdep ; 46
- dw xcharput ; 47
+ dw emu ; 0 - nop (handled as emu loop entry)
+ dw quit ; 1 - halt
+ dw op_02_kbd@ ; 2 - read keyboard scancode
+ dw op_03_xnum ; 3 - push literal number
+ dw op_04_xjmp ; 4 - unconditional jump
+ dw op_05_xcall ; 5 - call subroutine
+ dw op_06_xinc ; 6 - increment TOS
+ dw op_07_xdec ; 7 - decrement TOS
+ dw op_08_xdup ; 8 - duplicate TOS
+ dw op_09_xdrop ; 9 - drop TOS
+ dw op_10_xif ; 10 - conditional jump if zero
+ dw op_11_xret ; 11 - return from subroutine
+ dw op_12_xc@ ; 12 - fetch byte
+ dw op_13_xc! ; 13 - store byte
+ dw op_14_xpush ; 14 - data stack to return stack
+ dw op_15_xpop ; 15 - return stack to data stack
+ dw 0 ; 16 - unused instruction
+ dw op_17_xrot ; 17 - rotate top 3
+ dw op_18_xdisk@ ; 18 - load sector from disk
+ dw op_19_xdisk! ; 19 - save sector to disk
+ dw op_20_x@ ; 20 - fetch 32-bit
+ dw op_21_x! ; 21 - store 32-bit
+ dw op_22_xover ; 22 - copy second to top
+ dw op_23_xswap ; 23 - swap top two
+ dw op_24_xplus ; 24 - add
+ dw op_25_xminus ; 25 - subtract
+ dw op_26_xmul ; 26 - multiply (signed)
+ dw op_27_xdiv ; 27 - divide (signed)
+ dw op_28_xgreat ; 28 - greater than
+ dw op_29_xless ; 29 - less than
+ dw op_30_xnot ; 30 - bitwise NOT
+ dw op_31_xi ; 31 - copy return stack TOS
+ dw op_32_xcprt@ ; 32 - read I/O port
+ dw op_33_xcprt! ; 33 - write I/O port
+ dw op_34_xi2 ; 34 - copy return stack 2nd
+ dw op_35_xi3 ; 35 - copy return stack 3rd
+ dw op_36_xshl ; 36 - shift left
+ dw op_37_xshr ; 37 - shift right
+ dw op_38_lor ; 38 - bitwise OR
+ dw op_39_lxor ; 39 - bitwise XOR
+ dw op_40_xvidmap ; 40 - map image to VESA video
+ dw op_41_xmouse@ ; 41 - read mouse state
+ dw op_42_xvidput ; 42 - blit image to image
+ dw op_43_xcmove ; 43 - copy memory bytes
+ dw op_44_xcfill ; 44 - fill memory with byte
+ dw op_45_xtvidput ; 45 - transparent blit
+ dw op_46_xdep ; 46 - data stack depth
+ dw op_47_xcharput ; 47 - render character glyph
include 'opcodes_00_09.inc'
; Opcode 0 (xnop) is handled inline as the emu loop entry point.
; Opcode 1 (xhalt/quit) is handled in the main emulator file.
-xkbd@: call KB_read
- sub edi, 4
- mov [es:edi], dl
-
- mov ah, 0bh ; check for key in keyboard buffer
- int 21h
- cmp al, 0h
- je emu
- mov ah, 0 ; read key
- int 16h
- jmp emu
-
-xnum: mov edx, dword [es:esi]
- sub edi, 4
- mov [es:edi], edx
- add esi, 4
- jmp emu
-
-xjmp: mov esi, dword [es:esi]
- add esi, [xms_addr]
- jmp emu
-
-xcall: mov edx, dword [es:esi]
- mov eax, [resp]
- sub eax, 4
- mov ebx, esi
- add ebx, 4
- sub ebx, [xms_addr]
- mov [es:eax], ebx
- mov [resp], eax
- mov esi, edx
- add esi, [xms_addr]
- jmp emu
-
-xinc: ; Increment: ( n -- n+1 )
+op_02_kbd@: ; Read Keyboard: ( -- scancode )
+; Pushes the byte that KB_read leaves in dl as one full 32-bit stack cell.
+; Afterwards, if DOS reports a key waiting on standard input, that key is
+; read through BIOS int 16h and discarded.
+ call KB_read ; KB_read returns its result in dl
+ movzx edx, dl ; zero-extend: a byte store would leave the upper 24 bits of the cell stale
+ sub edi, 4 ; make room on data stack for one element
+ mov [es:edi], edx ; push scancode as a complete 32-bit cell
+
+ mov ah, 0bh ; DOS: check if key waiting in keyboard buffer
+ int 21h ; call DOS interrupt
+ cmp al, 0h ; al=0 means no key waiting
+ je emu ; if no key waiting, return to emulation loop
+ mov ah, 0 ; BIOS: read key from keyboard buffer
+ int 16h ; call BIOS interrupt (consume the key, result unused)
+ jmp emu ; return to emulation loop
+
+op_03_xnum: ; Push Literal: ( -- n )
+ mov edx, dword [es:esi] ; read 32-bit literal from instruction stream
+ sub edi, 4 ; make room on data stack for one element
+ mov [es:edi], edx ; push the literal value onto data stack
+ add esi, 4 ; advance instruction pointer past the 32-bit literal
+ jmp emu ; return to emulation loop
+
+op_04_xjmp: ; Unconditional Jump: ( -- )
+; The 32-bit cell following the opcode is the jump target; it is rebased by
+; xms_addr and becomes the new instruction pointer (esi).
+ mov esi, dword [es:esi] ; read 32-bit target address from instruction stream
+ add esi, [xms_addr] ; convert virtual address to physical address
+ jmp emu ; return to emulation loop
+
+op_05_xcall: ; Call Subroutine: ( -- ) R:( -- ret-addr )
+ mov edx, dword [es:esi] ; read 32-bit target address from instruction stream
+ mov eax, [resp] ; load return stack pointer
+ sub eax, 4 ; make room on return stack for one element
+ mov ebx, esi ; copy current instruction pointer
+ add ebx, 4 ; advance past the 32-bit argument (return address)
+ sub ebx, [xms_addr] ; convert physical address to virtual address
+ mov [es:eax], ebx ; push return address onto return stack
+ mov [resp], eax ; store updated return stack pointer
+ mov esi, edx ; set instruction pointer to target address
+ add esi, [xms_addr] ; convert virtual address to physical address
+ jmp emu ; return to emulation loop
+
+op_06_xinc: ; Increment: ( n -- n+1 )
; Increments the top of the data stack by 1.
- inc dword [es:edi]
- jmp emu
-
+ inc dword [es:edi] ; increment the top of the data stack in place
+ jmp emu ; return to emulation loop
-xdec: ; Decrement: ( n -- n-1 )
+op_07_xdec: ; Decrement: ( n -- n-1 )
;
; Decrements the top of the data stack by 1.
;
; Memory layout after execution:
; [es:edi] = n-1
- dec dword [es:edi] ; decrement the top of the stack
- jmp emu
+ dec dword [es:edi] ; decrement the top of the data stack in place
+ jmp emu ; return to emulation loop
-
-xdup: ; Duplicate: ( n -- n n )
+op_08_xdup: ; Duplicate: ( n -- n n )
;
; Duplicates the top element of the data stack.
;
; [es:edi] = n (new top)
; [es:edi+4] = n (original top)
- mov eax, [es:edi] ; copy top element to eax
- sub edi, 4 ; move stack pointer down to make space
- mov [es:edi], eax ; push copy of top element onto stack
- jmp emu
-
+ mov eax, [es:edi] ; copy top element to eax
+ sub edi, 4 ; make room on data stack for one element
+ mov [es:edi], eax ; push copy of top element onto stack
+ jmp emu ; return to emulation loop
-xdrop: ; Drop: ( n -- )
+op_09_xdrop: ; Drop: ( n -- )
;
; Removes the top element from the data stack.
;
; Memory layout after execution:
; Stack pointer is adjusted; n is no longer on stack
- add edi, 4 ; pop top element off the stack
- jmp emu
+ add edi, 4 ; pop top element off the stack
+ jmp emu ; return to emulation loop
; Opcodes 10-19: if, ret, c@, c!, push, pop, (unused), rot, disk@, disk!
-xif: mov eax, [es:edi]
- add edi, 4
- cmp eax, 0
- jne l2
- mov esi, [es:esi]
- add esi, [xms_addr]
- jmp emu
+op_10_xif: ; Conditional Jump: ( flag -- )
+ mov eax, [es:edi] ; eax = flag (top of data stack)
+ add edi, 4 ; pop flag off the data stack
+ cmp eax, 0 ; test if flag is zero
+ jne l2 ; if flag is non-zero, skip the jump
+ mov esi, [es:esi] ; read 32-bit target address from instruction stream
+ add esi, [xms_addr] ; convert virtual address to physical address
+ jmp emu ; return to emulation loop
l2:
- add esi, 4
- jmp emu
+ add esi, 4 ; skip past the 32-bit jump target in instruction stream
+ jmp emu ; return to emulation loop
-xret: mov eax, [resp]
- mov esi, [es:eax]
- add esi, [xms_addr]
- add eax, 4
- mov [resp], eax
- jmp emu
+op_11_xret: ; Return: ( -- ) R:( ret-addr -- )
+ mov eax, [resp] ; load return stack pointer
+ mov esi, [es:eax] ; pop return address from return stack
+ add esi, [xms_addr] ; convert virtual address to physical address
+ add eax, 4 ; adjust return stack pointer (pop)
+ mov [resp], eax ; store updated return stack pointer
+ jmp emu ; return to emulation loop
-xc@: mov eax, [es:edi]
- add eax, [xms_addr]
- sub ecx, ecx
- mov cl, [es:eax]
- mov [es:edi], ecx
- jmp emu
+op_12_xc@: ; Fetch Byte: ( addr -- byte )
+ mov eax, [es:edi] ; eax = virtual address from top of data stack
+ add eax, [xms_addr] ; convert virtual address to physical address
+ sub ecx, ecx ; clear ecx (zero-extend the byte)
+ mov cl, [es:eax] ; read one byte from memory into cl
+ mov [es:edi], ecx ; replace address on stack with the fetched byte value
+ jmp emu ; return to emulation loop
-xc!: ;( n addr -- )
- mov ebx, [es:edi]
- add edi, 4
- mov ecx, [es:edi]
- add edi, 4
- add ebx, [xms_addr]
- mov [es:ebx], cl
- jmp emu
+op_13_xc!: ; Store Byte: ( byte addr -- )
+ mov ebx, [es:edi] ; ebx = virtual address (top of stack)
+ add edi, 4 ; pop address off the data stack
+ mov ecx, [es:edi] ; ecx = byte value (second on stack, only cl used)
+ add edi, 4 ; pop byte value off the data stack
+ add ebx, [xms_addr] ; convert virtual address to physical address
+ mov [es:ebx], cl ; store low byte of ecx to memory
+ jmp emu ; return to emulation loop
-xpush: mov ebx, [es:edi]
- add edi, 4
- mov eax, [resp]
- sub eax, 4
- mov [es:eax], ebx
- mov [resp], eax
- jmp emu
+op_14_xpush: ; Push to Return Stack: ( n -- ) R:( -- n )
+ mov ebx, [es:edi] ; ebx = top of data stack
+ add edi, 4 ; pop value off the data stack
+ mov eax, [resp] ; load return stack pointer
+ sub eax, 4 ; make room on return stack for one element
+ mov [es:eax], ebx ; push value onto return stack
+ mov [resp], eax ; store updated return stack pointer
+ jmp emu ; return to emulation loop
-xpop: mov eax, [resp]
- mov ebx, [es:eax]
- add eax, 4
- mov [resp], eax
- sub edi, 4
- mov [es:edi], ebx
- jmp emu
+op_15_xpop: ; Pop from Return Stack: ( -- n ) R:( n -- )
+ mov eax, [resp] ; load return stack pointer
+ mov ebx, [es:eax] ; read value from top of return stack
+ add eax, 4 ; adjust return stack pointer (pop)
+ mov [resp], eax ; store updated return stack pointer
+ sub edi, 4 ; make room on data stack for one element
+ mov [es:edi], ebx ; push value onto data stack
+ jmp emu ; return to emulation loop
-xrot: mov ebx, [es:edi]
- mov ecx, [es:edi+4]
- mov edx, [es:edi+8]
- mov [es:edi+8], ecx
- mov [es:edi+4], ebx
- mov [es:edi], edx
- jmp emu
+op_17_xrot: ; Rotate: ( a b c -- b c a )
+ mov ebx, [es:edi] ; ebx = c (top of stack)
+ mov ecx, [es:edi+4] ; ecx = b (second on stack)
+ mov edx, [es:edi+8] ; edx = a (third on stack)
+ mov [es:edi+8], ecx ; third slot = b
+ mov [es:edi+4], ebx ; second slot = c
+ mov [es:edi], edx ; top slot = a
+ jmp emu ; return to emulation loop
-xdisk@: mov ebx, [es:edi]
- add ebx, [xms_addr]
- mov ecx, [es:edi+4]
- add edi, 8
- call diskload ; ecx-fromdisk ebx-tomem
- jmp emu
+op_18_xdisk@: ; Disk Load: ( sector mem -- )
+; Pops a destination address (top) and a sector number (second) and hands
+; them to diskload, which is defined elsewhere.
+ mov ebx, [es:edi] ; ebx = destination memory address (virtual)
+ add ebx, [xms_addr] ; convert virtual address to physical address
+ mov ecx, [es:edi+4] ; ecx = sector number
+ add edi, 8 ; pop both arguments off the data stack
+ call diskload ; ecx = source on disk, ebx = destination in memory
+ ; NOTE(review): transfer size is decided inside diskload; presumably 1024 bytes like disk! - confirm
+ jmp emu ; return to emulation loop
-xdisk!: mov ecx, [es:edi]
- call file_seek
- mov ecx, 1024
- mov ebx, [es:edi+4]
- add edi, 8
- add ebx, [xms_addr]
- sub edx, edx
- mov dx, cs
- shl edx, 4
- add edx, buf
- call memmove ; ebx - from, edx - to, ecx - amount
- mov ah, 40h
- mov bx, [fileh]
- mov cx, 1024
- mov dx, buf
- int 21h
- jmp emu
+op_19_xdisk!: ; Disk Save: ( mem sector -- )
+; Copies 1024 bytes from the given memory address into the local buffer
+; `buf`, then writes that buffer to the file handle in [fileh] via DOS.
+; The arguments stay on the data stack across the file_seek call, so
+; file_seek must leave edi intact.
+; NOTE(review): the result of the DOS write (carry flag / ax) is not checked.
+ mov ecx, [es:edi] ; ecx = sector number (top of stack)
+ call file_seek ; position the file for the sector in ecx (routine defined elsewhere)
+ mov ecx, 1024 ; number of bytes for memmove to copy into buf
+ mov ebx, [es:edi+4] ; ebx = source memory address (virtual)
+ add edi, 8 ; pop both arguments off the data stack
+ add ebx, [xms_addr] ; convert virtual address to physical address
+ sub edx, edx ; clear edx
+ mov dx, cs ; edx = code segment
+ shl edx, 4 ; convert segment to linear address
+ add edx, buf ; edx = linear address of temporary buffer
+ call memmove ; ebx = from, edx = to, ecx = amount
+ mov ah, 40h ; DOS: write to file
+ mov bx, [fileh] ; file handle
+ mov cx, 1024 ; number of bytes to write
+ mov dx, buf ; source buffer offset (ds:dx)
+ int 21h ; call DOS interrupt
+ jmp emu ; return to emulation loop
; Opcodes 20-29: @, !, over, swap, plus, minus, mul, div, great, less
-x@: mov eax, [es:edi]
- add eax, [xms_addr]
- mov eax, [es:eax]
- mov [es:edi], eax
- jmp emu
+op_20_x@: ; Fetch 32-bit: ( addr -- n )
+ mov eax, [es:edi] ; eax = virtual address from top of data stack
+ add eax, [xms_addr] ; convert virtual address to physical address
+ mov eax, [es:eax] ; read 32-bit value from memory
+ mov [es:edi], eax ; replace address on stack with the fetched value
+ jmp emu ; return to emulation loop
-x!: ;( n addr -- )
- mov eax, [es:edi]
- add eax, [xms_addr]
- mov ecx, [es:edi+4]
- add edi, 8
- mov [es:eax], ecx
- jmp emu
+op_21_x!: ; Store 32-bit: ( n addr -- )
+ mov eax, [es:edi] ; eax = virtual address (top of stack)
+ add eax, [xms_addr] ; convert virtual address to physical address
+ mov ecx, [es:edi+4] ; ecx = value to store (second on stack)
+ add edi, 8 ; pop both arguments off the data stack
+ mov [es:eax], ecx ; store 32-bit value to memory
+ jmp emu ; return to emulation loop
-xover: mov ebx, [es:edi+4]
- sub edi, 4
- mov [es:edi], ebx
- jmp emu
+op_22_xover: ; Over: ( a b -- a b a )
+ mov ebx, [es:edi+4] ; ebx = a (second element on stack)
+ sub edi, 4 ; make room on data stack for one element
+ mov [es:edi], ebx ; push copy of a onto top of stack
+ jmp emu ; return to emulation loop
-xswap: mov ebx, [es:edi]
- xchg ebx, [es:edi+4]
- mov [es:edi], ebx
- jmp emu
+op_23_xswap: ; Swap: ( a b -- b a )
+ mov ebx, [es:edi] ; ebx = b (top of stack)
+ xchg ebx, [es:edi+4] ; exchange ebx with a (second on stack); ebx=a, [edi+4]=b
+ mov [es:edi], ebx ; top of stack = a
+ jmp emu ; return to emulation loop
-xplus: mov ebx, [es:edi]
- add edi, 4
- add [es:edi], ebx
- jmp emu
+op_24_xplus: ; Add: ( n1 n2 -- n1+n2 )
+ mov ebx, [es:edi] ; ebx = n2 (the addend — top of stack)
+ add edi, 4 ; pop n2 off the stack; n1 is now the new top
+ add [es:edi], ebx ; [es:edi] = n1 + n2 (add n2 to n1 in place)
+ jmp emu ; return to emulation loop
-
-xminus:
-; Subtract: ( n1 n2 -- n1-n2 )
+op_25_xminus: ; Subtract: ( n1 n2 -- n1-n2 )
;
; Pops the top two elements from the data stack, subtracts the top
; element (n2) from the second element (n1), and pushes the result.
; Memory layout after execution:
; [es:edi] = n1 - n2 (edi has been adjusted; old n2 slot is freed)
- mov ebx, [es:edi] ; ebx = n2 (the subtrahend — value to subtract)
- add edi, 4 ; pop n2 off the stack; n1 is now the new top
- sub [es:edi], ebx ; [es:edi] = n1 - n2 (subtract n2 from n1 in place)
- jmp emu
-
+ mov ebx, [es:edi] ; ebx = n2 (the subtrahend — value to subtract)
+ add edi, 4 ; pop n2 off the stack; n1 is now the new top
+ sub [es:edi], ebx ; [es:edi] = n1 - n2 (subtract n2 from n1 in place)
+ jmp emu ; return to emulation loop
-xmul:
- mov eax, [es:edi]
- add edi, 4
- sub edx, edx
- imul dword [es:edi]
- mov [es:edi], eax
- jmp emu
+op_26_xmul: ; Multiply (signed): ( n1 n2 -- n1*n2 )
+; Only the low 32 bits of the 64-bit product are kept.
+ mov eax, [es:edi] ; eax = n2 (top of stack)
+ add edi, 4 ; pop n2 off the stack; n1 is now the new top
+ sub edx, edx ; edx = 0 (not an input: one-operand imul overwrites edx:eax)
+ imul dword [es:edi] ; edx:eax = eax * n1 (signed multiply)
+ mov [es:edi], eax ; store low 32 bits as new top of stack; high half in edx is discarded
+ jmp emu ; return to emulation loop
-xdiv: add edi, 4
- mov eax, [es:edi]
- cdq
- idiv dword [es:edi-4]
- mov [es:edi], eax
- jmp emu
+op_27_xdiv: ; Divide (signed): ( n1 n2 -- n1/n2 )
+; Replaces the top two cells with the signed quotient; the remainder left
+; in edx is discarded.
+; NOTE(review): there is no guard on the divisor. A zero n2, or a quotient
+; that does not fit in 32 bits, makes idiv raise a CPU divide error.
+ add edi, 4 ; pop n2; n1 is now the new top (but we read n2 from edi-4)
+ mov eax, [es:edi] ; eax = n1 (the dividend)
+ cdq ; sign-extend eax into edx:eax for signed division
+ idiv dword [es:edi-4] ; eax = edx:eax / n2 (signed divide), edx = remainder
+ mov [es:edi], eax ; store quotient as new top of stack
+ jmp emu ; return to emulation loop
-xgreat: mov eax, [es:edi]
- add edi, 4
- mov edx, 0
- cmp [es:edi], eax
- jng l3
- dec edx
-l3: mov [es:edi], edx
- jmp emu
+op_28_xgreat: ; Greater Than: ( a b -- flag )
+ mov eax, [es:edi] ; eax = b (top of stack)
+ add edi, 4 ; pop b off the stack; a is now the new top
+ mov edx, 0 ; edx = 0 (default: false)
+ cmp [es:edi], eax ; compare a with b
+ jng l3 ; if a is NOT greater than b, skip setting flag
+ dec edx ; edx = -1 (true flag, all bits set)
+l3: mov [es:edi], edx ; store flag as new top of stack
+ jmp emu ; return to emulation loop
-xless: mov eax, [es:edi]
- add edi, 4
- mov edx, 0
- cmp [es:edi], eax
- jnl l4
- dec edx
-l4: mov [es:edi], edx
- jmp emu
+op_29_xless: ; Less Than: ( a b -- flag )
+ mov eax, [es:edi] ; eax = b (top of stack)
+ add edi, 4 ; pop b off the stack; a is now the new top
+ mov edx, 0 ; edx = 0 (default: false)
+ cmp [es:edi], eax ; compare a with b
+ jnl l4 ; if a is NOT less than b, skip setting flag
+ dec edx ; edx = -1 (true flag, all bits set)
+l4: mov [es:edi], edx ; store flag as new top of stack
+ jmp emu ; return to emulation loop
; Opcodes 30-39: not, i, cprt@, cprt!, i2, i3, shl, shr, or, xor
-xnot: not dword [es:edi]
- jmp emu
-
-xi: mov ebx, [resp]
- mov eax, [es:ebx]
- sub edi, 4
- mov [es:edi], eax
- jmp emu
-
-xcprt@: mov dx, [es:edi]
- in al, dx
- sub ecx, ecx
- mov cl, al
- mov [es:edi], ecx
- jmp emu
-
-xcprt!: mov dx, [es:edi]
- mov al, [es:edi+4]
- add edi, 8
- out dx, al
- jmp emu
-
-xi2: mov ebx, [resp]
- mov eax, [es:ebx+4]
- sub edi, 4
- mov [es:edi], eax
- jmp emu
-
-xi3: mov ebx, [resp]
- mov eax, [es:ebx+8]
- sub edi, 4
- mov [es:edi], eax
- jmp emu
-
-xshl: mov cl, [es:edi]
- add edi, 4
- shl dword [es:edi], cl
- jmp emu
-
-xshr: mov cl, [es:edi]
- add edi, 4
- shr dword [es:edi], cl
- jmp emu
-
-lor: mov eax, [es:edi]
- add edi, 4
- or [es:edi], eax
- jmp emu
-
-lxor: mov eax, [es:edi]
- add edi, 4
- xor [es:edi], eax
- jmp emu
+op_30_xnot: ; Bitwise NOT: ( n -- ~n )
+ not dword [es:edi] ; invert all bits of top of data stack in place
+ jmp emu ; return to emulation loop
+
+op_31_xi: ; Copy Return Stack TOS: ( -- n ) R:( n -- n )
+ mov ebx, [resp] ; ebx = return stack pointer
+ mov eax, [es:ebx] ; eax = value at top of return stack
+ sub edi, 4 ; make room on data stack for one element
+ mov [es:edi], eax ; push copy of return stack top onto data stack
+ jmp emu ; return to emulation loop
+
+op_32_xcprt@: ; Read I/O Port: ( port -- byte )
+ mov dx, [es:edi] ; dx = port number from top of data stack
+ in al, dx ; read one byte from the I/O port
+ sub ecx, ecx ; clear ecx (zero-extend the byte)
+ mov cl, al ; cl = byte read from port
+ mov [es:edi], ecx ; replace port number on stack with the read byte
+ jmp emu ; return to emulation loop
+
+op_33_xcprt!: ; Write I/O Port: ( byte port -- )
+ mov dx, [es:edi] ; dx = port number (top of stack)
+ mov al, [es:edi+4] ; al = byte value to write (second on stack)
+ add edi, 8 ; pop both arguments off the data stack
+ out dx, al ; write byte to the I/O port
+ jmp emu ; return to emulation loop
+
+op_34_xi2: ; Copy Return Stack 2nd: ( -- n )
+ mov ebx, [resp] ; ebx = return stack pointer
+ mov eax, [es:ebx+4] ; eax = second element on return stack
+ sub edi, 4 ; make room on data stack for one element
+ mov [es:edi], eax ; push value onto data stack
+ jmp emu ; return to emulation loop
+
+op_35_xi3: ; Copy Return Stack 3rd: ( -- n )
+ mov ebx, [resp] ; ebx = return stack pointer
+ mov eax, [es:ebx+8] ; eax = third element on return stack
+ sub edi, 4 ; make room on data stack for one element
+ mov [es:edi], eax ; push value onto data stack
+ jmp emu ; return to emulation loop
+
+op_36_xshl: ; Shift Left: ( n count -- n<<count )
+ mov cl, [es:edi] ; cl = shift count (top of stack, only low byte used)
+ add edi, 4 ; pop count off the stack; n is now the new top
+ shl dword [es:edi], cl ; shift n left by cl bits in place
+ jmp emu ; return to emulation loop
+
+op_37_xshr: ; Shift Right: ( n count -- n>>count )
+ mov cl, [es:edi] ; cl = shift count (top of stack, only low byte used)
+ add edi, 4 ; pop count off the stack; n is now the new top
+ shr dword [es:edi], cl ; shift n right by cl bits in place (unsigned)
+ jmp emu ; return to emulation loop
+
+op_38_lor: ; Bitwise OR: ( a b -- a|b )
+ mov eax, [es:edi] ; eax = b (top of stack)
+ add edi, 4 ; pop b off the stack; a is now the new top
+ or [es:edi], eax ; [es:edi] = a OR b (bitwise OR in place)
+ jmp emu ; return to emulation loop
+
+op_39_lxor: ; Bitwise XOR: ( a b -- a^b )
+ mov eax, [es:edi] ; eax = b (top of stack)
+ add edi, 4 ; pop b off the stack; a is now the new top
+ xor [es:edi], eax ; [es:edi] = a XOR b (bitwise XOR in place)
+ jmp emu ; return to emulation loop
; Opcodes 40-47: vidmap, mouse@, vidput, cmove, cfill, tvidput, dep, charput
-xvidmap:
- mov edx, [es:edi]
- add edx, [xms_addr]
- add edi, 4
- push edi
- push esi
- push 0a000h
- pop es
- mov word [ds:gra], 0
- push 0
- pop ds
- mov esi, edx
-mapl1: mov dx, [cs:gra]
- xor bx, bx
- mov ax, 4f05h
- int 10h
- mov edi, 0
- mov cx, 4096
-; mov cx, 16384
-mapl2: mov eax, [ds:esi]
- add esi, 4
- stosd
- loop mapl2
- inc word [cs:gra]
-; cmp word [cs:gra], 5
- cmp word [cs:gra], 19
- jne mapl1
- push 0
- pop es
- push cs
- pop ds
- pop esi
- pop edi
- jmp emu
-gra dw 0
+op_40_xvidmap: ; Video Map: ( addr -- )
+; Copies 19 consecutive 16384-byte chunks, starting at the popped address,
+; into the A000h video window, selecting window position 0..18 through
+; VESA function 4F05h before each chunk.
+; During the copy es = A000h and ds = 0, so `gra` is addressed through cs.
+; edi and esi are saved on the x86 stack and restored before returning;
+; es is set back to 0 and ds back to cs.
+; NOTE(review): this assumes consecutive window positions are 16 KB apart
+; (i.e. the mode's window granularity) - confirm against the video mode in use.
+ mov edx, [es:edi] ; edx = virtual address of image buffer
+ add edx, [xms_addr] ; convert virtual address to physical address
+ add edi, 4 ; pop address off the data stack
+ push edi ; save data stack pointer
+ push esi ; save instruction pointer
+ push 0a000h ; push video memory segment
+ pop es ; es = video memory segment (A000h)
+ mov word [ds:gra], 0 ; reset window position counter to 0 (ds is still the code segment here)
+ push 0 ; push zero segment
+ pop ds ; ds = 0 (flat memory access)
+ mov esi, edx ; esi = source address (image buffer)
+mapl1:
+ mov dx, [cs:gra] ; dx = current window position
+ xor bx, bx ; bx = 0 (bh = 0: set position, bl = 0: window A)
+ mov ax, 4f05h ; VESA: set memory window position
+ int 10h ; call VESA BIOS
+ mov edi, 0 ; edi = 0 (start of video window)
+ mov cx, 4096 ; cx = number of dwords to copy (4096 * 4 = 16384 bytes)
+mapl2:
+ mov eax, [ds:esi] ; read 4 bytes from source image buffer
+ add esi, 4 ; advance source pointer by 4 bytes
+ stosd ; write 4 bytes to video memory, advance edi
+ loop mapl2 ; repeat for all 4096 dwords in this window
+ inc word [cs:gra] ; advance to next window position
+ cmp word [cs:gra], 19 ; stop after 19 chunks (19*16384 = 311296 bytes; presumably covers a 307200-byte 640x480x8 frame - confirm)
+ jne mapl1 ; if not done, process next chunk
+ push 0 ; push zero segment
+ pop es ; restore es = 0
+ push cs ; push code segment
+ pop ds ; restore ds = cs
+ pop esi ; restore instruction pointer
+ pop edi ; restore data stack pointer
+ jmp emu ; return to emulation loop
+gra dw 0 ; current window position passed to VESA 4F05h
-xmouse@:
- mov ax, 0bh ; read motion counter
- int 33h
- push dx
- sub eax, eax
- mov ax, cx
- cwd
- shl edx, 16
- add edx, eax
- mov [es:edi-4], edx
- pop ax
- cwd
- shl edx, 16
- add edx, eax
- mov [es:edi-8], edx
- mov ax, 3 ; read buttons
- int 33h
- sub eax, eax
- mov ax, bx
- sub edi, 12
- mov [es:edi], eax
- jmp emu
+op_41_xmouse@: ; Read Mouse: ( -- dx dy buttons )
+; Pushes three cells: horizontal motion (deepest), vertical motion, and the
+; button state (new top of stack). Both motion counters are sign-extended
+; from 16 to 32 bits; the button word is zero-extended.
+ mov ax, 0bh ; mouse function: read motion counters
+ int 33h ; call mouse driver; cx=dx_motion, dx=dy_motion
+ push dx ; save dy (vertical motion) on x86 stack
+ sub eax, eax ; clear eax (upper 16 bits stay zero until the next int 33h)
+ mov ax, cx ; ax = dx_motion (horizontal motion)
+ cwd ; sign-extend ax into dx:ax
+ shl edx, 16 ; shift sign extension to upper 16 bits
+ add edx, eax ; edx = sign-extended 32-bit dx_motion
+ mov [es:edi-4], edx ; store dx in the first new slot (deepest of the three)
+ pop ax ; restore dy (vertical motion) into ax
+ cwd ; sign-extend ax into dx:ax
+ shl edx, 16 ; shift sign extension to upper 16 bits
+ add edx, eax ; edx = sign-extended 32-bit dy_motion
+ mov [es:edi-8], edx ; store dy in the second new slot
+ mov ax, 3 ; mouse function: read button status
+ int 33h ; call mouse driver; bx=button state
+ sub eax, eax ; clear eax
+ mov ax, bx ; ax = button state
+ sub edi, 12 ; allocate 3 stack slots (dx, dy, buttons)
+ mov [es:edi], eax ; store button state in the new top-of-stack slot
+ jmp emu ; return to emulation loop
-xcmove: mov ecx, [es:edi]
- add edi, 12
- mov edx, [es:edi-8]
- add edx, [xms_addr]
- mov ebx, [es:edi-4]
- add ebx, [xms_addr]
- cmp ecx, 0
- je emu
- cmp ebx, edx
- ja l8
- call memmove2
- jmp emu
-l8: call memmove
- jmp emu
+op_43_xcmove: ; Copy Memory: ( src dst count -- )
+ mov ecx, [es:edi] ; ecx = byte count (top of stack)
+ add edi, 12 ; pop all three arguments off the data stack
+ mov edx, [es:edi-8] ; edx = destination address (virtual)
+ add edx, [xms_addr] ; convert destination virtual address to physical
+ mov ebx, [es:edi-4] ; ebx = source address (virtual)
+ add ebx, [xms_addr] ; convert source virtual address to physical
+ cmp ecx, 0 ; check if count is zero
+ je emu ; if zero bytes, nothing to do
+ cmp ebx, edx ; compare source and destination addresses
+ ja l8 ; if source > dest, copy forward (no overlap issue)
+ call memmove2 ; otherwise copy backward (handles overlapping regions)
+ jmp emu ; return to emulation loop
+l8: call memmove ; copy forward: ebx=from, edx=to, ecx=count
+ jmp emu ; return to emulation loop
-xcfill: mov ecx, [es:edi]
- mov edx, [es:edi+4]
- add edx, [xms_addr]
- mov eax, [es:edi+8]
- add edi, 12
-l9: cmp ecx, 0
- je emu
- mov [es:edx], al
- inc edx
- dec ecx
- jmp l9
+op_44_xcfill: ; Fill Memory: ( byte addr count -- )
+ mov ecx, [es:edi] ; ecx = byte count (top of stack)
+ mov edx, [es:edi+4] ; edx = destination address (virtual, second on stack)
+ add edx, [xms_addr] ; convert virtual address to physical address
+ mov eax, [es:edi+8] ; eax = fill byte value (third on stack, only al used)
+ add edi, 12 ; pop all three arguments off the data stack
+l9:
+ cmp ecx, 0 ; check if remaining count is zero
+ je emu ; if zero, we are done
+ mov [es:edx], al ; store fill byte at current destination
+ inc edx ; advance destination pointer
+ dec ecx ; decrement remaining count
+ jmp l9 ; repeat for next byte
-xdep: sub eax, eax
- mov ax, cs
- shl eax, 4
- add eax, buf
- add eax, 20000
- sub eax, edi
- shr eax, 2
- sub edi, 4
- mov [es:edi], eax
- jmp emu
+op_46_xdep: ; Stack Depth: ( -- depth )
+; Pushes the number of 32-bit cells between edi and the linear address
+; cs*16 + buf + 20000. The count is taken before the result cell is
+; pushed, so the pushed depth does not include itself.
+ sub eax, eax ; clear eax
+ mov ax, cs ; ax = code segment
+ shl eax, 4 ; convert segment to linear address
+ add eax, buf ; eax = linear address of buffer area
+ add eax, 20000 ; eax = base of data stack (buf + 20000)
+ sub eax, edi ; eax = number of bytes used on stack
+ shr eax, 2 ; convert bytes to 32-bit cell count (divide by 4)
+ sub edi, 4 ; make room on data stack for one element
+ mov [es:edi], eax ; push stack depth onto data stack
+ jmp emu ; return to emulation loop
include 'vidput.inc'
include 'tvidput.inc'
; Memory layout after execution:
; Destination image updated with source image data (transparent pixels skipped)
-xtvidput:
-mov ebx, edi ; read data from stack, and save it to variables
-mov eax, [es:ebx]
-mov [cory], eax
-add ebx, 4
-mov eax, [es:ebx]
-mov [corx], eax
-
-add ebx, 4
-mov eax, [es:ebx]
-add eax, [xms_addr]
-mov ecx, [es:eax]
-mov [img2x], ecx
-add eax, 4
-mov ecx, [es:eax]
-mov [img2y], ecx
-add eax, 4
-mov [img2a], eax
-
-add ebx, 4
-mov eax, [es:ebx]
-add eax, [xms_addr]
-mov ecx, [es:eax]
-mov [img1x], ecx
-add eax, 4
-mov ecx, [es:eax]
-mov [img1y], ecx
-add eax, 4
-mov [img1a], eax
-
-add ebx, 4
-mov edi, ebx
-
-cmp dword [cory] , 0 ; calculate Y start
-jl tvidl1
-mov dword [starty], 0
-jmp tvidl2
+op_45_xtvidput:
+mov ebx, edi ; ebx = saved data stack pointer for reading arguments
+mov eax, [es:ebx] ; eax = y coordinate
+mov [cory], eax ; store y coordinate
+add ebx, 4 ; advance to next stack slot
+mov eax, [es:ebx] ; eax = x coordinate
+mov [corx], eax ; store x coordinate
+
+add ebx, 4 ; advance to next stack slot
+mov eax, [es:ebx] ; eax = addr2 (destination image virtual address)
+add eax, [xms_addr] ; convert virtual address to physical address
+mov ecx, [es:eax] ; ecx = destination image width
+mov [img2x], ecx ; store destination image width
+add eax, 4 ; advance to height field
+mov ecx, [es:eax] ; ecx = destination image height
+mov [img2y], ecx ; store destination image height
+add eax, 4 ; advance past header to pixel data
+mov [img2a], eax ; store destination image data address
+
+add ebx, 4 ; advance to next stack slot
+mov eax, [es:ebx] ; eax = addr1 (source image virtual address)
+add eax, [xms_addr] ; convert virtual address to physical address
+mov ecx, [es:eax] ; ecx = source image width
+mov [img1x], ecx ; store source image width
+add eax, 4 ; advance to height field
+mov ecx, [es:eax] ; ecx = source image height
+mov [img1y], ecx ; store source image height
+add eax, 4 ; advance past header to pixel data
+mov [img1a], eax ; store source image data address
+
+add ebx, 4 ; advance past all arguments
+mov edi, ebx ; pop all four arguments off the data stack
+
+cmp dword [cory] , 0 ; calculate Y start: is y >= 0?
+jl tvidl1 ; if y < 0, need to clip from top
+mov dword [starty], 0 ; y >= 0: start copying from row 0 of source
+jmp tvidl2
tvidl1:
-mov eax, [cory]
-neg eax
-mov [starty], eax
+mov eax, [cory] ; eax = negative y offset
+neg eax ; eax = number of rows to skip in source
+mov [starty], eax ; store start row (skip clipped rows)
tvidl2:
-cmp dword [corx] , 0 ; calculate X start
-jl tvidl3
-mov dword [startx], 0
-jmp tvidl4
+cmp dword [corx] , 0 ; calculate X start: is x >= 0?
+jl tvidl3 ; if x < 0, need to clip from left
+mov dword [startx], 0 ; x >= 0: start copying from column 0 of source
+jmp tvidl4
tvidl3:
-mov eax, [corx]
-neg eax
-mov [startx], eax
+mov eax, [corx] ; eax = negative x offset
+neg eax ; eax = number of columns to skip in source
+mov [startx], eax ; store start column (skip clipped columns)
tvidl4:
-mov eax, [cory] ; calculate Y end
-add eax, [img1y]
-cmp eax, [img2y]
-jg tvidl5
-mov eax, [img1y]
-mov [endy], eax
+mov eax, [cory] ; calculate Y end: does source extend past destination?
+add eax, [img1y] ; eax = y + source height
+cmp eax, [img2y] ; compare with destination height
+jg tvidl5 ; if exceeds destination, clip
+mov eax, [img1y] ; no clipping needed: end = source height
+mov [endy], eax ; store Y end
jmp tvidl6
tvidl5:
-mov eax, [img2y]
-sub eax, [cory]
-mov [endy], eax
+mov eax, [img2y] ; eax = destination height
+sub eax, [cory] ; eax = destination height - y (max rows that fit)
+mov [endy], eax ; store clipped Y end
tvidl6:
-mov eax, [corx] ; calculate X end
-add eax, [img1x]
-cmp eax, [img2x]
-jg tvidl7
-mov eax, [img1x]
-mov [endx], eax
-jmp tvidl8
+mov eax, [corx] ; calculate X end: does source extend past destination?
+add eax, [img1x] ; eax = x + source width
+cmp eax, [img2x] ; compare with destination width
+jg tvidl7 ; if exceeds destination, clip
+mov eax, [img1x] ; no clipping needed: end = source width
+mov [endx], eax ; store X end
+jmp tvidl8
tvidl7:
-mov eax, [img2x]
-sub eax, [corx]
-mov [endx], eax
+mov eax, [img2x] ; eax = destination width
+sub eax, [corx] ; eax = destination width - x (max columns that fit)
+mov [endx], eax ; store clipped X end
tvidl8:
-mov eax, [endy] ; calculate Y length
-sub eax, [starty]
-cmp eax, 0
-jle emu
-mov [lengthy], eax
-
-mov eax, [endx] ; calculate X length
-sub eax, [startx]
-cmp eax, 0
-jle emu
-mov [lengthx], eax
-
-mov eax, [starty] ; calculate img1 start address
-mov ebx, [img1x]
-sub edx, edx
-mul ebx
-add eax, [img1a]
-add eax, [startx]
-mov [img1start], eax
-
-mov eax, [cory] ; calculate img2 start address
-add eax, [starty]
-mov ebx, [img2x]
-sub edx, edx
-mul ebx
-add eax, [img2a]
-add eax, [corx]
-add eax, [startx]
-mov [img2start], eax
+mov eax, [endy] ; calculate Y length (number of rows to copy)
+sub eax, [starty] ; eax = endy - starty
+cmp eax, 0 ; check if any rows to copy
+jle emu ; if zero or negative, nothing to draw
+mov [lengthy], eax ; store row count
+
+mov eax, [endx] ; calculate X length (number of columns to copy)
+sub eax, [startx] ; eax = endx - startx
+cmp eax, 0 ; check if any columns to copy
+jle emu ; if zero or negative, nothing to draw
+mov [lengthx], eax ; store column count
+
+mov eax, [starty] ; calculate source image start address
+mov ebx, [img1x] ; ebx = source image width (stride)
+sub edx, edx ; clear edx for multiplication
+mul ebx ; eax = starty * source width
+add eax, [img1a] ; add source data base address
+add eax, [startx] ; add starting column offset
+mov [img1start], eax ; store source start address
+
+mov eax, [cory] ; calculate destination image start address
+add eax, [starty] ; eax = y + starty (destination row)
+mov ebx, [img2x] ; ebx = destination image width (stride)
+sub edx, edx ; clear edx for multiplication
+mul ebx ; eax = destination row * destination width
+add eax, [img2a] ; add destination data base address
+add eax, [corx] ; add x coordinate offset
+add eax, [startx] ; add starting column offset
+mov [img2start], eax ; store destination start address
tvidl9:
-mov ebx, [img1start]
-mov ecx, [lengthx]
-mov edx, [img2start]
-
-tmemmove: ; ebx - from, edx - to, ecx - amount
-cmp ecx, 0
-je tl11
-mov al, [es:ebx]
-cmp al, 255
-je tl12
-mov [es:edx], al
+mov ebx, [img1start] ; ebx = current source row address
+mov ecx, [lengthx] ; ecx = number of pixels to copy per row
+mov edx, [img2start] ; edx = current destination row address
+
+tmemmove: ; transparent copy: ebx=from, edx=to, ecx=count
+cmp ecx, 0 ; check if remaining count is zero
+je tl11 ; if zero, done with this row
+mov al, [es:ebx] ; al = source pixel
+cmp al, 255 ; is pixel transparent (0xFF)?
+je tl12 ; if transparent, skip writing to destination
+mov [es:edx], al ; copy non-transparent pixel to destination
tl12:
-inc ebx
-inc edx
-dec ecx
-jmp tmemmove
+inc ebx ; advance source pointer
+inc edx ; advance destination pointer
+dec ecx ; decrement remaining pixel count
+jmp tmemmove ; repeat for next pixel
tl11:
-mov eax, [img1x]
-add [img1start], eax
-mov eax, [img2x]
-add [img2start], eax
-dec dword [lengthy]
-cmp [lengthy], 0
-jg tvidl9
+mov eax, [img1x] ; eax = source image width (stride)
+add [img1start], eax ; advance source address to next row
+mov eax, [img2x] ; eax = destination image width (stride)
+add [img2start], eax ; advance destination address to next row
+dec dword [lengthy] ; decrement remaining row count
+cmp [lengthy], 0 ; check if more rows to copy
+jg tvidl9 ; if yes, copy next row
-jmp emu
+jmp emu ; return to emulation loop
; Memory layout after execution:
; Destination image updated with source image data
-xvidput:
-mov ebx, edi ; read data from stack, and save it to variables
-mov eax, [es:ebx]
-mov [cory], eax
-add ebx, 4
-mov eax, [es:ebx]
-mov [corx], eax
-
-add ebx, 4
-mov eax, [es:ebx]
-add eax, [xms_addr]
-mov ecx, [es:eax]
-mov [img2x], ecx
-add eax, 4
-mov ecx, [es:eax]
-mov [img2y], ecx
-add eax, 4
-mov [img2a], eax
-
-add ebx, 4
-mov eax, [es:ebx]
-add eax, [xms_addr]
-mov ecx, [es:eax]
-mov [img1x], ecx
-add eax, 4
-mov ecx, [es:eax]
-mov [img1y], ecx
-add eax, 4
-mov [img1a], eax
-
-add ebx, 4
-mov edi, ebx
-
-cmp dword [cory] , 0 ; calculate Y start
-jl vidl1
-mov dword [starty], 0
-jmp vidl2
; ---------------------------------------------------------------------
; op_42_xvidput (opcode 42): copy a source image into a destination
; image at position (x, y), clipped to the destination bounds.
;
; In:  four dword cells read from es:edi upward:
;        [edi+0]  y, [edi+4] x,
;        [edi+8]  destination image address, [edi+12] source image address
;      Each image address has [xms_addr] added before it is dereferenced
;      and points at two dwords (width, height) followed by pixel data.
; Out: edi advanced by 16 (all four cells dropped); destination pixel
;      data overwritten inside the clipped rectangle.
; Exit: jmp emu, including the early exits when the clipped area is empty.
; Uses: eax, ebx, ecx, edx, the cory/corx/img*/start*/end*/length*
;       variables, plus whatever memmove (defined elsewhere) changes.
; NOTE(review): pixel offsets are never scaled, so this presumably
;       assumes one byte per pixel - confirm against memmove.
; ---------------------------------------------------------------------
+op_42_xvidput:
+mov ebx, edi ; ebx = data stack pointer, used as a cursor over the arguments
+mov eax, [es:ebx] ; eax = y coordinate (first cell)
+mov [cory], eax ; store y coordinate
+add ebx, 4 ; advance to next stack slot
+mov eax, [es:ebx] ; eax = x coordinate
+mov [corx], eax ; store x coordinate
+
+add ebx, 4 ; advance to next stack slot
+mov eax, [es:ebx] ; eax = destination image address as given on the stack
+add eax, [xms_addr] ; rebase by xms_addr so it can be dereferenced through es
+mov ecx, [es:eax] ; ecx = destination image width (first header dword)
+mov [img2x], ecx ; store destination image width
+add eax, 4 ; advance to height field
+mov ecx, [es:eax] ; ecx = destination image height (second header dword)
+mov [img2y], ecx ; store destination image height
+add eax, 4 ; advance past the 8-byte header to pixel data
+mov [img2a], eax ; store destination pixel data address
+
+add ebx, 4 ; advance to next stack slot
+mov eax, [es:ebx] ; eax = source image address as given on the stack
+add eax, [xms_addr] ; rebase by xms_addr so it can be dereferenced through es
+mov ecx, [es:eax] ; ecx = source image width (first header dword)
+mov [img1x], ecx ; store source image width
+add eax, 4 ; advance to height field
+mov ecx, [es:eax] ; ecx = source image height (second header dword)
+mov [img1y], ecx ; store source image height
+add eax, 4 ; advance past the 8-byte header to pixel data
+mov [img1a], eax ; store source pixel data address
+
+add ebx, 4 ; step past the fourth argument
+mov edi, ebx ; drop all four arguments from the data stack
+
+cmp dword [cory] , 0 ; calculate Y start: signed test, is y >= 0?
+jl vidl1 ; y < 0: the top rows of the source fall outside the destination
+mov dword [starty], 0 ; y >= 0: start copying from row 0 of source
+jmp vidl2
vidl1:
-mov eax, [cory]
-neg eax
-mov [starty], eax
+mov eax, [cory] ; eax = y (negative here)
+neg eax ; eax = -y = number of source rows above the destination
+mov [starty], eax ; first source row that is visible
vidl2:
-cmp dword [corx] , 0 ; calculate X start
-jl vidl3
-mov dword [startx], 0
-jmp vidl4
+cmp dword [corx] , 0 ; calculate X start: signed test, is x >= 0?
+jl vidl3 ; x < 0: the left columns of the source fall outside the destination
+mov dword [startx], 0 ; x >= 0: start copying from column 0 of source
+jmp vidl4
vidl3:
-mov eax, [corx]
-neg eax
-mov [startx], eax
+mov eax, [corx] ; eax = x (negative here)
+neg eax ; eax = -x = number of source columns left of the destination
+mov [startx], eax ; first source column that is visible
vidl4:
-mov eax, [cory] ; calculate Y end
-add eax, [img1y]
-cmp eax, [img2y]
-jg vidl5
-mov eax, [img1y]
-mov [endy], eax
+mov eax, [cory] ; calculate Y end: does source extend past destination?
+add eax, [img1y] ; eax = y + source height (destination row just below the source)
+cmp eax, [img2y] ; compare with destination height (signed)
+jg vidl5 ; bottom edge is outside the destination: clip
+mov eax, [img1y] ; no clipping needed: end = source height
+mov [endy], eax ; store Y end (exclusive, in source rows)
jmp vidl6
vidl5:
-mov eax, [img2y]
-sub eax, [cory]
-mov [endy], eax
+mov eax, [img2y] ; eax = destination height
+sub eax, [cory] ; eax = destination height - y = first source row that no longer fits
+mov [endy], eax ; store clipped Y end (may be <= starty; checked at vidl8)
vidl6:
-mov eax, [corx] ; calculate X end
-add eax, [img1x]
-cmp eax, [img2x]
-jg vidl7
-mov eax, [img1x]
-mov [endx], eax
-jmp vidl8
+mov eax, [corx] ; calculate X end: does source extend past destination?
+add eax, [img1x] ; eax = x + source width (destination column just right of the source)
+cmp eax, [img2x] ; compare with destination width (signed)
+jg vidl7 ; right edge is outside the destination: clip
+mov eax, [img1x] ; no clipping needed: end = source width
+mov [endx], eax ; store X end (exclusive, in source columns)
+jmp vidl8
vidl7:
-mov eax, [img2x]
-sub eax, [corx]
-mov [endx], eax
+mov eax, [img2x] ; eax = destination width
+sub eax, [corx] ; eax = destination width - x = first source column that no longer fits
+mov [endx], eax ; store clipped X end (may be <= startx; checked at vidl8)
vidl8:
-mov eax, [endy] ; calculate Y length
-sub eax, [starty]
-cmp eax, 0
-jle emu
-mov [lengthy], eax
-
-mov eax, [endx] ; calculate X length
-sub eax, [startx]
-cmp eax, 0
-jle emu
-mov [lengthx], eax
-
-mov eax, [starty] ; calculate img1 start address
-mov ebx, [img1x]
-sub edx, edx
-mul ebx
-add eax, [img1a]
-add eax, [startx]
-mov [img1start], eax
-
-mov eax, [cory] ; calculate img2 start address
-add eax, [starty]
-mov ebx, [img2x]
-sub edx, edx
-mul ebx
-add eax, [img2a]
-add eax, [corx]
-add eax, [startx]
-mov [img2start], eax
+mov eax, [endy] ; calculate Y length (number of rows to copy)
+sub eax, [starty] ; eax = endy - starty
+cmp eax, 0 ; signed check: any visible rows?
+jle emu ; none: image is entirely outside vertically, nothing to draw
+mov [lengthy], eax ; store row count (> 0 from here on)
+
+mov eax, [endx] ; calculate X length (number of columns to copy)
+sub eax, [startx] ; eax = endx - startx
+cmp eax, 0 ; signed check: any visible columns?
+jle emu ; none: image is entirely outside horizontally, nothing to draw
+mov [lengthx], eax ; store column count (> 0 from here on)
+
+mov eax, [starty] ; calculate source image start address
+mov ebx, [img1x] ; ebx = source image width (stride)
+sub edx, edx ; edx = 0 (not required: mul overwrites edx with the high half)
+mul ebx ; eax = low 32 bits of starty * source width
+add eax, [img1a] ; add source pixel data base address
+add eax, [startx] ; add first visible source column
+mov [img1start], eax ; address of the first source pixel to copy
+
+mov eax, [cory] ; calculate destination image start address
+add eax, [starty] ; eax = y + starty = first destination row (0 when y < 0)
+mov ebx, [img2x] ; ebx = destination image width (stride)
+sub edx, edx ; edx = 0 (not required: mul overwrites edx with the high half)
+mul ebx ; eax = low 32 bits of destination row * destination width
+add eax, [img2a] ; add destination pixel data base address
+add eax, [corx] ; add x coordinate offset
+add eax, [startx] ; x + startx = first destination column (0 when x < 0)
+mov [img2start], eax ; address of the first destination pixel to write
vidl9:
-mov ebx, [img1start]
-mov ecx, [lengthx]
-mov edx, [img2start]
-call memmove
-
-mov eax, [img1x]
-add [img1start], eax
-mov eax, [img2x]
-add [img2start], eax
-dec dword [lengthy]
-cmp [lengthy], 0
-jg vidl9
-
-jmp emu
+mov ebx, [img1start] ; ebx = current source row address
+mov ecx, [lengthx] ; ecx = number of pixels to copy in this row
+mov edx, [img2start] ; edx = current destination row address
+call memmove ; copy one row; memmove is defined elsewhere (ebx=from, edx=to, ecx=count as set up here)
+
+mov eax, [img1x] ; eax = source image width (stride)
+add [img1start], eax ; advance source address to next row
+mov eax, [img2x] ; eax = destination image width (stride)
+add [img2start], eax ; advance destination address to next row
+dec dword [lengthy] ; one row done
+cmp [lengthy], 0 ; rows remaining? (dec already set the flags jg reads; the cmp repeats the test)
+jg vidl9 ; if yes, copy next row
+
+jmp emu ; all rows copied: return to emulation loop
cory dd 0
corx dd 0