04-22-2003, 08:43 PM
I never said I was going to use it; it's pointless on a 486. Since no one entered, I didn't bother either, so I never finished it — it doesn't align to VRAM. But beat this: it's faster than UGL on a 486.
Code:
;; MASM module header.
;; Medium memory model with the BASIC language convention for procedures.
.model medium, basic
;; Enable 386 instructions (32-bit registers, SETcc, FS/GS are used below).
;; NOTE(review): .386 is placed after .model; moving it before .model is
;; reported to change the default segment size -- confirm before reordering.
.386
.data
;; One uninitialised word in the data segment. Blit486 stores the near offset
;; of the per-row routine (sram_inner or vram_inner) here and calls through it.
;; NOTE(review): the accesses carry no segment override, so they presumably go
;; through DS -- verify that DS still addresses this segment at those points.
innerloop word ?
.code
;;----------------------------------------------------------------------
;; Blit486 -- copy a sprite to a 320-byte-pitch surface, treating source
;;            byte 0 as transparent.
;;
;; Convention: .model medium, basic (far procedure, callee cleans stack).
;; In:    pdst  far pointer to the top-left byte of the destination surface
;;        x, y  destination position in pixels (bytes); no clipping is done
;;        psrc  far pointer to the sprite: word 0 = width * 8, word 1 = height,
;;              pixel rows follow at offset 4
;;              (presumably a QuickBASIC GET array for an 8-bpp mode -- confirm)
;; Out:   nothing
;; Saved: bx di si ds es (USES). ax, cx, dx and flags are clobbered.
;;        The sram_inner row routine additionally overwrites FS and GS.
;;
;; Fix over the previous revision: the row-routine pointer used to live in
;; the DS-addressed variable `innerloop`, but it was written and called
;; AFTER `lds si, psrc` had repointed DS at the sprite. With a sprite
;; outside DGROUP that overwrote a word of the sprite's segment. The
;; pointer is now a stack local (addressed through SS:BP), so DS no longer
;; matters.
;;----------------------------------------------------------------------
SCREEN_PITCH    equ     320             ;; bytes per destination row
VRAM_SEGMENT    equ     0a000h          ;; destination segment that selects vram_inner
SPRITE_HEADER   equ     4               ;; bytes of width/height header before pixels

Blit486 proc public uses bx di si ds es,\
        pdst:far ptr, x:word, y:word, psrc:far ptr
        local   rowProc:word            ;; near offset of the per-row routine

        ;;
        ;; Choose the row routine from the destination segment.
        ;; Both operands are BP-relative, so this is independent of DS.
        ;;
        mov     ax, offset sram_inner
        cmp     word ptr [pdst+2], VRAM_SEGMENT
        jne     @@picked
        mov     ax, offset vram_inner
@@picked:
        mov     rowProc, ax

        ;;
        ;; es:di -> dst
        ;; ds:si -> src
        ;;
        lds     si, psrc
        les     di, pdst

        ;;
        ;; Read the sprite header: bx = bytes per row, cx = row count.
        ;;
        mov     bx, ds:[si]
        mov     cx, ds:[si+2]
        shr     bx, 3                   ;; stored width is in units of 1/8 byte
        add     si, SPRITE_HEADER       ;; ds:si -> first pixel row

        ;;
        ;; Nothing to draw for an empty sprite.
        ;;
        test    bx, bx
        jz      @@exit
        test    cx, cx
        jz      @@exit

        ;;
        ;; di += y * SCREEN_PITCH + x  (16-bit arithmetic, high word of the
        ;; product in dx is ignored)
        ;;
        mov     ax, y
        mov     dx, SCREEN_PITCH
        mul     dx
        add     di, x
        add     di, ax

        ;;
        ;; One call per row. The row routines take bx = pixel count and
        ;; preserve bx, cx, si and di, so only the row pointers move here.
        ;;
@@row:  call    rowProc
        add     si, bx
        add     di, SCREEN_PITCH
        dec     cx
        jnz     @@row

@@exit: ret
Blit486 endp
;;----------------------------------------------------------------------
;; sram_inner -- blit one sprite row with read-modify-write of the
;;               destination; source byte 0 is transparent.
;;
;; In:    bx     = pixel (byte) count, must be non-zero
;;        ds:si -> source row
;;        es:di -> destination row
;; Saved: ax bx cx dx di si bp (USES, low 16 bits only)
;; Clobb: fs, gs (used as scratch counters), flags,
;;        upper halves of eax/ebx/ecx/edx when the dword loop runs
;;
;; Rows shorter than 8 pixels are done entirely byte by byte. Otherwise
;; the first (count AND 3) pixels are done byte by byte and the rest four
;; at a time.
;;----------------------------------------------------------------------
sram_inner proc near private uses ax bx cx dx di si bp
        ;;
        ;; fs -> byte-loop count   (default: the whole row)
        ;; gs -> dword-loop count  (default: none), in bytes
        ;;
        mov     fs, bx
        xor     ax, ax
        mov     gs, ax
        cmp     bx, 8
        jl      @@bytes

        ;;
        ;; Split the row: gs = count rounded down to a multiple of 4,
        ;; fs = the remaining 0..3 leading pixels.
        ;;
        mov     bp, bx
        and     bp, 0fffch
        mov     gs, bp
        and     bx, 3
        jz      @@dwords                ;; no leading bytes at all
        mov     fs, bx

        ;;
        ;; Byte loop. si/di are moved to the end of the span and bp runs
        ;; from -count up to 0, serving as both index and loop counter.
        ;;
@@bytes:
        mov     bp, fs
        add     si, bp
        add     di, bp
        neg     bp
@@byte_next:
        mov     cl, ds:[si+bp]          ;; source pixel
        cmp     cl, 1                   ;; CF = 1 only when the pixel is 0
        sbb     al, al                  ;; al = 0ffh if transparent, else 0
        mov     dl, es:[di+bp]          ;; destination pixel
        and     al, dl                  ;; keep destination where transparent
        or      al, cl                  ;; merge in the opaque source pixel
        mov     es:[di+bp], al
        inc     bp
        jnz     @@byte_next

        ;;
        ;; Dword loop over the remaining gs bytes, same negative-index scheme.
        ;;
@@dwords:
        mov     bp, gs
        test    bp, bp
        jz      @@done
        add     si, bp
        add     di, bp
        neg     bp
@@dword_next:
        mov     ecx, ds:[si+bp]         ;; four source pixels
        mov     ebx, ecx
        shr     ebx, 16                 ;; bl/bh = source pixels 2 and 3

        ;;
        ;; Build a per-byte mask in eax: 0ffh where the source byte is 0.
        ;;
        cmp     bl, 1
        sbb     al, al
        cmp     bh, 1
        sbb     ah, ah
        shl     eax, 16                 ;; masks for pixels 2/3 to the high word
        cmp     cl, 1
        sbb     al, al
        cmp     ch, 1
        sbb     ah, ah

        ;;
        ;; result = (destination AND mask) OR source
        ;;
        mov     edx, es:[di+bp]         ;; four destination pixels
        and     eax, edx
        or      eax, ecx
        mov     es:[di+bp], eax

        add     bp, 4
        jnz     @@dword_next
@@done: ret
sram_inner endp
;;----------------------------------------------------------------------
;; vram_inner -- blit one sprite row using writes only (the destination
;;               is never read); source byte 0 is transparent.
;;
;; In:    bx     = pixel (byte) count
;;        ds:si -> source row
;;        es:di -> destination row
;; Saved: ax bx cx di si bp (USES)
;; Clobb: flags
;;
;; Pixels are handled two at a time. For each pair a jump table selects
;; "write nothing", "write low byte", "write high byte" or "write both".
;;
;; Fix over the previous revision: the pair loop stepped bp by 2 from
;; -count towards 0, so an odd count never reached 0 and the loop did not
;; terminate. The pair loop now covers only the even part of the row and
;; a single trailing pixel is handled separately. A zero count now
;; returns immediately instead of wrapping through 32768 pairs.
;;----------------------------------------------------------------------
vram_inner proc near private uses ax bx cx di si bp
        jmp     @@begin

        ;;
        ;; Indexed by (low != 0) * 2 + (high != 0) * 4, i.e. already a
        ;; byte offset into this word table.
        ;;
vramTB  word    @@cont, @@casea, @@caseb, @@casec

@@begin:
        mov     cx, bx                  ;; cx keeps the full count; bx is reused below
        and     bx, 0fffeh              ;; bx = bytes covered by whole pairs
        jz      @@tail                  ;; count is 0 or 1: no pairs

        ;;
        ;; Point si/di past the paired span; bp runs from -bytes up to 0.
        ;;
        add     si, bx
        add     di, bx
        mov     bp, bx
        neg     bp

@@loop: mov     ax, ds:[si+bp]          ;; al = first pixel, ah = second
        or      al, al
        setnz   bl
        or      ah, ah
        setnz   bh
        shl     bl, 1                   ;; 2 if the first pixel is opaque
        shl     bh, 2                   ;; 4 if the second pixel is opaque
        or      bl, bh
        xor     bh, bh                  ;; bx = table offset 0/2/4/6
        ;; The table lives in the code segment, so address it through CS.
        jmp     word ptr cs:[vramTB+bx]

@@casea: mov    es:[di+bp+0], al        ;; only the first pixel is opaque
        jmp     @@cont
@@caseb: mov    es:[di+bp+1], ah        ;; only the second pixel is opaque
        jmp     @@cont
@@casec: mov    es:[di+bp+0], ax        ;; both opaque: one word write
@@cont: add     bp, 2
        jnz     @@loop

        ;;
        ;; Trailing pixel of an odd-length row. si/di point at it here:
        ;; either just past the pairs, or at the row start if there were none.
        ;;
@@tail: test    cl, 1
        jz      @@exit
        mov     al, ds:[si]
        or      al, al
        jz      @@exit                  ;; transparent: leave destination alone
        mov     es:[di], al
@@exit: ret
vram_inner endp
end
oship me and i will give you lots of guurrls and beeea