Posts: 788
Threads: 53
Joined: Nov 2002
The contest is over; no one entered. You guys suck.
oship me and i will give you lots of guurrls and beeea
Posts: 1,166
Threads: 62
Joined: Apr 2003
Quote: The contest is over; no one entered. You guys suck.
BAHAHA! :lol:
I am an asshole. Get used to it.
Posts: 614
Threads: 87
Joined: Aug 2001
Posts: 3,288
Threads: 167
Joined: Nov 2001
Cya tomorrow. I'll post something... Just that I was on vacation for a little R&R. :*)
Posts: 3,288
Threads: 167
Joined: Nov 2001
Here:
Notes: Your BX indexing technique is slow ;*) compared to incrementing SI/DI.
PutF uses your BX indexing technique and PutF2 uses the standard Inc Si/Di.
Code: MODEL MEDIUM,BASIC
.STACK 30H
.386
.CODE
;SUB RelPutF(BYVAL DESTSEG%,BYVAL X%,BYVAL Y%,BYVAL SPRITESEGMENT%,BYVAL SPRITEOFFSET%)
;
;Draws a sprite onto a 320-byte-wide layer at (X,Y), skipping every pixel
;whose colour is 0. This variant addresses source and destination through a
;shared BX index ([si+bx] / es:[di+bx]).
;
;Sprite layout as read below: word 0 = width*8, word 1 = height, pixel bytes
;start at offset 4 and are stored row after row.
;The destination is addressed as DESTSEG:(Y*320+X).
;
;Limits: no clipping is done; Y must be below 256 (Y*256 is formed by
;swapping CH and CL); the row counter is AL, so heights above 256 are not
;supported. A sprite with zero width or zero height draws nothing.
;
;STACK
;DEST SEG       =14
;X              =12
;Y              =10
;SPRITESEGMENT  =8
;SPRITOFFSET    =6
;RET SEG        =4
;RET OFF        =2
;BP             =0
;SAVED DS       =-2
;320-WIDTH      =-4     ;added to BX (=WIDTH) after a row so DI steps by 320
;SAVED SI       =-6
;SAVED DI       =-8
;DS:SI          =SPRITE SEG:SPRITE OFF
;ES:DI          =DEST SEG:DEST OFF
align 2
PUBLIC RelPutF
RelPutF PROC
        push    bp
        mov     bp,sp
        sub     sp,4                    ;two word locals
        push    si                      ;the BASIC caller expects SI and DI
        push    di                      ;to come back unchanged
        mov     [bp-2],ds               ;save ds
        mov     es,[bp+14]              ;layer
        mov     ds,[bp+8]               ;spr seg
        mov     si,[bp+6]               ;spr off
        mov     dx,[si]                 ;width*8
        mov     ax,[si+2]               ;height (only AL is used as counter)
        add     si,4                    ;skip the 4-byte header
        shr     dx,3                    ;wid\8
        ;an empty sprite would make the DEC counters below wrap around
        ;and write far outside the sprite rectangle, so leave early
        or      dx,dx
        jz      PutFDone
        or      ax,ax
        jz      PutFDone
        ;calc offset
        mov     cx,[bp+10]              ;y
        xchg    ch,cl                   ;Y*256 (valid while Y < 256)
        mov     di,cx                   ;save
        shr     di,2                    ;Y*64
        add     di,cx                   ;Y*64+Y*256 = Y*320
        add     di,[bp+12]              ;Offset=Y*320+X
        ;register roles inside the loops:
        ;al = rows left
        ;ah = current pixel
        ;dx = width in pixels
        ;bx = column index within the row
        ;cx = columns left in the row
        mov     cx,320
        sub     cx,dx
        mov     [bp-4],cx               ;320-width, saved to stack
Yloop:
        xor     bx,bx                   ;column index back to 0
        mov     cx,dx                   ;columns to do in this row
Xloop:
        mov     ah,[si+bx]
        or      ah,ah
        jz      skip                    ;colour 0 is transparent
        mov     es:[di+bx],ah
skip:
        inc     bx
        dec     cx
        jnz     Xloop
        add     si,bx                   ;bx = width: next sprite row
        add     bx,[bp-4]               ;width + (320-width) = 320
        add     di,bx                   ;next layer row
        dec     al
        jnz     Yloop
PutFDone:
        mov     ds,[bp-2]               ;restore caller's ds
        pop     di
        pop     si
        add     sp,4
        pop     bp                      ;restore bp
        ret     10                      ;five word arguments
RelPutF ENDP
END
Here's the one which is faster:
Code: MODEL MEDIUM,BASIC
.STACK 30H
.386
.CODE
;SUB RelPutF2(BYVAL DESTSEG%,BYVAL X%,BYVAL Y%,BYVAL SPRITESEGMENT%,BYVAL SPRITEOFFSET%)
;
;Draws a sprite onto a 320-byte-wide layer at (X,Y), skipping every pixel
;whose colour is 0. This variant walks source and destination by
;incrementing SI and DI directly.
;
;Sprite layout as read below: word 0 = width*8, word 1 = height, pixel bytes
;start at offset 4 and are stored row after row.
;The destination is addressed as DESTSEG:(Y*320+X).
;
;Limits: no clipping is done; Y must be below 256 (Y*256 is formed by
;swapping CH and CL). A sprite with zero width or zero height draws nothing.
;
;STACK
;DEST SEG       =14
;X              =12
;Y              =10
;SPRITESEGMENT  =8
;SPRITOFFSET    =6
;RET SEG        =4
;RET OFF        =2
;BP             =0
;SAVED DS       =-2
;320-WIDTH      =-4     ;added to DI after each row, no SUB in the loop
;SAVED SI       =-6
;SAVED DI       =-8
;DS:SI          =SPRITE SEG:SPRITE OFF
;ES:DI          =DEST SEG:DEST OFF
align 2
PUBLIC RelPutF2
RelPutF2 PROC
        push    bp
        mov     bp,sp
        sub     sp,4                    ;two word locals
        push    si                      ;the BASIC caller expects SI and DI
        push    di                      ;to come back unchanged
        mov     [bp-2],ds               ;save ds
        mov     es,[bp+14]              ;layer
        mov     ds,[bp+8]               ;spr seg
        mov     si,[bp+6]               ;spr off
        mov     dx,[si]                 ;width*8
        mov     bx,[si+2]               ;height
        add     si,4                    ;skip the 4-byte header
        shr     dx,3                    ;wid\8
        ;an empty sprite would make the DEC counters below wrap around
        ;and write far outside the sprite rectangle, so leave early
        or      dx,dx
        jz      PutF2Done
        or      bx,bx
        jz      PutF2Done
        ;calc offset
        mov     cx,[bp+10]              ;y
        xchg    ch,cl                   ;Y*256 (valid while Y < 256)
        mov     di,cx                   ;save
        shr     di,2                    ;Y*64
        add     di,cx                   ;Y*64+Y*256 = Y*320
        add     di,[bp+12]              ;Offset=Y*320+X
        ;register roles inside the loops:
        ;al = current pixel
        ;dx = width in pixels
        ;bx = rows left
        ;cx = columns left in the row
        mov     cx,320
        sub     cx,dx
        mov     [bp-4],cx               ;320-width, saved to stack
Yloop:
        mov     cx,dx                   ;columns to do in this row
Xloop:
        mov     al,[si]
        inc     si
        or      al,al
        jz      skip                    ;colour 0 is transparent
        mov     es:[di],al
skip:
        inc     di
        dec     cx
        jnz     Xloop
        add     di,[bp-4]               ;di moved by width; add the rest of 320
        dec     bx
        jnz     Yloop
PutF2Done:
        mov     ds,[bp-2]               ;restore caller's ds
        pop     di
        pop     si
        add     sp,4
        pop     bp                      ;restore bp
        ret     10                      ;five word arguments
RelPutF2 ENDP
END
To test:
Code: DECLARE SUB RelPutF (BYVAL DESTSEG%, BYVAL X%, BYVAL Y%, BYVAL SPRITESEGMENT%, BYVAL SPRITEOFFSET%)
DECLARE SUB RelPutF2 (BYVAL DESTSEG%, BYVAL X%, BYVAL Y%, BYVAL SPRITESEGMENT%, BYVAL SPRITEOFFSET%)
DEFINT A-Z
DECLARE FUNCTION PutsPerSec! (StartTime#)

' Benchmark for RelPutF (BX-indexed inner loop) against RelPutF2
' (INC SI/DI inner loop). Each routine draws the same 16x16 sprite
' Reps& times, first into an off-screen layer in base memory and then
' straight into video memory at segment &HA000. The results are printed
' as sprites drawn per second.

CONST Reps& = 500000

DIM Vpage(31999) AS INTEGER

'Test Vars
DIM PutTestMem AS SINGLE
DIM PutTestMem2 AS SINGLE
DIM PutTestVid AS SINGLE
DIM PutTestVid2 AS SINGLE

CLS
SCREEN 13

W = 15
H = 15

Layer = VARSEG(Vpage(0))

' (W + 1) * (H + 1) pixel bytes plus the 4-byte GET header, in integers
Size = ((W + 1) * (H + 1) + 4) \ 2
DIM Array(Size) AS INTEGER

' Draw a test pattern and capture it as the sprite
FOR I = 0 TO H
    FOR J = 0 TO W
        PSET (J, I), (I OR J) + 150
    NEXT
NEXT
GET (0, 0)-(W, H), Array

SprSeg = VARSEG(Array(0))
SprOff = VARPTR(Array(0))

'BASE MEMORY
T# = TIMER
FOR n& = 1 TO Reps&
    RelPutF Layer, 100, 100, SprSeg, SprOff
NEXT
PutTestMem = PutsPerSec!(T#)

T# = TIMER
FOR n& = 1 TO Reps&
    RelPutF2 Layer, 100, 100, SprSeg, SprOff
NEXT
PutTestMem2 = PutsPerSec!(T#)

'VIDEO
T# = TIMER
FOR n& = 1 TO Reps&
    RelPutF &HA000, 100, 100, SprSeg, SprOff
NEXT
PutTestVid = PutsPerSec!(T#)

T# = TIMER
FOR n& = 1 TO Reps&
    RelPutF2 &HA000, 100, 100, SprSeg, SprOff
NEXT
PutTestVid2 = PutsPerSec!(T#)

CLS
SCREEN 0
WIDTH 80
PRINT STR$(PutTestMem) + " For PutF aka [Si/di+bx] trick (Base mem)"
PRINT STR$(PutTestMem2) + " For PutF2 aka inc si/di (Base mem)"
PRINT STR$(PutTestVid) + " For PutF aka [Si/di+bx] trick (Video mem)"
PRINT STR$(PutTestVid2) + " For PutF2 aka inc si/di (Video mem)"
C$ = INPUT$(1)
CLS
SCREEN 0
END

' Returns Reps& divided by the seconds elapsed since StartTime#.
' The loop counter is not used for the rate because it is one past the
' last iteration when a FOR loop finishes.
FUNCTION PutsPerSec! (StartTime#)
    Elapsed# = TIMER - StartTime#
    ' TIMER restarts from 0 at midnight
    IF Elapsed# < 0 THEN Elapsed# = Elapsed# + 86400#
    ' A run shorter than one timer tick would otherwise divide by zero
    IF Elapsed# = 0 THEN Elapsed# = 1# / 18.2#
    PutsPerSec! = Reps& / Elapsed#
END FUNCTION
Posts: 788
Threads: 53
Joined: Nov 2002
I never said I was going to use it; it's pointless on a 486. Since no one entered, I didn't bother myself either. So I never finished it, and it doesn't align to VRAM. But beat this. It's faster than UGL on a 486.
Code: .model medium, basic
.386
.data
innerloop word ?
.code
;;
;; Blit486 pdst, x, y, psrc
;;
;; Copies a sprite to a 320-byte-wide destination at (x, y), leaving the
;; destination untouched wherever the sprite pixel is 0.
;;
;; pdst -> far pointer to the destination buffer
;; x, y -> position inside the destination; no clipping is done
;; psrc -> far pointer to the sprite: word width*8, word height, then
;;         one byte per pixel, row after row
;;
;; Each row is drawn by vram_inner when the destination segment is
;; 0a000h and by sram_inner otherwise.
;;
;; The address of the row routine is kept in a stack local. DS stops
;; addressing the data segment as soon as psrc is loaded, so a pointer
;; stored in a .data variable would be written to, and fetched from,
;; the sprite's segment.
;;
Blit486     proc public uses bx di si ds es,\
            pdst:far ptr, x:word, y:word, psrc:far ptr

            local dstXres:word
            local srcXres:word, srcYres:word
            local rowproc:word

            ;;
            ;; es:di -> dst
            ;; ds:si -> src
            ;;
            lds si, psrc
            les di, pdst

            ;;
            ;; Pick the row routine (locals and arguments are SS-relative)
            ;;
            mov rowproc, offset sram_inner
            cmp word ptr [pdst+2], 0a000h
            jne @@conta
            mov rowproc, offset vram_inner
@@conta:
            ;;
            ;; Destination row pitch
            ;;
            mov dstXres, 320

            ;;
            ;; Get source width and height
            ;;
            mov ax, ds:[si]
            mov cx, ds:[si+2]
            shr ax, 3                   ;; header stores width*8
            mov srcYres, cx
            mov srcXres, ax

            ;;
            ;; Setup destination address: di += y*320 + x
            ;;
            mov ax, y
            mul dstXres                 ;; dx:ax = y*320, only ax is used
            add di, x
            add di, ax
            add si, 4                   ;; skip the sprite header

            ;;
            ;; Nothing to draw for an empty sprite
            ;;
            mov bx, srcXres
            mov cx, srcYres
            or bx, bx
            jz @@exit
            or cx, cx
            jz @@exit

            ;;
            ;; One call per row. The row routines preserve si, di, bx
            ;; and cx, so the pointers are advanced here.
            ;;
@@oloop:    call rowproc
            add si, srcXres
            add di, dstXres
            dec cx
            jnz @@oloop

@@exit:     ret
Blit486     endp
;;
;; Draws one sprite row: every source byte that is 0 leaves the
;; destination byte unchanged, any other value replaces it. The copy is
;; done branch-free by building a byte mask from the source pixel.
;;
;; bx -> pixel count
;; ds:si -> source
;; es:di -> destination
;;
;; All general registers touched here are listed in USES and restored on
;; return. fs and gs are overwritten and not restored.
;;
sram_inner proc near private uses ax bx cx dx di si bp
;;
;; fs -> single loop count
;; gs -> quad loop count
;;
;; (fs and gs are used purely as scratch storage for the two counts)
;;
xor ax, ax
mov fs, bx
mov gs, ax
;; Rows shorter than 8 pixels are done entirely by the single pixel loop
;; (fs = count, gs = 0). Note the compare is signed.
cmp bx, 8
jl @@bloop_s
;;
;; Quad pixels
;;
;; gs = count rounded down to a multiple of 4
mov bp, bx
shr bp, 2
shl bp, 2
mov gs, bp
;;
;; Single pixels
;;
;; fs = count mod 4; when that is 0 the single pixel loop is skipped
and bx, 3
jz @@qloop_s
mov fs, bx
;;
;; Single pixel loop
;;
;; si/di are moved past the pixels to process and bp counts up from
;; -count to 0, so the loop needs no separate compare.
@@bloop_s: mov bp, fs
add si, bp
add di, bp
neg bp
@@bloop_i: mov cl, ds:[si+bp] ;; 1
mov dl, es:[di+bp] ;; 2
;;
;; Create mask
;;
;; cmp sets carry only when cl is 0, so sbb gives al = 0ffh for a
;; zero source pixel and al = 0 otherwise
cmp cl, 1 ;; 3
sbb al, al ;; 4
;;
;; Combine pixels
;;
;; al = destination pixel where the source is 0, else the source pixel
and al, dl ;; 5
or al, cl ;; 6
mov es:[di+bp], al ;; 7
;;
;; Pixels left ?
;;
inc bp ;; 8
jnz @@bloop_i ;; 8-11
;;
;; Quad pixel loop
;;
;; Handles the gs pixels that follow the ones done above, four per
;; iteration, with the same negative-index scheme.
@@qloop_s: mov bp, gs
or bp, bp
jz @@exit
add si, bp
add di, bp
neg bp
@@qloop_i: mov ecx, ds:[si+bp] ;; 1
mov edx, es:[di+bp] ;; 2
;; ecx -> source
;; edx -> destination
;; ebx = upper two source pixels moved down into bl/bh
mov ebx, ecx ;; 3
shr ebx, 16 ;; 3-5
;;
;; Create mask
;;
;; Same cmp/sbb trick per byte: the masks for the upper two pixels are
;; built in ax and shifted into the high half of eax, then the masks for
;; the lower two pixels are built in al/ah.
cmp bl, 1 ;; 6
sbb al, al ;; 7
cmp bh, 1 ;; 8
sbb ah, ah ;; 9
shl eax, 16 ;; 9-11
cmp cl, 1 ;; 12
sbb al, al ;; 13
cmp ch, 1 ;; 14
sbb ah, ah ;; 15
;;
;; Combine source and destination
;;
and eax, edx ;; 16
or eax, ecx ;; 17
mov es:[di+bp], eax ;; 18 (Hopefully)
;;
;; More pixels?
;;
add bp, 4 ;; 19
jnz @@qloop_i ;; 19-22
@@exit: ret
sram_inner endp
;;
;; Draws one sprite row, writing only the non-zero source pixels, so the
;; destination is never read.
;;
;; bx -> pixel count
;; ds:si -> source
;; es:di -> destination
;;
;; Pixels are handled in pairs through a four-way jump table indexed by
;; which of the two is non-zero. An odd count is made even by drawing
;; one pixel up front; without that the index would step over zero and
;; the pair loop would never stop. A zero count draws nothing.
;;
vram_inner  proc near private uses ax bx cx di si bp
            jmp @@begin

            ;; entry n is the handler for pair state n/2:
            ;; 0 = both zero, 1 = first only, 2 = second only, 3 = both
vramTB      word @@cont, @@casea, @@caseb, @@casec

@@begin:    or bx, bx
            jz @@exit

            ;; si/di -> one past the row end, bp = -count counts up to 0
            mov bp, bx
            add si, bx
            add di, bx
            neg bp

            ;; odd count: draw the first pixel on its own
            test bx, 1
            jz @@loop
            mov al, ds:[si+bp]
            or al, al
            jz @@odd
            mov es:[di+bp], al
@@odd:      inc bp
            jz @@exit                   ;; the row was a single pixel

@@loop:     mov ax, ds:[si+bp]          ;; al = first pixel, ah = second
            or al, al
            setnz bl
            or ah, ah
            setnz bh
            shl bl, 1                   ;; 2 when the first pixel is set
            shl bh, 2                   ;; 4 when the second pixel is set
            or bl, bh
            xor bh, bh                  ;; bx = byte offset into vramTB
            jmp [vramTB+bx]

@@casea:    mov es:[di+bp+0], al
            jmp @@cont

@@caseb:    mov es:[di+bp+1], ah
            jmp @@cont

@@casec:    mov es:[di+bp+0], ax

@@cont:     add bp, 2
            jnz @@loop

@@exit:     ret
vram_inner  endp
end
oship me and i will give you lots of guurrls and beeea
Posts: 788
Threads: 53
Joined: Nov 2002
Oh, and it's.
Code: add si, bx ;; presumably bx = pixel count on entry: si -> past the source row
add di, bx ;; di -> past the destination row
neg bx ;; bx = -count, counted up towards 0
@@iloop:
mov al, ds:[si+bx]
or al, al
jz @@skip ;; a zero pixel is not written
mov es:[di+bx], al
@@skip: inc bx
jnz @@iloop ;; inc sets ZF when the index reaches 0, so no separate counter
oship me and i will give you lots of guurrls and beeea
Posts: 788
Threads: 53
Joined: Nov 2002
No comments rel? That's a first
oship me and i will give you lots of guurrls and beeea
Posts: 3,368
Threads: 195
Joined: Jan 2003
I'd post something but this whole thread isn't in English...
"Peace cannot be obtained without war. Why? If there is already peace, it is unnecessary for war. If there is no peace, there is already war."
Visit www.neobasic.net to see rubbish in all its finest.
Posts: 691
Threads: 5
Joined: Apr 2002
I just glanced at this thread so I could be way off, but wouldn't something like the following be sufficient?
/*
 * Copy a sprite onto a 320-byte-wide, one-byte-per-pixel buffer, treating
 * colour 0 as transparent.
 *
 * dst  - start of the destination buffer (row pitch 320 bytes)
 * x, y - position of the sprite's top-left corner in dst; no clipping
 * src  - sprite: 16-bit little-endian width*8, 16-bit little-endian
 *        height, then width*height pixel bytes stored row after row
 *
 * Source and destination advance for every pixel, whether or not it is
 * copied, and the destination skips to the next buffer row after each
 * sprite row.
 */
void blit(void *dst, int x, int y, void *src) {
    enum { ROW_PITCH = 320, HEADER_SIZE = 4 };

    const unsigned char *s = (const unsigned char *)src;
    unsigned char *d = (unsigned char *)dst;
    int width, height;
    int row, col;

    /* The header stores the width in bits, hence the shift by 3. */
    width = (s[0] | (s[1] << 8)) >> 3;
    height = s[2] | (s[3] << 8);
    s += HEADER_SIZE;

    d += (long)y * ROW_PITCH + x;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            /* Gives transparent colour = 0 */
            if (s[col]) {
                d[col] = s[col];
            }
        }
        s += width;
        d += ROW_PITCH;
    }
}
Much nicer than programming in assembly, plus it's completely portable and should give reasonably good object code if you use a good compiler (gcc -O3).
Jesus saves.... Passes to Moses, shoots, he scores!
|