Who's up for a real challenge? - Blitz - 04-19-2003
The contest is over; no one entered. You guys suck.
Who's up for a real challenge? - Ninkazu - 04-19-2003
Quote: The contest is over; no one entered. You guys suck.
BAHAHA! :lol:
Who's up for a real challenge? - Fling-master - 04-19-2003
:rotfl:
Who's up for a real challenge? - relsoft - 04-21-2003
Cya tomorrow. I'll post something... Just that I was on vacation for a little R&R. :*)
Who's up for a real challenge? - relsoft - 04-22-2003
Here:
Notes: your BX indexing technique is slow ;*) compared to incrementing SI/DI.
PutF uses your BX indexing technique and PutF2 uses the standard Inc Si/Di.
Code: MODEL MEDIUM,BASIC
.STACK 30H
.386
.CODE
;SUB RelPutF(BYVAL DESTSEG%,BYVAL X%,BYVAL Y%,BYVAL SPRITESEGMENT%,BYVAL SPRITEOFFSET%)
;
;Draws a sprite into a 320-byte-wide, 8-bit-per-pixel buffer that starts at
;offset 0 of DESTSEG. Sprite bytes equal to 0 are skipped (transparent).
;This variant addresses pixels as [si+bx] / es:[di+bx].
;
;Sprite layout as read below: word 0 = width*8, word 1 = height,
;pixel bytes start at offset 4.
;
;Limits: no clipping is done; Y must be < 256 (the XCHG trick below builds
;Y*256 from the low byte only); the row counter is AL, so height <= 255.
;A sprite with zero width or zero height draws nothing.
;
;STACK (offsets from BP)
;DEST SEG       =14
;X              =12
;Y              =10
;SPRITESEGMENT  =8
;SPRITOFFSET    =6
;RET SEG        =4
;RET OFF        =2
;BP             =0
;SAVED DS       =-2
;320-WIDTH      =-4 ;FOR SPEEDIER LOOP NO SUB
;SAVED SI       =-6
;SAVED DI       =-8
;DS:SI          =SPRITE SEG:SPRITE OFF
;ES:DI          =DEST SEG:DEST OFF
align 2
PUBLIC RelPutF
RelPutF PROC
        push    bp
        mov     bp,sp
        sub     sp,4            ;room for saved DS and the row skip
        push    si              ;SI/DI must survive the call for
        push    di              ;Microsoft high-level-language callers
        mov     [bp-2],ds       ;save ds
        mov     es,[bp+14]      ;layer
        mov     ds,[bp+8]       ;spr seg
        mov     si,[bp+6]       ;spr off
        mov     dx,[si]         ;wid*8
        mov     ax,[si+2]       ;height
        add     si,4            ;si -> first pixel
        shr     dx,3            ;wid\8
        or      dx,dx
        jz      PutFDone        ;zero width: the column counter would wrap
        or      ax,ax
        jz      PutFDone        ;zero height: the row counter would wrap
;calc offset
        mov     cx,[bp+10]      ;y
        xchg    ch,cl           ;Y*256
        mov     di,cx           ;save
        shr     di,2            ;Y*64
        add     di,cx           ;Y*64+Y*256=Y*320
        add     di,[bp+12]      ;Offset=Y*320+X
;notes:
;al = Height (row counter)
;ah = pixel scratch
;dx = Wid
;bx = column index inside the row
;cx = column counter
        mov     cx,320
        sub     cx,dx
        mov     [bp-4],cx       ;320-width, added to DI after every row
Yloop:
        xor     bx,bx
        mov     cx,dx           ;columns left in this row
Xloop:
        mov     ah,[si+bx]
        or      ah,ah
        jz      skip            ;colour 0 = transparent
        mov     es:[di+bx],ah
skip:
        inc     bx
        dec     cx
        jnz     Xloop
        add     si,bx           ;next sprite row (bx = width here)
        add     bx,[bp-4]       ;width + (320-width) = 320
        add     di,bx           ;next destination row
        dec     al
        jnz     Yloop
PutFDone:
        pop     di
        pop     si
        mov     ds,[bp-2]       ;restore ds
        add     sp,4
        pop     bp              ;RESTORE BP
        ret     10
RelPutF ENDP
END
Here's the one which is faster:
Code: MODEL MEDIUM,BASIC
.STACK 30H
.386
.CODE
;SUB RelPutF2(BYVAL DESTSEG%,BYVAL X%,BYVAL Y%,BYVAL SPRITESEGMENT%,BYVAL SPRITEOFFSET%)
;
;Draws a sprite into a 320-byte-wide, 8-bit-per-pixel buffer that starts at
;offset 0 of DESTSEG. Sprite bytes equal to 0 are skipped (transparent).
;This variant walks the data with INC SI / INC DI.
;
;Sprite layout as read below: word 0 = width*8, word 1 = height,
;pixel bytes start at offset 4.
;
;Limits: no clipping is done; Y must be < 256 (the XCHG trick below builds
;Y*256 from the low byte only).
;A sprite with zero width or zero height draws nothing.
;
;STACK (offsets from BP)
;DEST SEG       =14
;X              =12
;Y              =10
;SPRITESEGMENT  =8
;SPRITOFFSET    =6
;RET SEG        =4
;RET OFF        =2
;BP             =0
;SAVED DS       =-2
;320-WIDTH      =-4 ;FOR SPEEDIER LOOP NO SUB
;SAVED SI       =-6
;SAVED DI       =-8
;DS:SI          =SPRITE SEG:SPRITE OFF
;ES:DI          =DEST SEG:DEST OFF
align 2
PUBLIC RelPutF2
RelPutF2 PROC
        push    bp
        mov     bp,sp
        sub     sp,4            ;room for saved DS and the row skip
        push    si              ;SI/DI must survive the call for
        push    di              ;Microsoft high-level-language callers
        mov     [bp-2],ds       ;save ds
        mov     es,[bp+14]      ;layer
        mov     ds,[bp+8]       ;spr seg
        mov     si,[bp+6]       ;spr off
        mov     dx,[si]         ;wid*8
        mov     bx,[si+2]       ;height
        add     si,4            ;si -> first pixel
        shr     dx,3            ;wid\8
        or      dx,dx
        jz      PutF2Done       ;zero width: the column counter would wrap
        or      bx,bx
        jz      PutF2Done       ;zero height: the row counter would wrap
;calc offset
        mov     cx,[bp+10]      ;y
        xchg    ch,cl           ;Y*256
        mov     di,cx           ;save
        shr     di,2            ;Y*64
        add     di,cx           ;Y*64+Y*256=Y*320
        add     di,[bp+12]      ;Offset=Y*320+X
;notes:
;al = pixel scratch
;dx = Wid
;bx = Height (row counter)
;cx = column counter
        mov     cx,320
        sub     cx,dx
        mov     [bp-4],cx       ;320-width, added to DI after every row
Yloop:
        mov     cx,dx           ;columns left in this row
Xloop:
        mov     al,[si]
        inc     si
        or      al,al
        jz      skip            ;colour 0 = transparent
        mov     es:[di],al
skip:
        inc     di
        dec     cx
        jnz     Xloop
        add     di,[bp-4]       ;next destination row
        dec     bx
        jnz     Yloop
PutF2Done:
        pop     di
        pop     si
        mov     ds,[bp-2]       ;restore ds
        add     sp,4
        pop     bp              ;RESTORE BP
        ret     10
RelPutF2 ENDP
END
To test:
Code: DECLARE SUB RelPutF (BYVAL DESTSEG%, BYVAL X%, BYVAL Y%, BYVAL SPRITESEGMENT%, BYVAL SPRITEOFFSET%)
DECLARE SUB RelPutF2 (BYVAL DESTSEG%, BYVAL X%, BYVAL Y%, BYVAL SPRITESEGMENT%, BYVAL SPRITEOFFSET%)
DEFINT A-Z

' Benchmark: calls RelPutF and RelPutF2 the same number of times, first
' into an off-screen buffer and then into video memory (&HA000), and
' prints the resulting calls-per-second figure for each combination.

' Number of calls per timing run. The rate is computed from this constant;
' the loop counter itself is one past the limit once a FOR loop finishes.
CONST Iterations& = 500000

DIM Vpage(31999) AS INTEGER     ' 32000 words = 64000 bytes = 320x200 pixels

'Test Vars
DIM PutTestMem AS SINGLE
DIM PutTestMem2 AS SINGLE
DIM PutTestVid AS SINGLE
DIM PutTestVid2 AS SINGLE

CLS
SCREEN 13

W = 15
H = 15

' The put routines receive only a segment for the destination.
' NOTE(review): this assumes Vpage(0) sits at offset 0 of that segment - confirm.
Layer = VARSEG(Vpage(0))

' Sprite buffer: (W+1)*(H+1) pixel bytes plus a 4-byte header, in words.
Size = ((W + 1) * (H + 1) + 4) \ 2
DIM Array(Size) AS INTEGER

' Draw a test pattern and capture it as the sprite.
FOR I = 0 TO H
    FOR J = 0 TO W
        PSET (J, I), (I OR J) + 150
    NEXT
NEXT
GET (0, 0)-(W, H), Array

SprSeg = VARSEG(Array(0))
SprOff = VARPTR(Array(0))

'BASE MEMORY
T# = TIMER
FOR n& = 1 TO Iterations&
    RelPutF Layer, 100, 100, SprSeg, SprOff
NEXT
PutTestMem = Iterations& / (TIMER - T#)

T# = TIMER
FOR n& = 1 TO Iterations&
    RelPutF2 Layer, 100, 100, SprSeg, SprOff
NEXT
PutTestMem2 = Iterations& / (TIMER - T#)

'VIDEO
T# = TIMER
FOR n& = 1 TO Iterations&
    RelPutF &HA000, 100, 100, SprSeg, SprOff
NEXT
PutTestVid = Iterations& / (TIMER - T#)

T# = TIMER
FOR n& = 1 TO Iterations&
    RelPutF2 &HA000, 100, 100, SprSeg, SprOff
NEXT
PutTestVid2 = Iterations& / (TIMER - T#)

' Report in text mode and wait for a key.
CLS
SCREEN 0
WIDTH 80
PRINT STR$(PutTestMem) + " For PutF aka [Si/di+bx] trick (Base mem)"
PRINT STR$(PutTestMem2) + " For PutF2 aka inc si/di (Base mem)"
PRINT STR$(PutTestVid) + " For PutF aka [Si/di+bx] trick (Video mem)"
PRINT STR$(PutTestVid2) + " For PutF2 aka inc si/di (Video mem)"
C$ = INPUT$(1)
CLS
SCREEN 0
END
Who's up for a real challenge? - Blitz - 04-22-2003
I never said I was going to use it; it's pointless on a 486. Since no one entered, I didn't bother either, so I never finished it, and it doesn't align to VRAM. But beat this: it's faster than UGL on a 486.
Code: .model medium, basic
.386
.data
innerloop word ?
.code
;;
;; Blit486(pdst, x, y, psrc)
;;
;; Draws a sprite with colour 0 transparent into a 320-byte-wide buffer.
;;
;;  pdst  far pointer to the destination buffer
;;  x, y  position of the sprite's top-left pixel (no clipping is done)
;;  psrc  far pointer to the sprite: word width*8, word height, pixels
;;
;; Each row is handed to vram_inner when the destination segment is
;; 0A000h and to sram_inner otherwise.
;;
;; The row-handler address is kept in a stack local: once LDS has run,
;; DS points at the sprite's segment, so a variable in .data can no
;; longer be reached through DS.
;;
Blit486         proc public uses bx di si ds es,\
                pdst:far ptr, x:word, y:word, psrc:far ptr

                local   dstXres:word
                local   srcXres:word, srcYres:word
                local   rowBlit:word

                ;;
                ;; es:di -> dst
                ;; ds:si -> src
                ;;
                lds     si, psrc
                les     di, pdst

                ;;
                ;; Pick the per-row routine from the destination segment
                ;;
                mov     rowBlit, offset sram_inner
                cmp     word ptr [pdst+2], 0a000h
                jne     @@conta
                mov     rowBlit, offset vram_inner
@@conta:
                ;;
                ;; Destination pitch in bytes
                ;;
                mov     dstXres, 320

                ;;
                ;; Get source width and height
                ;;
                mov     ax, ds:[si]
                mov     cx, ds:[si+2]
                shr     ax, 3                   ;; header stores width*8
                mov     srcYres, cx
                mov     srcXres, ax

                ;;
                ;; Setup destination address: di += y*320 + x
                ;;
                mov     ax, y
                mul     dstXres                 ;; dx:ax = y*320 (dx discarded)
                add     di, x
                add     di, ax
                add     si, 4                   ;; skip the sprite header

                mov     bx, srcXres             ;; bx = pixels per row
                mov     cx, srcYres             ;; cx = rows left

                ;;
                ;; Nothing to draw for an empty sprite
                ;;
                or      bx, bx
                jz      @@exit
                or      cx, cx
                jz      @@exit

                ;;
                ;; The row routines preserve bx, cx, si and di
                ;;
@@oloop:        call    rowBlit
                add     si, srcXres
                add     di, dstXres
                dec     cx
                jnz     @@oloop

@@exit:         ret
Blit486         endp
;;
;; sram_inner: blend one sprite row into the destination, keeping the
;; destination byte wherever the source byte is 0.
;;
;; bx -> pixel count
;; ds:si -> source
;; es:di -> destination
;;
;; Counts below 8 are handled one byte at a time. Otherwise the first
;; (count AND 3) bytes are handled singly and the rest four at a time.
;;
;; Every store is a read-modify-write of the destination; no branch is
;; taken per pixel.
;;
;; Registers named in the USES list are restored on return. FS and GS
;; are overwritten with loop counts and are not restored. The quad loop
;; also changes the upper halves of EAX, EBX, ECX and EDX, which the
;; 16-bit USES list does not cover.
;;
sram_inner proc near private uses ax bx cx dx di si bp
;;
;; fs -> single loop count
;; gs -> quad loop count
;;
xor ax, ax
mov fs, bx
mov gs, ax
;; Signed compare: counts of 8000h and above would be taken as "less than 8".
cmp bx, 8
jl @@bloop_s
;;
;; Quad pixels
;;
;; gs = count rounded down to a multiple of 4
mov bp, bx
shr bp, 2
shl bp, 2
mov gs, bp
;;
;; Single pixels
;;
;; fs = count AND 3; skip the single loop entirely when that is 0
and bx, 3
jz @@qloop_s
mov fs, bx
;;
;; Single pixel loop
;;
;; si/di are moved past the single pixels and bp runs from -fs up to 0,
;; so the INC that advances the index also produces the loop condition.
@@bloop_s: mov bp, fs
add si, bp
add di, bp
neg bp
@@bloop_i: mov cl, ds:[si+bp] ;; 1
mov dl, es:[di+bp] ;; 2
;;
;; Create mask
;;
;; CMP cl,1 sets carry only when cl = 0; SBB then yields 0FFh for a
;; transparent source byte and 00h otherwise.
cmp cl, 1 ;; 3
sbb al, al ;; 4
;;
;; Combine pixels
;;
;; al = (mask AND destination) OR source
and al, dl ;; 5
or al, cl ;; 6
mov es:[di+bp], al ;; 7
;;
;; Pixels left ?
;;
inc bp ;; 8
jnz @@bloop_i ;; 8-11
;;
;; Quad pixel loop
;;
;; Same negative-index scheme as above, stepping 4 bytes at a time.
@@qloop_s: mov bp, gs
or bp, bp
jz @@exit
add si, bp
add di, bp
neg bp
@@qloop_i: mov ecx, ds:[si+bp] ;; 1
mov edx, es:[di+bp] ;; 2
;; ecx -> source
;; edx -> destination
;; bl/bh = source bytes 2 and 3
mov ebx, ecx ;; 3
shr ebx, 16 ;; 3-5
;;
;; Create mask
;;
;; Build the masks for bytes 2 and 3 in ax, shift them into the upper
;; half of eax, then build the masks for bytes 0 and 1 in ax.
cmp bl, 1 ;; 6
sbb al, al ;; 7
cmp bh, 1 ;; 8
sbb ah, ah ;; 9
shl eax, 16 ;; 9-11
cmp cl, 1 ;; 12
sbb al, al ;; 13
cmp ch, 1 ;; 14
sbb ah, ah ;; 15
;;
;; Combine source and destination
;;
and eax, edx ;; 16
or eax, ecx ;; 17
mov es:[di+bp], eax ;; 18 (Hopefully)
;;
;; More pixels?
;;
add bp, 4 ;; 19
jnz @@qloop_i ;; 19-22
@@exit: ret
sram_inner endp
;;
;; vram_inner: copy one sprite row to the destination, writing only the
;; non-zero source bytes. The destination is never read.
;;
;; bx -> pixel count
;; ds:si -> source
;; es:di -> destination
;;
;; Pixels are processed in pairs through a four-way jump table (neither,
;; first only, second only, both). A trailing pixel left over from an odd
;; count is handled separately after the pair loop; stepping the index by
;; 2 from an odd start would never reach zero. A count of 0 does nothing.
;;
;; All registers in the USES list are restored on return.
;;
vram_inner      proc near private uses ax bx cx di si bp
                jmp     @@begin

                ;; Indexed by (first <> 0)*2 + (second <> 0)*4
vramTB          word    @@cont, @@casea, @@caseb, @@casec

@@begin:        mov     cx, bx                  ;; cx = full count (odd test)
                and     bx, 0fffeh              ;; bx = bytes handled in pairs
                jz      @@tail

                ;;
                ;; si/di point past the paired part, bp runs from -bx to 0
                ;;
                mov     bp, bx
                add     si, bx
                add     di, bx
                neg     bp

@@loop:         mov     ax, ds:[si+bp]          ;; al = first, ah = second
                or      al, al
                setnz   bl
                or      ah, ah
                setnz   bh
                shl     bl, 1
                shl     bh, 2
                or      bl, bh                  ;; bl = byte offset into vramTB
                xor     bh, bh
                ;; The table lives in the code segment; DS is the sprite's.
                jmp     word ptr cs:[vramTB+bx]

@@casea:        mov     es:[di+bp+0], al
                jmp     @@cont
@@caseb:        mov     es:[di+bp+1], ah
                jmp     @@cont
@@casec:        mov     es:[di+bp+0], ax

@@cont:         add     bp, 2
                jnz     @@loop

                ;;
                ;; Odd count: si/di now point at the last pixel
                ;;
@@tail:         test    cl, 1
                jz      @@exit
                mov     al, ds:[si]
                or      al, al
                jz      @@exit
                mov     es:[di], al

@@exit:         ret
vram_inner      endp
end
Who's up for a real challenge? - Blitz - 04-22-2003
Oh, and the BX indexing technique actually goes like this:
Code: add si, bx ;; bx = pixel count on entry; move si past the row
add di, bx ;; same for di
neg bx ;; bx now runs from -count up to 0
@@iloop:
mov al, ds:[si+bx]
or al, al
jz @@skip ;; source byte 0: leave the destination alone
mov es:[di+bx], al
@@skip: inc bx ;; the index update also sets the flags for the loop test
jnz @@iloop ;; done when bx reaches 0, so no separate counter is needed
Who's up for a real challenge? - Blitz - 04-25-2003
No comments rel? That's a first
Who's up for a real challenge? - Agamemnus - 04-26-2003
I'd post something but this whole thread isn't in English...
Who's up for a real challenge? - LooseCaboose - 04-26-2003
I just glanced at this thread so I could be way off, but wouldn't something like the following be sufficient:
/*
 * Draw a sprite into a 320-byte-wide, 8-bit-per-pixel buffer, treating
 * colour 0 as transparent.
 *
 *   dst  start of the destination buffer
 *   x,y  position of the sprite's top-left pixel (no clipping is done)
 *   src  sprite data: a little-endian 16-bit word holding width*8, a
 *        little-endian 16-bit word holding the height, then
 *        width*height pixel bytes stored row by row
 */
void blit(void *dst, int x, int y, void *src) {
    enum { PITCH = 320 };   /* bytes per destination row */
    const unsigned char *sprite = (const unsigned char *)src;
    unsigned char *out = (unsigned char *)dst;
    int width, height, row, col;

    /* Decode the header byte by byte so host endianness does not matter. */
    width = (sprite[0] | (sprite[1] << 8)) >> 3;
    height = sprite[2] | (sprite[3] << 8);
    sprite += 4;

    out += (long)y * PITCH + x;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            unsigned char pixel = sprite[col];

            /* Gives transparent colour = 0 */
            if (pixel) {
                out[col] = pixel;
            }
        }
        /* Both pointers advance every row, transparent pixels included. */
        sprite += width;
        out += PITCH;
    }
}
Much nicer than programming in assembly, plus it's completely portable and should give reasonably good object code if you use a good compiler (gcc -O3).
|