Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Who's up for a real challenge?
#11
The contest is over, no one entered. You guys suck. Tongue
oship me and i will give you lots of guurrls and beeea
Reply
#12
Quote:The contest is over, noone enterd. You guys suck. Tongue

BAHAHA! :lol:
am an asshole. Get used to it.
Reply
#13
:rotfl:
Reply
#14
Cya tomorrow. I'll post something... Just that I was on vacation for a little R&R. :*)
y smiley is 24 bit.
[Image: anya2.jpg]

Genso's Junkyard:
http://rel.betterwebber.com/
Reply
#15
Here:

Notes: your BX indexing technique is slow ;*) compared to a plain inc si / inc di.

PutF uses your BX indexing technique and PutF2 uses the standard Inc Si/Di.

Code:
MODEL MEDIUM,BASIC
.STACK 30H
.386
.CODE

;SUB RelPutF(BYVAL DESTSEG%,BYVAL X%,BYVAL Y%,BYVAL SPRITESEGMENT%,BYVAL SPRITEOFFSET%)
;
;Transparent sprite put into a 320-byte-wide, 8-bit layer ("BX index" variant:
;source and destination are both addressed as [base+bx]).
;Sprite layout read by this routine: word 0 = width*8, word 1 = height,
;then the pixel bytes. A pixel byte of 0 is skipped (transparent).
;
;LIMITS: no clipping is done.
;        Y must be 0..255 (XCHG CH,CL is used as Y*256).
;        Only the low byte of the height is used as the row counter.
;        A width or row counter of 0 draws nothing.
;
;STACK (offsets from BP)
;DEST SEG       =14
;X              =12
;Y              =10
;SPRITESEGMENT  =8
;SPRITOFFSET    =6
;RET SEG        =4
;RET OFF        =2
;BP             =0
;SAVED DS       =-2
;320-WIDTH      =-4     ;FOR SPEEDIER LOOP NO SUB
;SAVED SI       =-6
;SAVED DI       =-8

;DS:SI                  =SPRITE SEG:SPRITE OFF
;ES:DI                  =DEST SEG:DEST OFF

align 2

PUBLIC RelPutF
RelPutF PROC

PUSH BP
mov bp,sp
sub sp,4                        ;two word locals at [bp-2],[bp-4]
push si                         ;BASIC expects SI and DI to survive the call
push di

mov [bp-2],ds                   ;save ds

mov es,[bp+14]                  ;layer

mov ds,[bp+8]                   ;spr seg
mov si,[bp+6]                   ;spr off

mov dx,[si]                     ;wid*8
mov ax,[si+2]                   ;height (only AL is used as the counter)

add si,4                        ;ds:si -> first pixel

shr dx,3                        ;wid\8
jz PutFDone                     ;width 0: DEC CX would wrap to 65535

or al,al
jz PutFDone                     ;row counter 0: DEC AL would wrap to 255

                                ;calc offset

mov cx,[bp+10]                  ;y
xchg ch,cl                      ;Y*256 (valid for 0 <= Y <= 255)
mov di,cx                       ;save
shr di,2                        ;Y*64
add di,cx                       ;Y*64+Y*256=Y*320
add di,[bp+12]                  ;Offset=Y*320+X

;notes:
;Al = Height (row counter)
;Ah = pixel scratch
;dx = Wid
;bx = column index inside the row
;cx = column counter

mov cx,320
sub cx,dx
mov [bp-4],cx                   ;save 320-width to stack

Yloop:
    xor bx,bx                   ;column index restarts at 0 on every row
    mov cx,dx                   ;cx = pixels left in this row

Xloop:

    mov ah,[si+bx]
    or ah,ah

    jz skip                     ;colour 0 = transparent
        mov es:[di+bx],ah
    skip:

    inc bx
    dec cx

jnz Xloop

    add si,bx                   ;bx = width here: next sprite row
    add bx,[bp-4]               ;bx = width+(320-width) = 320
    add di,bx                   ;next layer row
    dec al
jnz Yloop

PutFDone:
mov ds,[bp-2]                   ;restore caller's DS ([bp-2] is SS-relative)
pop di
pop si
ADD SP,4
POP BP                          ;RESTORE BP

RET 10                          ;far return, drop the 5 word arguments

RelPutF ENDP

END


Here's the one which is faster:

Code:
MODEL MEDIUM,BASIC
.STACK 30H
.386
.CODE

;SUB RelPutF2(BYVAL DESTSEG%,BYVAL X%,BYVAL Y%,BYVAL SPRITESEGMENT%,BYVAL SPRITEOFFSET%)
;
;Transparent sprite put into a 320-byte-wide, 8-bit layer ("inc si/di"
;variant: both pointers are stepped directly, no index register).
;Sprite layout read by this routine: word 0 = width*8, word 1 = height,
;then the pixel bytes. A pixel byte of 0 is skipped (transparent).
;
;LIMITS: no clipping is done.
;        Y must be 0..255 (XCHG CH,CL is used as Y*256).
;        A width or height of 0 draws nothing.
;
;STACK (offsets from BP)
;DEST SEG       =14
;X              =12
;Y              =10
;SPRITESEGMENT  =8
;SPRITOFFSET    =6
;RET SEG        =4
;RET OFF        =2
;BP             =0
;SAVED DS       =-2
;320-WIDTH      =-4     ;FOR SPEEDIER LOOP NO SUB
;SAVED SI       =-6
;SAVED DI       =-8

;DS:SI                  =SPRITE SEG:SPRITE OFF
;ES:DI                  =DEST SEG:DEST OFF

align 2

PUBLIC RelPutF2
RelPutF2 PROC

PUSH BP
mov bp,sp
sub sp,4                        ;two word locals at [bp-2],[bp-4]
push si                         ;BASIC expects SI and DI to survive the call
push di

mov [bp-2],ds                   ;save ds

mov es,[bp+14]                  ;layer

mov ds,[bp+8]                   ;spr seg
mov si,[bp+6]                   ;spr off

mov dx,[si]                     ;wid*8
mov bx,[si+2]                   ;height

add si,4                        ;ds:si -> first pixel

shr dx,3                        ;wid\8
jz PutF2Done                    ;width 0: DEC CX would wrap to 65535

or bx,bx
jz PutF2Done                    ;height 0: DEC BX would wrap to 65535

                                ;calc offset

mov cx,[bp+10]                  ;y
xchg ch,cl                      ;Y*256 (valid for 0 <= Y <= 255)
mov di,cx                       ;save
shr di,2                        ;Y*64
add di,cx                       ;Y*64+Y*256=Y*320
add di,[bp+12]                  ;Offset=Y*320+X

;notes:
;Al = pixel scratch
;dx = Wid
;bx = Height (row counter)
;cx = column counter

mov cx,320
sub cx,dx
mov [bp-4],cx                   ;save 320-width to stack

Yloop:
    mov cx,dx                   ;cx = pixels left in this row

Xloop:

    mov al,[si]
    inc si

    or al,al
    jz skip                     ;colour 0 = transparent
        mov es:[di],al
    skip:

    inc di
    dec cx

jnz Xloop
    add di,[bp-4]               ;di already moved by width: add 320-width
    dec bx
jnz Yloop

PutF2Done:
mov ds,[bp-2]                   ;restore caller's DS ([bp-2] is SS-relative)
pop di
pop si
ADD SP,4
POP BP                          ;RESTORE BP

RET 10                          ;far return, drop the 5 word arguments

RelPutF2 ENDP

END

To test:

Code:
DECLARE SUB RelPutF (BYVAL DESTSEG%, BYVAL X%, BYVAL Y%, BYVAL SPRITESEGMENT%, BYVAL SPRITEOFFSET%)
DECLARE SUB RelPutF2 (BYVAL DESTSEG%, BYVAL X%, BYVAL Y%, BYVAL SPRITESEGMENT%, BYVAL SPRITEOFFSET%)

'Benchmark: times RelPutF against RelPutF2, first into an off-screen
'layer and then straight into video memory, and prints calls per second.

DEFINT A-Z

'Calls timed per routine. The rate is computed from this constant because
'the FOR counter is one past the limit once the loop has finished.
CONST ITERATIONS& = 500000

'Off-screen layer: 32000 integers = 64000 bytes = 320 x 200 pixels.
'NOTE(review): the put routines only receive the segment, so they assume
'the layer starts at offset 0 of that segment - confirm VARPTR(Vpage(0)) = 0.
DIM Vpage(31999) AS INTEGER

'Test Vars (calls per second)
DIM PutTestMem  AS SINGLE
DIM PutTestMem2 AS SINGLE
DIM PutTestVid  AS SINGLE
DIM PutTestVid2 AS SINGLE


CLS
SCREEN 13
W = 15
H = 15



Layer = VARSEG(Vpage(0))

'GET buffer: 4 header bytes plus one byte per pixel, in integers.
Size = ((W + 1) * (H + 1) + 4) \ 2
DIM Array(Size) AS INTEGER

'Draw a test pattern and grab it as the sprite.
FOR I = 0 TO H
  FOR J = 0 TO W
        PSET (J, I), (I OR J) + 150
  NEXT
NEXT

GET (0, 0)-(W, H), Array

SprSeg = VARSEG(Array(0))
SprOff = VARPTR(Array(0))

    'BASE MEMORY
    T# = TIMER
    FOR n& = 1 TO ITERATIONS&
      RelPutF Layer, 100, 100, SprSeg, SprOff
    NEXT
    PutTestMem = ITERATIONS& / (TIMER - T#)

    T# = TIMER
    FOR n& = 1 TO ITERATIONS&
      RelPutF2 Layer, 100, 100, SprSeg, SprOff
    NEXT
    PutTestMem2 = ITERATIONS& / (TIMER - T#)

    'VIDEO
    T# = TIMER
    FOR n& = 1 TO ITERATIONS&
      RelPutF &HA000, 100, 100, SprSeg, SprOff
    NEXT
    PutTestVid = ITERATIONS& / (TIMER - T#)

    T# = TIMER
    FOR n& = 1 TO ITERATIONS&
      RelPutF2 &HA000, 100, 100, SprSeg, SprOff
    NEXT
    PutTestVid2 = ITERATIONS& / (TIMER - T#)


    'Back to text mode for the report.
    CLS
    SCREEN 0
    WIDTH 80

    PRINT STR$(PutTestMem) + " For PutF aka [Si/di+bx] trick (Base mem)"
    PRINT STR$(PutTestMem2) + " For PutF2 aka inc si/di (Base mem)"
    PRINT STR$(PutTestVid) + " For PutF aka [Si/di+bx] trick  (Video mem)"
    PRINT STR$(PutTestVid2) + " For PutF2 aka inc si/di (Video mem)"

    'Wait for a key so the numbers can be read.
    C$ = INPUT$(1)

CLS
SCREEN 0
END
y smiley is 24 bit.
[Image: anya2.jpg]

Genso's Junkyard:
http://rel.betterwebber.com/
Reply
#16
I never said I was going to use it; it's pointless on a 486. Since no one entered, I didn't bother either, so I never finished it (it doesn't align to VRAM). But beat this: it's faster than UGL on a 486.

Code:
.model medium, basic            
            .386
            .data
innerloop   word        ?
            .code
            
;;
;; Blit486 -- transparent sprite blit into a 320-byte-wide 8-bit surface.
;;
;; In (BASIC calling convention, see the PROC line):
;;   pdst -> far pointer to the destination surface
;;   x, y -> destination position in pixels (no clipping is done)
;;   psrc -> far pointer to the sprite: word 0 = width*8, word 1 = height,
;;           then the pixel bytes
;;
;; One row is handed to a per-row routine on every pass of the outer loop:
;; vram_inner when the destination segment is 0A000h, sram_inner otherwise.
;;
;; The row routine's address is kept in a stack local. DS points at the
;; sprite for the whole blit, so a variable in .data cannot be reached
;; through DS here: writing it would store two bytes into the sprite's
;; segment instead. The `innerloop` word in .data is therefore no longer
;; used by this procedure.
;;
Blit486     proc    public uses bx di si ds es,\
                    pdst:far ptr, x:word, y:word, psrc:far ptr

            local   dstXres:word
            local   srcXres:word, srcYres:word
            local   inner:word

            ;;
            ;; es:di -> dst
            ;; ds:si -> src
            ;;
            lds     si, psrc
            les     di, pdst

            ;;
            ;; Pick the row routine from the destination segment
            ;; (pdst and inner are both addressed through SS:BP)
            ;;
            cmp     word ptr [pdst+2], 0a000h
            jne     @@sramloop
            mov     inner, offset vram_inner
            jmp     @@conta
@@sramloop: mov     inner, offset sram_inner


@@conta:
            ;;
            ;; Destination pitch in bytes
            ;;
            mov     dstXres, 320

            ;;
            ;; Get source width and height
            ;;
            mov     ax, ds:[si]
            mov     cx, ds:[si+2]
            shr     ax, 3               ;; header stores width*8
            mov     srcYres, cx
            mov     srcXres, ax

            ;;
            ;; Setup destination adress: di += y*pitch + x
            ;; (mul leaves the high word in dx, which is ignored)
            ;;
            mov     ax, y
            mul     dstXres
            add     di, x
            add     di, ax
            add     si, 4               ;; skip the 4-byte sprite header

            mov     bx, srcXres         ;; bx = pixels per row
            mov     cx, srcYres         ;; cx = rows left

            ;;
            ;; Nothing to draw for an empty sprite
            ;;
            or      bx, bx
            jz      @@exit
            or      cx, cx
            jz      @@exit


            ;;
            ;; The row routines restore si, di, bx and cx, so the row
            ;; pointers are advanced here
            ;;
@@oloop:    call    inner
            add     si, srcXres
            add     di, dstXres
            dec     cx
            jnz     @@oloop

@@exit:     ret
Blit486     endp
                    
            

            
            
;;
;; sram_inner -- copy one sprite row with transparency, without branching
;; on individual pixels. Blit486 selects it when the destination segment
;; is not 0A000h.
;;
;; In:
;;   bx -> pixel count
;;   ds:si -> source
;;   es:di -> destination
;;
;; A source byte of 0 is transparent: the destination byte is read, kept
;; under a mask where the source is 0, OR-ed with the source and written
;; back. Rows shorter than 8 pixels go through the byte loop only. Longer
;; rows do (count AND 3) bytes in the byte loop first and the remaining
;; multiple of 4 in the dword loop.
;;
;; The registers in the USES list are restored on return.
;; NOTE(review): only the 16-bit halves are saved, while eax, ebx, ecx and
;; edx are written in the dword loop; fs and gs are overwritten and not
;; restored.
;; NOTE(review): fs/gs are loaded with plain counts, which presumably
;; relies on real-mode segment register loads -- confirm the target mode.
;; The trailing numbers in the ;; comments appear to be the author's
;; running cycle tally.
;;
sram_inner  proc    near private uses ax bx cx dx di si bp
            
            ;;
            ;; fs -> single loop count
            ;; gs -> quad loop count
            ;;
            xor     ax, ax
            mov     fs, bx              ;; default: every pixel is a single
            mov     gs, ax              ;; default: no quad pixels
            cmp     bx, 8
            jl      @@bloop_s           ;; short row (signed compare): bytes only
            
            ;;
            ;; Quad pixels
            ;;            
            mov     bp, bx
            shr     bp, 2
            shl     bp, 2               ;; bp = count rounded down to a multiple of 4
            mov     gs, bp
            
            ;;
            ;; Single pixels
            ;;
            and     bx, 3               ;; bx = count mod 4
            jz      @@qloop_s           ;; no remainder: straight to the dword loop
            mov     fs, bx            
            
            
            ;;
            ;; Single pixel loop
            ;; si/di are moved past the run and bp counts up from -count
            ;; to 0, so the loop needs no separate compare
            ;;            
@@bloop_s:  mov    bp, fs
            add    si, bp
            add    di, bp
            neg    bp            
            
@@bloop_i:  mov    cl, ds:[si+bp]  ;; 1
            mov    dl, es:[di+bp]  ;; 2

            ;;
            ;; Create mask
            ;; cmp sets carry only when cl = 0, so al = 0FFh for a
            ;; transparent source byte and 00h otherwise
            ;;
            cmp    cl, 1           ;; 3
            sbb    al, al          ;; 4
            
            ;;
            ;; Combine pixels
            ;;            
            and    al, dl          ;; 5
            or     al, cl          ;; 6
            
            mov    es:[di+bp], al  ;; 7
            
            ;;
            ;; Pixels left ?
            ;;
            inc    bp              ;; 8
            jnz    @@bloop_i       ;; 8-11
            
            
            ;;
            ;; Quad pixel loop
            ;; si/di were left just past the single pixels, so this
            ;; continues where the byte loop stopped
            ;;
@@qloop_s:  mov    bp, gs
            or     bp, bp
            jz     @@exit
            
            add    si, bp
            add    di, bp
            neg    bp            
            
@@qloop_i:  mov    ecx, ds:[si+bp] ;; 1
            mov    edx, es:[di+bp] ;; 2

            ;; ecx -> source
            ;; edx -> destination            
            mov    ebx, ecx        ;; 3
            shr    ebx, 16         ;; 3-5
            
            ;;
            ;; Create mask
            ;; bl/bh are source bytes 2 and 3: their masks are built in
            ;; ax and shifted to the top half of eax, then the masks for
            ;; bytes 0 and 1 (cl/ch) fill the bottom half
            ;;
            cmp    bl, 1           ;; 6
            sbb    al, al          ;; 7
            cmp    bh, 1           ;; 8
            sbb    ah, ah          ;; 9
            shl    eax, 16         ;; 9-11
            cmp    cl, 1           ;; 12
            sbb    al, al          ;; 13
            cmp    ch, 1           ;; 14
            sbb    ah, ah          ;; 15
            
            ;;
            ;; Combine source and destination
            ;;            
            and    eax, edx        ;; 16
            or     eax, ecx        ;; 17
            mov    es:[di+bp], eax ;; 18 (Hopefully)
            
            ;;
            ;; More pixels?
            ;;
            add    bp, 4           ;; 19
            jnz    @@qloop_i       ;; 19-22
                        
@@exit:     ret
sram_inner  endp


;;
;; vram_inner -- copy one sprite row with transparency, writing only the
;; opaque pixels (the destination is never read). Blit486 selects it when
;; the destination segment is 0A000h.
;;
;; In:
;;   bx -> pixel count
;;   ds:si -> source
;;   es:di -> destination
;;
;; A source byte of 0 is transparent. Pixels are taken two at a time and
;; a four-entry jump table picks the store: none, low byte, high byte or
;; the whole word.
;;
;; The pair loop steps bp by 2 towards zero, so it only terminates for an
;; even count. An odd count therefore copies its first pixel on its own
;; before entering the loop, and a zero count returns at once.
;;
;; The registers in the USES list are restored on return.
;;
vram_inner  proc    near private uses ax bx cx di si bp
            jmp     @@begin
vramTB      word    @@cont, @@casea, @@caseb, @@casec

            ;;
            ;; si/di are moved past the row and bp counts up from -count
            ;; to 0, so the loop needs no separate compare
            ;;
@@begin:    mov     bp, bx
            add     si, bx
            add     di, bx
            neg     bp
            jz      @@exit              ;; nothing to copy

            ;;
            ;; Odd count: do the first pixel alone
            ;;
            test    bp, 1
            jz      @@loop
            mov     al, ds:[si+bp]
            or      al, al
            jz      @@odd
            mov     es:[di+bp], al
@@odd:      inc     bp
            jz      @@exit              ;; the row was a single pixel

@@loop:     mov     ax, ds:[si+bp]      ;; al = left pixel, ah = right pixel

            ;;
            ;; bx = 2*(al <> 0) + 4*(ah <> 0): byte offset into vramTB
            ;;
            or      al, al
            setnz   bl
            or      ah, ah
            setnz   bh

            shl     bl, 1
            shl     bh, 2
            or      bl, bh
            xor     bh, bh

            ;; the table sits in the code segment and DS points at the
            ;; sprite, so it is read through CS
            jmp     word ptr cs:[vramTB+bx]

@@casea:    mov     es:[di+bp+0], al    ;; left pixel only
            jmp     @@cont
@@caseb:    mov     es:[di+bp+1], ah    ;; right pixel only
            jmp     @@cont
@@casec:    mov     es:[di+bp+0], ax    ;; both pixels

@@cont:     add     bp, 2
            jnz     @@loop

@@exit:     ret
vram_inner  endp
            end
oship me and i will give you lots of guurrls and beeea
Reply
#17
Oh, and it's.
Code:
;; Transparent row copy using one index register.
;;   bx    -> pixel count on entry
;;   ds:si -> source row
;;   es:di -> destination row
;; Both pointers are moved past the row and bx runs from -count up to 0,
;; so the increment doubles as the loop test.
        neg     bx                      ;; bx = -count
        sub     si, bx                  ;; si += count
        sub     di, bx                  ;; di += count
@@next_pixel:
        mov     al, ds:[si+bx]
        test    al, al                  ;; colour 0 = transparent
        jz      @@transparent
        mov     es:[di+bx], al
@@transparent:
        inc     bx
        jnz     @@next_pixel
oship me and i will give you lots of guurrls and beeea
Reply
#18
No comments rel? That's a first Tongue
oship me and i will give you lots of guurrls and beeea
Reply
#19
I'd post something but this whole thread isn't in English...
Peace cannot be obtained without war. Why? If there is already peace, it is unnecessary for war. If there is no peace, there is already war."

Visit www.neobasic.net to see rubbish in all its finest.
Reply
#20
I just glanced at this thread so I could be way off, but wouldn't something like the following be sufficient:

Code:
/*
 * Draw a sprite into a 320-byte-wide, 8-bit destination surface at (x, y).
 *
 * dst: start of the destination surface, one byte per pixel.
 * x,y: position of the sprite's top-left pixel. No clipping is done, so the
 *      whole sprite must fit inside the surface.
 * src: sprite in the layout the assembly routines in this thread read:
 *      a little-endian 16-bit word holding width*8, a 16-bit word holding
 *      the height, then width*height pixel bytes, row by row.
 *
 * A source pixel of 0 is transparent and leaves the destination untouched.
 */
void blit(void *dst, int x, int y, void *src) {
  enum { SCREEN_PITCH = 320 };  /* bytes per destination row */

  /* void pointers cannot be dereferenced or stepped; work on bytes. */
  const unsigned char *s = (const unsigned char *)src;
  unsigned char *d = (unsigned char *)dst;
  int width, height, row, col;

  /* Decode the 4-byte header byte-wise so host endianness does not matter. */
  width = (s[0] | (s[1] << 8)) >> 3;
  height = s[2] | (s[3] << 8);
  s += 4;

  d += (long)y * SCREEN_PITCH + x;

  for (row = 0; row < height; row++) {
    for (col = 0; col < width; col++) {
      /* Gives transparent colour = 0. Both positions advance on every
       * pixel, opaque or not; only the store is conditional. */
      if (s[col]) {
        d[col] = s[col];
      }
    }
    s += width;         /* sprite rows are packed back to back */
    d += SCREEN_PITCH;  /* destination rows are a full pitch apart */
  }
}

Much nicer than programming in assembly, plus it's completely portable and should give reasonably good object code if you use a good compiler (gcc -O3).
esus saves.... Passes to Moses, shoots, he scores!
Reply


Forum Jump:


Users browsing this thread: 1 Guest(s)