Qbasicnews.com
Who's up for a real challenge? - Printable Version

+- Qbasicnews.com (http://qbasicnews.com/newforum)
+-- Forum: QbasicNews.Com (http://qbasicnews.com/newforum/forum-3.html)
+--- Forum: Challenges (http://qbasicnews.com/newforum/forum-10.html)
+--- Thread: Who's up for a real challenge? (/thread-632.html)

Pages: 1 2 3 4


Who's up for a real challenge? - Blitz - 04-19-2003

The contest is over, noone enterd. You guys suck. Tongue


Who's up for a real challenge? - Ninkazu - 04-19-2003

Quote:The contest is over, noone enterd. You guys suck. Tongue

BAHAHA! :lol:


Who's up for a real challenge? - Fling-master - 04-19-2003

:rotfl:


Who's up for a real challenge? - relsoft - 04-21-2003

Cya tomorrow. I'll post something... Just that I was on vacation for a little R&R. :*)


Who's up for a real challenge? - relsoft - 04-22-2003

Here:

Notes. Your BX indexing technique is slow. ;*) as compared to a di/si inc.

PutF uses your BX indexing technique and PutF2 uses the standard Inc Si/Di.

Code:
MODEL MEDIUM,BASIC
.STACK 30H
.386
.CODE

;-----------------------------------------------------------------------
; RelPutF - transparent sprite put using [si+bx] / es:[di+bx] indexing
;
;SUB RelPutF(BYVAL DESTSEG%,BYVAL X%,BYVAL Y%,BYVAL SPRITESEGMENT%,BYVAL SPRITEOFFSET%)
;
; Sprite layout as read below: word 0 = width*8, word 1 = height,
; followed by width*height pixel bytes. Pixels equal to 0 are skipped.
; Destination byte address is DESTSEG:(Y*320 + X). No clipping is done.
; Only the low byte of the height word is used as the row counter.
;
;STACK (offsets from BP)
;DEST SEG       =14
;X              =12
;Y              =10
;SPRITESEGMENT  =8
;SPRITOFFSET    =6
;RET SEG        =4
;RET OFF        =2
;BP             =0
;saved DS       =-2
;320-WIDTH      =-4     ;row skip, precomputed so the loop needs no SUB
;
;DS:SI                  =SPRITE SEG:SPRITE OFF
;ES:DI                  =DEST SEG:DEST OFF
;
; Preserves BP, DS, SI, DI (SI/DI are saved because a routine called
; from BASIC is expected to return them unchanged).
; Clobbers AX, BX, CX, DX, ES, flags.
;-----------------------------------------------------------------------

align 2

PUBLIC RelPutF
RelPutF PROC

        push    bp
        mov     bp,sp
        sub     sp,4                    ;two local words: [bp-2], [bp-4]
        push    si                      ;pushed below the locals, so the
        push    di                      ;[bp+n]/[bp-n] offsets are unchanged

        mov     [bp-2],ds               ;save caller's DS

        mov     es,[bp+14]              ;ES = destination segment

        mov     ds,[bp+8]               ;DS:SI -> sprite header
        mov     si,[bp+6]

        mov     dx,[si]                 ;DX = width*8
        mov     ax,[si+2]               ;AL = height; AH is pixel scratch below

        add     si,4                    ;SI -> first pixel

        shr     dx,3                    ;DX = width in pixels
        jz      PutFDone                ;zero width: counter would wrap to 65535
        or      al,al
        jz      PutFDone                ;zero height: same wrap-around

                                        ;destination offset
        mov     cx,[bp+10]              ;CX = Y
        xchg    ch,cl                   ;CX = Y*256 (for Y below 256)
        mov     di,cx
        shr     di,2                    ;DI = Y*64
        add     di,cx                   ;DI = Y*64 + Y*256 = Y*320
        add     di,[bp+12]              ;DI = Y*320 + X

        mov     cx,320
        sub     cx,dx
        mov     [bp-4],cx               ;320 - width

PutFRow:
        xor     bx,bx                   ;BX = column index within the row
        mov     cx,dx                   ;CX = pixels left in this row

PutFPixel:
        mov     ah,[si+bx]
        or      ah,ah
        jz      PutFSkip                ;colour 0 = transparent
        mov     es:[di+bx],ah
PutFSkip:
        inc     bx
        dec     cx
        jnz     PutFPixel

        add     si,bx                   ;BX = width here: next sprite row
        add     bx,[bp-4]               ;BX = width + (320 - width) = 320
        add     di,bx                   ;next destination row
        dec     al
        jnz     PutFRow

PutFDone:
        mov     ds,[bp-2]               ;restore caller's DS
        pop     di
        pop     si
        add     sp,4
        pop     bp

        ret     10                      ;pop the five word arguments

RelPutF ENDP

END


Here's the one which is faster:

Code:
MODEL MEDIUM,BASIC
.STACK 30H
.386
.CODE

;-----------------------------------------------------------------------
; RelPutF2 - transparent sprite put using inc si / inc di
;
;SUB RelPutF2(BYVAL DESTSEG%,BYVAL X%,BYVAL Y%,BYVAL SPRITESEGMENT%,BYVAL SPRITEOFFSET%)
;
; Sprite layout as read below: word 0 = width*8, word 1 = height,
; followed by width*height pixel bytes. Pixels equal to 0 are skipped.
; Destination byte address is DESTSEG:(Y*320 + X). No clipping is done.
;
;STACK (offsets from BP)
;DEST SEG       =14
;X              =12
;Y              =10
;SPRITESEGMENT  =8
;SPRITOFFSET    =6
;RET SEG        =4
;RET OFF        =2
;BP             =0
;saved DS       =-2
;320-WIDTH      =-4     ;row skip, precomputed so the loop needs no SUB
;
;DS:SI                  =SPRITE SEG:SPRITE OFF
;ES:DI                  =DEST SEG:DEST OFF
;
; Preserves BP, DS, SI, DI (SI/DI are saved because a routine called
; from BASIC is expected to return them unchanged).
; Clobbers AX, BX, CX, DX, ES, flags.
;-----------------------------------------------------------------------

align 2

PUBLIC RelPutF2
RelPutF2 PROC

        push    bp
        mov     bp,sp
        sub     sp,4                    ;two local words: [bp-2], [bp-4]
        push    si                      ;pushed below the locals, so the
        push    di                      ;[bp+n]/[bp-n] offsets are unchanged

        mov     [bp-2],ds               ;save caller's DS

        mov     es,[bp+14]              ;ES = destination segment

        mov     ds,[bp+8]               ;DS:SI -> sprite header
        mov     si,[bp+6]

        mov     dx,[si]                 ;DX = width*8
        mov     bx,[si+2]               ;BX = height (row counter)

        add     si,4                    ;SI -> first pixel

        shr     dx,3                    ;DX = width in pixels
        jz      PutF2Done               ;zero width: counter would wrap to 65535
        or      bx,bx
        jz      PutF2Done               ;zero height: same wrap-around

                                        ;destination offset
        mov     cx,[bp+10]              ;CX = Y
        xchg    ch,cl                   ;CX = Y*256 (for Y below 256)
        mov     di,cx
        shr     di,2                    ;DI = Y*64
        add     di,cx                   ;DI = Y*64 + Y*256 = Y*320
        add     di,[bp+12]              ;DI = Y*320 + X

        mov     cx,320
        sub     cx,dx
        mov     [bp-4],cx               ;320 - width

PutF2Row:
        mov     cx,dx                   ;CX = pixels left in this row

PutF2Pixel:
        mov     al,[si]
        inc     si

        or      al,al
        jz      PutF2Skip               ;colour 0 = transparent
        mov     es:[di],al
PutF2Skip:
        inc     di
        dec     cx
        jnz     PutF2Pixel

        add     di,[bp-4]               ;DI moved by width; add 320-width
        dec     bx
        jnz     PutF2Row

PutF2Done:
        mov     ds,[bp-2]               ;restore caller's DS
        pop     di
        pop     si
        add     sp,4
        pop     bp

        ret     10                      ;pop the five word arguments

RelPutF2 ENDP

END

To test:

Code:
' Benchmark for RelPutF ([si/di+bx] indexing) against RelPutF2 (inc si/di).
' Each routine is called Iterations& times into a buffer in base memory and
' then into video memory (segment &HA000); calls per second are printed.
DECLARE SUB RelPutF (BYVAL DESTSEG%, BYVAL X%, BYVAL Y%, BYVAL SPRITESEGMENT%, BYVAL SPRITEOFFSET%)
DECLARE SUB RelPutF2 (BYVAL DESTSEG%, BYVAL X%, BYVAL Y%, BYVAL SPRITESEGMENT%, BYVAL SPRITEOFFSET%)

DEFINT A-Z

' Calls per timed run. The rate is computed from this constant and not from
' the loop counter, because the counter is one past the limit after NEXT.
CONST Iterations& = 500000

' 32000 integers = 64000 bytes, one 320x200 page.
' NOTE(review): the routines receive only a segment, so this assumes the
' array starts at offset 0 of that segment - confirm VARPTR(Vpage(0)) = 0.
DIM Vpage(31999) AS INTEGER

'Test Vars (calls per second)
DIM PutTestMem  AS SINGLE
DIM PutTestMem2 AS SINGLE
DIM PutTestVid  AS SINGLE
DIM PutTestVid2 AS SINGLE

CLS
SCREEN 13
W = 15
H = 15

Layer = VARSEG(Vpage(0))

' Room for the GET image: 4 header bytes plus (W+1)*(H+1) pixel bytes.
Size = ((W + 1) * (H + 1) + 4) \ 2
DIM Array(Size) AS INTEGER

' Draw a test pattern and grab it as the sprite.
FOR I = 0 TO H
  FOR J = 0 TO W
    PSET (J, I), (I OR J) + 150
  NEXT
NEXT

GET (0, 0)-(W, H), Array

SprSeg = VARSEG(Array(0))
SprOff = VARPTR(Array(0))

'BASE MEMORY
T# = TIMER
FOR n& = 1 TO Iterations&
  RelPutF Layer, 100, 100, SprSeg, SprOff
NEXT
PutTestMem = Iterations& / (TIMER - T#)

T# = TIMER
FOR n& = 1 TO Iterations&
  RelPutF2 Layer, 100, 100, SprSeg, SprOff
NEXT
PutTestMem2 = Iterations& / (TIMER - T#)

'VIDEO
T# = TIMER
FOR n& = 1 TO Iterations&
  RelPutF &HA000, 100, 100, SprSeg, SprOff
NEXT
PutTestVid = Iterations& / (TIMER - T#)

T# = TIMER
FOR n& = 1 TO Iterations&
  RelPutF2 &HA000, 100, 100, SprSeg, SprOff
NEXT
PutTestVid2 = Iterations& / (TIMER - T#)

CLS
SCREEN 0
WIDTH 80

PRINT STR$(PutTestMem) + " For PutF aka [Si/di+bx] trick (Base mem)"
PRINT STR$(PutTestMem2) + " For PutF2 aka inc si/di (Base mem)"
PRINT STR$(PutTestVid) + " For PutF aka [Si/di+bx] trick  (Video mem)"
PRINT STR$(PutTestVid2) + " For PutF2 aka inc si/di (Video mem)"

' Wait for a key so the results stay on screen.
C$ = INPUT$(1)

CLS
SCREEN 0
END



Who's up for a real challenge? - Blitz - 04-22-2003

I never said I was going to use it; it's pointless on a 486. Since no one entered, I didn't bother either, so I never finished it: it doesn't align to VRAM. But beat this. It's faster than UGL on a 486.

Code:
.model medium, basic            
            .386
            .data
innerloop   word        ?
            .code
            
;;
;; Blit486 - transparent sprite blit onto a 320-byte-wide destination
;;
;; pdst -> far pointer to the destination (segment:offset)
;; x, y -> position; the target address is pdst + y*320 + x
;; psrc -> far pointer to the sprite: word 0 = width*8, word 1 = height,
;;         pixel rows follow from offset 4
;;
;; The per-row routine is vram_inner when the destination segment is
;; 0a000h and sram_inner otherwise. Its address is held in a stack local:
;; ds is repointed at the sprite by lds, so a variable in the data segment
;; can no longer be reached through ds from that point on.
;;
;; Returns without drawing when the width or the height is zero.
;; No clipping is performed. ax, cx, dx and flags are not preserved.
;;
Blit486     proc    public uses bx di si ds es,\
                    pdst:far ptr, x:word, y:word, psrc:far ptr

            local   dstXres:word
            local   srcXres:word, srcYres:word
            local   rowproc:word

            ;;
            ;; Pick the row routine from the destination segment
            ;;
            cmp     word ptr [pdst+2], 0a000h
            jne     @@sramloop
            mov     rowproc, offset vram_inner
            jmp     @@conta
@@sramloop: mov     rowproc, offset sram_inner

@@conta:
            ;;
            ;; es:di -> dst
            ;; ds:si -> src
            ;;
            lds     si, psrc
            les     di, pdst

            ;;
            ;; Destination pitch in bytes
            ;;
            mov     dstXres, 320

            ;;
            ;; Get source width and height
            ;;
            mov     ax, ds:[si]
            mov     cx, ds:[si+2]
            shr     ax, 3               ;; header stores width*8
            mov     srcYres, cx
            mov     srcXres, ax

            ;;
            ;; Setup destination address (mul leaves the high word in dx)
            ;;
            mov     ax, y
            mul     dstXres
            add     di, x
            add     di, ax
            add     si, 4               ;; skip the 4-byte header

            mov     bx, srcXres         ;; bx -> pixel count for the row routine
            mov     cx, srcYres         ;; cx -> rows left

            or      bx, bx
            jz      @@exit
            or      cx, cx
            jz      @@exit

            ;;
            ;; The row routines save and restore si, di, bx and cx, so the
            ;; pointers are advanced here, one row per call.
            ;;
@@oloop:    call    rowproc
            add     si, srcXres
            add     di, dstXres
            dec     cx
            jnz     @@oloop

@@exit:     ret
Blit486     endp
                    
            

            
            
;;
;; sram_inner - blit one sprite row with colour 0 treated as transparent,
;; by reading the destination and merging through a byte mask (no
;; per-pixel branch).
;;
;; bx -> pixel count
;; ds:si -> source
;; es:di -> destination
;;
;; Rows shorter than 8 pixels are done entirely one byte at a time.
;; Longer rows do the first (count and 3) pixels one byte at a time and
;; the rest four bytes at a time.
;;
;; NOTE(review): fs and gs are used as scratch storage and the upper
;; halves of eax, ebx, ecx and edx are modified; none of these are in the
;; uses list, so they are not restored on return.
;;
sram_inner  proc    near private uses ax bx cx dx di si bp
            
            ;;
            ;; fs -> single loop count
            ;; gs -> quad loop count
            ;;
            xor     ax, ax
            mov     fs, bx            
            mov     gs, ax
            cmp     bx, 8
            jl      @@bloop_s       ;; short row: fs = count, gs = 0
            
            ;;
            ;; Quad pixels
            ;; (count rounded down to a multiple of 4)
            ;;            
            mov     bp, bx
            shr     bp, 2
            shl     bp, 2
            mov     gs, bp
            
            ;;
            ;; Single pixels
            ;; (count modulo 4; when that is zero go straight to the quads)
            ;;
            and     bx, 3
            jz      @@qloop_s
            mov     fs, bx            
            
            
            ;;
            ;; Single pixel loop
            ;; si/di are moved past the singles and bp runs from -count
            ;; up to zero, so inc sets the zero flag on the last pixel.
            ;;            
@@bloop_s:  mov    bp, fs
            add    si, bp
            add    di, bp
            neg    bp            
            
@@bloop_i:  mov    cl, ds:[si+bp]  ;; 1
            mov    dl, es:[di+bp]  ;; 2

            ;;
            ;; Create mask
            ;; cmp sets carry only when cl = 0, so sbb gives
            ;; al = 0ffh for a transparent pixel and 00h otherwise.
            ;;
            cmp    cl, 1           ;; 3
            sbb    al, al          ;; 4
            
            ;;
            ;; Combine pixels
            ;; (destination kept where the mask is set, source or-ed in)
            ;;            
            and    al, dl          ;; 5
            or     al, cl          ;; 6
            
            mov    es:[di+bp], al  ;; 7
            
            ;;
            ;; Pixels left ?
            ;;
            inc    bp              ;; 8
            jnz    @@bloop_i       ;; 8-11
            
            
            ;;
            ;; Quad pixel loop
            ;; Same negative-index scheme, four bytes per pass.
            ;;
@@qloop_s:  mov    bp, gs
            or     bp, bp
            jz     @@exit
            
            add    si, bp
            add    di, bp
            neg    bp            
            
@@qloop_i:  mov    ecx, ds:[si+bp] ;; 1
            mov    edx, es:[di+bp] ;; 2

            ;; ecx -> source
            ;; edx -> destination            
            ;; bl/bh receive source bytes 2 and 3
            mov    ebx, ecx        ;; 3
            shr    ebx, 16         ;; 3-5
            
            ;;
            ;; Create mask
            ;; Masks for bytes 2 and 3 are built in al/ah and shifted to
            ;; the upper half of eax; bytes 0 and 1 are then built in al/ah.
            ;;
            cmp    bl, 1           ;; 6
            sbb    al, al          ;; 7
            cmp    bh, 1           ;; 8
            sbb    ah, ah          ;; 9
            shl    eax, 16         ;; 9-11
            cmp    cl, 1           ;; 12
            sbb    al, al          ;; 13
            cmp    ch, 1           ;; 14
            sbb    ah, ah          ;; 15
            
            ;;
            ;; Combine source and destination
            ;;            
            and    eax, edx        ;; 16
            or     eax, ecx        ;; 17
            mov    es:[di+bp], eax ;; 18 (Hopefully)
            
            ;;
            ;; More pixels?
            ;;
            add    bp, 4           ;; 19
            jnz    @@qloop_i       ;; 19-22
                        
@@exit:     ret
sram_inner  endp


;;
;; vram_inner - blit one sprite row with colour 0 treated as transparent,
;; writing only the opaque bytes (the destination is never read).
;;
;; bx -> pixel count
;; ds:si -> source
;; es:di -> destination
;;
;; Pixels are handled two at a time through a four-way jump table. An odd
;; count is made even first by handling the last pixel of the row on its
;; own; otherwise the pair index, which steps by 2, would never reach
;; zero and the loop would not terminate. A zero count returns at once
;; for the same reason.
;;
vram_inner  proc    near private uses ax bx cx di si bp
            jmp     @@begin

            ;; indexed by 2*(first pixel opaque) + 4*(second pixel opaque)
vramTB      word    @@cont, @@casea, @@caseb, @@casec

@@begin:    or      bx, bx
            jz      @@exit

            ;;
            ;; Odd count: do the last pixel now and drop it from the count
            ;;
            test    bl, 1
            jz      @@pairs
            mov     al, ds:[si+bx-1]
            or      al, al
            jz      @@trim
            mov     es:[di+bx-1], al
@@trim:     dec     bx
            jz      @@exit              ;; the row was a single pixel

            ;;
            ;; si/di point past the pairs; bp runs from -count up to zero
            ;;
@@pairs:    mov     bp, bx
            add     si, bx
            add     di, bx
            neg     bp

@@loop:     mov     ax, ds:[si+bp]      ;; al = first pixel, ah = second

            or      al, al
            setnz   bl
            or      ah, ah
            setnz   bh

            shl     bl, 1               ;; 0 or 2
            shl     bh, 2               ;; 0 or 4
            or      bl, bh
            xor     bh, bh              ;; bx = table offset 0, 2, 4 or 6

            ;; ds addresses the sprite here and the table sits in the code
            ;; segment, so the segment is named explicitly.
            jmp     word ptr cs:[vramTB+bx]

@@casea:    mov     es:[di+bp+0], al    ;; only the first pixel is opaque
            jmp     @@cont
@@caseb:    mov     es:[di+bp+1], ah    ;; only the second pixel is opaque
            jmp     @@cont
@@casec:    mov     es:[di+bp+0], ax    ;; both opaque

@@cont:     add     bp, 2
            jnz     @@loop

@@exit:     ret
vram_inner  endp
            end



Who's up for a real challenge? - Blitz - 04-22-2003

Oh, and it's.
Code:
;; Row loop indexed by a negative count.
;; On entry bx holds the pixel count (non-zero), ds:si the source row and
;; es:di the destination row. si and di are moved to the end of the row
;; and bx counts up from -count, so the inc that steps the index also
;; produces the loop condition. Pixels equal to 0 are not written.
        add     di, bx
        add     si, bx
        neg     bx
@@pixel:
        mov     al, ds:[si+bx]
        test    al, al
        jz      @@next
        mov     es:[di+bx], al
@@next:
        inc     bx
        jnz     @@pixel



Who's up for a real challenge? - Blitz - 04-25-2003

No comments rel? That's a first Tongue


Who's up for a real challenge? - Agamemnus - 04-26-2003

I'd post something but this whole thread isn't in English...


Who's up for a real challenge? - LooseCaboose - 04-26-2003

I just glanced at this thread, so I could be way off, but wouldn't something like the following be sufficient:

Code:
/*
 * Copy a sprite onto a 320-byte-wide 8-bit surface, skipping pixels whose
 * value is 0 (transparent).
 *
 * dst  - start of the destination surface (rows are 320 bytes apart)
 * x, y - position of the sprite's top-left corner on that surface
 * src  - sprite in the layout used by the assembly routines in this
 *        thread: 16-bit little-endian width*8, 16-bit little-endian
 *        height, then width*height pixel bytes stored row by row
 *
 * No clipping is performed: the caller must make sure the whole sprite
 * lies inside the destination.
 */
void blit(void *dst, int x, int y, void *src) {
  enum { DST_PITCH = 320, HEADER_BYTES = 4 };

  const unsigned char *s = (const unsigned char *)src;
  unsigned char *d = (unsigned char *)dst;
  int width, height, row, col;

  /* Header bytes are read one at a time so host byte order is irrelevant. */
  width = (int)((s[0] | ((unsigned)s[1] << 8)) >> 3);
  height = (int)(s[2] | ((unsigned)s[3] << 8));

  s += HEADER_BYTES;
  d += (long)y * DST_PITCH + x;

  for (row = 0; row < height; row++) {
    for (col = 0; col < width; col++) {
      /* Both positions advance on every pixel; only the store is skipped. */
      if (s[col]) {
        d[col] = s[col];
      }
    }
    s += width;
    d += DST_PITCH;
  }
}

Much nicer than programming in assembly, plus it's completely portable and should give reasonably good object code if you use a good compiler (gcc -O3).