Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
asm coding question (for an fb project)
#11
Quote:There isn't supposed to be anything there. The only pixel that was coloured is (99,99). Thanks for clearing it up for me, these routines are sooo fast.

Is there no way to dereference using asm and have the function return an integer? I have a feeling that I will forget a * somewhere and spend hours pulling my hair out over it.
Code:
'asm2.bas
function point32 (byval x as integer,byval y as integer, byval memory as integer ptr) as integer
  ' Returns the ADDRESS (not the colour) of pixel (x, y) inside a 32-bit,
  ' 320-pixel-wide buffer:
  '     result = memory + 4 + y * (320 * 4) + x * 4
  ' The extra 4 bytes skip the surface-info header at the start of the buffer.
  ' The caller has to read the integer stored at the returned address to get
  ' the colour; no bounds checking is done on x or y.
  asm
    mov ecx, dword ptr [y]
    lea ecx, [ecx + ecx*4]        ' ecx = y * 5
    shl ecx, 8                    ' ecx = y * 5 * 256 = y * 1280 (bytes per row)

    mov edx, dword ptr [x]
    lea eax, [ecx + edx*4 + 4]    ' eax = row offset + x * 4 + header size

    add eax, dword ptr [memory]   ' eax = absolute address of the pixel
    mov [function], eax           ' store the address as the function result
  end asm
end function

' Demo: plot one pixel with PSET, then read it back through the address
' computed by point32.
dim as integer ptr memory
dim as integer coladr, col

screenres 320,200,32

screenlock
  memory = screenptr
  pset (99,99), &H123456
  ' point32 adds 4 bytes for a buffer header; the pointer used here comes
  ' straight from screenptr, so those 4 bytes are taken off again.
  coladr = point32(99,99,memory) - 4
  col = peek(integer,coladr)
screenunlock

print "color on (99,99)=";hex(col)

sleep
Why do you add 4 bytes?

Joshy
sorry about my english
Reply
#12
Because I am using it for Inspiration. (Of course I'll credit you.) Inspiration is double buffered. I used imagecreate to make the buffer and there is a 4 byte header that stores the height and width of the buffer.

I got similar speed results. What the hell is PSET doing that takes so long? I know it checks for valid inputs, and has multiple bitdepths to worry about, but still.
If you play a Microsoft CD backwards you can hear demonic voices. The scary part is that if you play it forwards it installs Windows.
Reply
#13
Quote:Because I am using it for Inspiration. (Of course I'll credit you.) Inspiration is double buffered. I used imagecreate to make the buffer and there is a 4 byte header that stores the height and width of the buffer.

I got similar speed results. What the hell is PSET doing that takes so long? I know it checks for valid inputs, and has multiple bitdepths to worry about, but still.

There is nothing to credit me for.

Many things are fast in BASIC too if you use pointers and shifts.
I added the basic_32 sub; please compare again and look at its speed.

Joshy
Code:
option explicit
'asm.bas

#define scr_w 1024
#define scr_h  768

sub set_clip32(byval memory as integer ptr,byval x as integer,byval y as integer,byval c as integer)
  ' Clipped 32-bit pixel write: stores colour c at (x, y) in a buffer that is
  ' scr_w pixels wide and starts at memory. Coordinates outside
  ' 0..scr_w-1 / 0..scr_h-1 are ignored and nothing is written.
  if (x<0) or (x>=scr_w) then exit sub
  if (y<0) or (y>=scr_h) then exit sub
  asm
    mov  eax, scr_w
    imul eax, dword ptr [y]            ' eax = y * scr_w
    add  eax, dword ptr [x]            ' eax = y * scr_w + x   (pixel index)
    mov  edx, dword ptr [memory]       ' edx = start of the buffer
    mov  ecx, dword ptr [c]
    mov  dword ptr [edx + eax*4], ecx  ' 4 bytes per pixel: poke memory + index * 4, c
  end asm
end sub

sub set_32(byval memory as integer ptr,byval x as integer,byval y as integer,byval c as integer)
  ' Unclipped 32-bit pixel write: stores colour c at (x, y) in a buffer that
  ' is scr_w pixels wide and starts at memory. No range check is made on
  ' x or y, so the caller must keep them inside the buffer.
  asm
    mov  eax, scr_w
    imul eax, dword ptr [y]            ' eax = y * scr_w
    add  eax, dword ptr [x]            ' eax = y * scr_w + x   (pixel index)
    mov  edx, dword ptr [memory]       ' edx = start of the buffer
    mov  ecx, dword ptr [c]
    mov  dword ptr [edx + eax*4], ecx  ' 4 bytes per pixel: poke memory + index * 4, c
  end asm
end sub

'!!! only with screenwidth=1024 !!!
sub set_fast32(byval memory as integer ptr,byval x as integer,byval y as integer,byval c as integer)
  ' Unclipped 32-bit pixel write for a buffer that is exactly 1024 pixels
  ' wide: the row offset is produced with a shift instead of a multiply.
  ' Writes c to memory + (y * 1024 + x) * 4. No range check on x or y.
  asm
    mov edx, dword ptr [memory]        ' edx = start of the buffer
    mov eax, dword ptr [y]
    shl eax, 10                        ' eax = y * 1024
    add eax, dword ptr [x]             ' eax = y * 1024 + x   (pixel index)
    mov ecx, dword ptr [c]
    mov dword ptr [edx + eax*4], ecx   ' 4 bytes per pixel
  end asm
end sub

sub pset_32(byval x as integer,byval y as integer,byval c as integer)
  ' Plots colour c at (x, y) with the built-in PSET statement. Wrapped in a
  ' sub with the same kind of parameters as the other pixel writers in this
  ' file (presumably so the benchmark calls it the same way).
  pset (x, y), c
end sub

'!!! only with screenwidth=1024 !!!
sub basic_32(byval memory as integer ptr,byval x as integer,byval y as integer,byval c as integer)
  ' Pure-BASIC pixel write for a buffer that is exactly 1024 pixels wide:
  ' the pixel index is y * 1024 + x, with the multiply written as a shift.
  ' Indexing an integer ptr already scales by the element size, so no
  ' "* 4" is needed here. No range check on x or y.
  memory[(y shl 10) + x] = c
end sub



' Benchmark driver: fills the whole scr_w x scr_h screen 100 times with each
' pixel writer and prints the elapsed time (in seconds, from TIMER) for each.
screenres scr_w,scr_h,32

dim as integer ptr videomemory
dim as integer pass,px,py
dim as double  t0
dim as double  set_clip32time,set_32time,set_fast32time,basic_32time,pset_32time

' --- set_clip32: asm with bounds check ---
screenlock
videomemory=screenptr
t0=timer
for pass=1 to 100
  for py=0 to scr_h-1
    for px=0 to scr_w-1
      set_clip32 videomemory,px,py,rgb(255,0,0)
    next
  next
next
set_clip32time=timer-t0
screenunlock

' --- set_32: asm without bounds check ---
screenlock
videomemory=screenptr
t0=timer
for pass=1 to 100
  for py=0 to scr_h-1
    for px=0 to scr_w-1
      set_32 videomemory,px,py,rgb(0,255,0)
    next
  next
next
set_32time=timer-t0
screenunlock

' --- set_fast32: asm using a shift for the row offset ---
screenlock
videomemory=screenptr
t0=timer
for pass=1 to 100
  for py=0 to scr_h-1
    for px=0 to scr_w-1
      set_fast32 videomemory,px,py,rgb(0,0,255)
    next
  next
next
set_fast32time=timer-t0
screenunlock

' --- basic_32: plain BASIC pointer indexing (reuses the pointer fetched above) ---
screenlock
t0=timer
for pass=1 to 100
  for py=0 to scr_h-1
    for px=0 to scr_w-1
      basic_32 videomemory,px,py,rgb(255,0,255)
    next
  next
next
basic_32time=timer-t0
screenunlock

' --- pset_32: the built-in PSET, for comparison ---
screenlock
t0=timer
for pass=1 to 100
  for py=0 to scr_h-1
    for px=0 to scr_w-1
      pset_32 px,py,rgb(255,255,255)
    next
  next
next
pset_32time=timer-t0
screenunlock


print "set_clip32  ="; set_clip32time
print "set_32      ="; set_32time
print "set_fast32  ="; set_fast32time
print "basic ptr   ="; basic_32time
print "pset(x,y)   ="; pset_32time
sleep
end
EDIT: my new results (basic ptr and shift: !!!1.18!!!)
Code:
set_clip32  = 1.78
set_32      = 1.34
set_fast32  = 1.15
basic ptr   = 1.18
pset(x,y)   = 9.30
sorry about my english
Reply
#14
PSET has to deal with QB-compatible coordinate scaling, floating-point coordinates, WINDOW, etc., which is why it's a bit slower. See the source for the final word. Wink
Reply


Forum Jump:


Users browsing this thread: 1 Guest(s)