Posts: 140
Threads: 13
Joined: Apr 2005
Quote:There isn't supposed to be anything there. The only pixel that was coloured is (99,99). Thanks for clearing it up for me, these routines are sooo fast.
There is no way to dereference using asm and have the function return an integer? I have a feeling that I will forget a * somewhere and spend hours pulling my hair out over it.
Code: 'asm2.bas
function point32 (byval x as integer,byval y as integer, byval memory as integer ptr) as integer
' Returns the ADDRESS (not the colour) of pixel (x,y) inside a 32-bit buffer
' whose rows are 320 pixels (1280 bytes) wide and whose pixel data starts
' 4 bytes after `memory`.
'   result = memory + 4 + y*1280 + x*4
' The caller has to PEEK the returned address to obtain the colour.
asm
mov eax, dword ptr [y]
lea eax, [eax+eax*4] ' y * 5
shl eax, 8 ' y * 5 * 256 = y * 1280 = y * 320 * 4
mov ebx, dword ptr [x]
lea eax, [eax+ebx*4+4] ' + x*4, + 4 bytes to step over the header
add eax, dword ptr [memory] ' + start of the buffer
' hand the address back as the function result (no dereference here)
mov [function], eax
end asm
END function
' Demo: plot one pixel with PSET, then read it back through the address
' computed by point32 and print the colour in hex.
dim memory as integer ptr
dim coladr as integer
dim col as integer
screenres 320,200,32
' ScreenPtr is 0 when no graphics mode is active; bail out instead of
' peeking through a (near-)null address further down.
if screenptr = 0 then
    print "could not set graphics mode 320x200x32"
    sleep
    end 1
end if
screenlock
memory = screenptr
pset(99,99),&H123456
coladr=point32(99,99,memory)
' point32 always adds 4 bytes; the pointer used here is the raw screen
' pointer, so take those 4 bytes off again before reading.
coladr-=4 ' 4 bytes !!!
col=peek(integer,coladr)
screenunlock
? "color on (99,99)=";hex(col)
sleep
Why do you add 4 bytes?
Joshy
sorry about my english
Posts: 357
Threads: 118
Joined: Oct 2004
Because I am using it for Inspiration. (Of course I'll credit you.) Inspiration is double buffered. I used imagecreate to make the buffer and there is a 4 byte header that stores the height and width of the buffer.
I got similar speed results. What the hell is PSET doing that takes so long? I know it checks for valid inputs, and has multiple bitdepths to worry about, but still.
If you play a Microsoft CD backwards you can hear demonic voices. The scary part is that if you play it forwards it installs Windows.
Posts: 140
Threads: 13
Joined: Apr 2005
Quote:Because I am using it for Inspiration. (Of course I'll credit you.) Inspiration is double buffered. I used imagecreate to make the buffer and there is a 4 byte header that stores the height and width of the buffer.
I got similar speed results. What the hell is PSET doing that takes so long? I know it checks for valid inputs, and has multiple bitdepths to worry about, but still.
There is nothing to credit me for.
Many things are fast in BASIC too if you use pointers and shifts.
I added the basic_32 sub; please compare again and look at its speed.
Joshy
Code: option explicit
'asm.bas
#define scr_w 1024
#define scr_h 768
sub set_clip32(byval memory as integer ptr,byval x as integer,byval y as integer,byval c as integer)
' Writes colour c to pixel (x,y) of a 32-bit buffer with scr_w pixels per
' row starting at `memory`. Coordinates outside 0..scr_w-1 / 0..scr_h-1 are
' silently ignored.
if (x < 0) or (x >= scr_w) then exit sub
if (y < 0) or (y >= scr_h) then exit sub
' turn both coordinates into byte counts (4 bytes per pixel)
x = x shl 2
y = y shl 2
asm
mov eax, dword ptr [y] ' y * 4
imul eax, scr_w ' y * 4 * scr_w = byte offset of the row
add eax, dword ptr [x] ' + x * 4 = byte offset of the pixel
add eax, dword ptr [memory] ' absolute address of the pixel
mov edx, dword ptr [c]
mov dword ptr [eax], edx ' store the colour
end asm
end sub
sub set_32(byval memory as integer ptr,byval x as integer,byval y as integer,byval c as integer)
' Writes colour c to pixel (x,y) of a 32-bit buffer with scr_w pixels per
' row starting at `memory`. No bounds checking is done here.
' turn both coordinates into byte counts (4 bytes per pixel)
x = x shl 2
y = y shl 2
asm
mov eax, dword ptr [y] ' y * 4
imul eax, scr_w ' y * 4 * scr_w = byte offset of the row
add eax, dword ptr [x] ' + x * 4 = byte offset of the pixel
add eax, dword ptr [memory] ' absolute address of the pixel
mov edx, dword ptr [c]
mov dword ptr [eax], edx ' store the colour
end asm
end sub
'!!! only with screenwidth=1024 !!!
sub set_fast32(byval memory as integer ptr,byval x as integer,byval y as integer,byval c as integer)
' Writes colour c to pixel (x,y) of a 32-bit buffer starting at `memory`.
' The row length is hard-wired to 1024 pixels (the shift by 10 below), and
' there is no bounds checking.
asm
mov eax, dword ptr [y]
shl eax, 10 ' y * 1024
add eax, dword ptr [x] ' pixel index = y * 1024 + x
mov edx, dword ptr [memory]
mov ecx, dword ptr [c]
mov dword ptr [edx+eax*4], ecx ' 4 bytes per pixel via the scaled index
end asm
end sub
sub pset_32(byval x as integer,byval y as integer,byval c as integer)
    ' Plots colour c at (x,y) with the built-in PSET statement.
    ' Wrapped in a sub so it is called the same way as the other pixel routines.
    pset (x, y), c
end sub
'!!! only with screenwidth=1024 !!!
sub basic_32(byval memory as integer ptr,byval x as integer,byval y as integer,byval c as integer)
    ' Pure-BASIC pixel write: stores colour c at pixel index y*1024 + x of the
    ' buffer at `memory`. The row length is hard-wired to 1024 pixels (shl 10)
    ' and there is no bounds checking.
    memory[(y shl 10) + x] = c
end sub
' Benchmark driver: fills the whole scr_w x scr_h 32-bit screen 100 times
' with each pixel routine and prints the elapsed time reported by TIMER
' for each of them.
screenres scr_w,scr_h,32
' ScreenPtr is 0 when no graphics mode is active. The raw-pointer routines
' below would then write through a null pointer, so stop here instead.
if screenptr = 0 then
    print "could not set graphics mode"
    sleep
    end 1
end if
dim as integer ptr videomemory
dim as integer i,x,y
dim as double stime,etime,pset_32time,set_clip32time,set_32time,set_fast32time,basic_32time

' --- set_clip32: asm with bounds check ---
screenlock:videomemory=screenptr
stime=timer
for i=1 to 100
    for y=0 to scr_h-1
        for x=0 to scr_w-1
            set_clip32 videomemory,x,y,rgb(255,0,0)
        next
    next
next
etime=timer
set_clip32time=etime-stime
screenunlock

' --- set_32: asm without bounds check ---
screenlock:videomemory=screenptr
stime=timer
for i=1 to 100
    for y=0 to scr_h-1
        for x=0 to scr_w-1
            set_32 videomemory,x,y,rgb(0,255,0)
        next
    next
next
etime=timer
set_32time=etime-stime
screenunlock

' --- set_fast32: asm with the row length fixed at 1024 ---
screenlock:videomemory=screenptr
stime=timer
for i=1 to 100
    for y=0 to scr_h-1
        for x=0 to scr_w-1
            set_fast32 videomemory,x,y,rgb(0,0,255)
        next
    next
next
etime=timer
set_fast32time=etime-stime
screenunlock

' --- basic_32: pointer indexing in plain BASIC ---
' reuses the videomemory pointer fetched for the previous run
screenlock
stime=timer
for i=1 to 100
    for y=0 to scr_h-1
        for x=0 to scr_w-1
            basic_32 videomemory,x,y,rgb(255,0,255)
        next
    next
next
etime=timer
basic_32time=etime-stime
screenunlock

' --- pset_32: built-in PSET as the baseline ---
screenlock
stime=timer
for i=1 to 100
    for y=0 to scr_h-1
        for x=0 to scr_w-1
            pset_32 x,y,rgb(255,255,255)
        next
    next
next
etime=timer
pset_32time=etime-stime
screenunlock

print "set_clip32 ="; set_clip32time
print "set_32 ="; set_32time
print "set_fast32 ="; set_fast32time
print "basic ptr ="; basic_32time
print "pset(x,y) ="; pset_32time
sleep
end
EDIT: my new results; the BASIC pointer-and-shift version takes only 1.18!
Code: set_clip32 = 1.78
set_32 = 1.34
set_fast32 = 1.15
basic ptr = 1.18
pset(x,y) = 9.30
sorry about my english
Posts: 1,439
Threads: 15
Joined: Apr 2003
PSET has to deal with QB-compatible coordinate scaling, floating-point coordinates, WINDOW, etc., which is why it's a bit slower. See the source for the final word.
|