05-08-2006, 01:38 AM
yetifoot: Here it is my entry, and it's slower than your code.
Can' t think of anything that would make it faster. I profiled it and it stays a 55% of the time putting data to the file. I guess my way of putting data at 255 different offsets in the file requires a lot of disk cache flushes, that makes it slow. QB1.1 version was fast because there were only 10 offsets to deal with.
Congratulations, yetifoot: unless some one else comes with more code you are the winner in the FB category!
About radix sort I posted a litle demo of in-memory sorting at The Code Post that's faster than any implementation of quicksort i know of.
Can' t think of anything that would make it faster. I profiled it and it stays a 55% of the time putting data to the file. I guess my way of putting data at 255 different offsets in the file requires a lot of disk cache flushes, that makes it slow. QB1.1 version was fast because there were only 10 offsets to deal with.
Congratulations, yetifoot: unless some one else comes with more code you are the winner in the FB category!
About radix sort I posted a litle demo of in-memory sorting at The Code Post that's faster than any implementation of quicksort i know of.
Code:
'Antoni's entry for the FB category
'QBN Forums External Sort Challenge 5 -2006
'---------------------------------------------
option explicit
'uses radix sort
CONST regsize = 4
CONST passes = 3
CONST numkeys = 255
CONST maxi = 9999999
'buffers
dim shared as uinteger ptr s1,s2
'tallies
DIM SHARED c(numkeys) AS INTEGER
DIM SHARED d(numkeys) AS INTEGER
DIM SHARED cd(numkeys, passes) AS LONG
dim shared filerecs,cnt
'-------------------------------------------------
SUB acumbig
dim i,i1,i2,j
FOR i = 0 TO passes
i1 = 0
FOR j = 0 TO numkeys
i2 = cd(j, i): cd(j, i) = (i1*4)+1: i1 = i1 + i2
NEXT
NEXT
? "A";
END SUB
'--------------------------------------------
FUNCTION getform
GET #1,,*s1,maxi+1
cnt+=maxi
Function=iif (eof(1),filerecs-cnt ,maxi)
?"G";
END FUNCTION
'---------------------------------------------
SUB putform (digit)
'copy 255 mem buffers to 255 offsets of temp file
dim i
FOR i = 0 TO numkeys
SEEK #2, cd(i, digit)
PUT #2,,s2[d(i)],c(i) - d(i)
cd(i, digit)=SEEK(2)
NEXT
?"P";
END SUB
'------------------------------------------------
SUB radix (digit, max)
dim p as ubyte ptr,j,t
p = cptr(ubyte ptr,s1) + digit
FOR j = 0 TO max
t = *p
s2[c(t)] = s1[j]
c(t) += 1
p+=regsize
NEXT
?"x";
END SUB
'--------------------------------------------------
SUB tallybig (max)
dim p as ubyte ptr,j,k
p=cptr(ubyte ptr,s1)
FOR j = 0 TO max
FOR k = 0 TO passes
cd(*p, k) += 1
p+=1
NEXT
'p+=regsize
NEXT
?"T";
END SUB
'--------------------------------------------------
SUB tallysmall (max, digit)
dim p as ubyte ptr,t,j,i1,i2
p=cptr(ubyte ptr,s1)+digit
erase c
FOR j = 0 TO max
c(*p) += 1
p+=regsize
NEXT
i1 = 0
FOR j = 0 TO numkeys
i2 = c(j): c(j) = i1: d(j) = i1: i1 = i1 + i2
NEXT
?"t";
END SUB
'-----------------------------------------------------
dim f1,i,t!
dim f1$,f2$
s1=allocate((maxi+1) *sizeof(integer))
s2=allocate((maxi+1) *sizeof(integer))
f1$ = "temp1.dat"
f2$ = "temp2.dat"
t! = TIMER
'data buffers
OPEN "unsortfb.dat" FOR binary AS #1
filerecs=lof(1)\regsize
PRINT "Sorting a file of "; filerecs; " records"
' tally the file offsets
cnt=1
do
tallybig (getform)
loop until eof(1)
acumbig
CLOSE
PRINT TIMER - t!
'radix sort
FOR i = 0 TO passes
if i=0 then
OPEN "unsortfb.dat" FOR binary AS #1
else
OPEN f1$ FOR binary ACCESS READ AS #1
end if
OPEN f2$ FOR binary ACCESS WRITE AS #2
cnt=1
WHILE NOT EOF(1)
f1 = getform
tallysmall f1, i
radix i, f1
putform i
WEND
CLOSE
PRINT "pass "; i+1; " of 4 "; TIMER - t!
SWAP f1$, f2$
NEXT
'format back
NAME f1$ AS "sort.dat"
kill f2$
deallocate s1
deallocate s2
PRINT : PRINT "File sorted in "; TIMER - t!; "seconds"
END
Antoni