05-05-2006, 07:57 PM
Ok, here is my entry for the QB1.1 category.
I'm using a radix sort on the disk, using two auxiliar files, one for input, the other for output. The actual sorting is made in memory, in chunks as big as possible. No aditional merge passes are required.
Due to the ASCII variable length formatting of the input file of this category, there is an aditional de-formatting pass at the start and a reformatting pass at the end.
I'm using a radix sort on the disk, using two auxiliar files, one for input, the other for output. The actual sorting is made in memory, in chunks as big as possible. No aditional merge passes are required.
Due to the ASCII variable length formatting of the input file of this category, there is an aditional de-formatting pass at the start and a reformatting pass at the end.
Code:
'Antoni's entry for the QB1.1 category
'External sort challenge QBN may 2006
'--------------------------------------------------
DEFINT A-Z
DECLARE SUB putform (digit%)
DECLARE SUB radix (i%, max%)
DECLARE SUB tallysmall (max%, i%)
DECLARE SUB tallybig (max%)
DECLARE SUB putunf (max%)
DECLARE SUB putstraight (max%)
DECLARE SUB acumbig ()
DECLARE FUNCTION getunf% ()
DECLARE FUNCTION getform% ()
'Qbasic external sort compo entry by Antoni Gual
'uses radix sort
CLEAR
t! = TIMER
'data buffers
CONST maxi = 10920
DIM SHARED s1(maxi + 1) AS STRING * 6
DIM SHARED s2(maxi + 1) AS STRING * 6
'mem tallies
DIM SHARED c(-1 TO 9) AS INTEGER
DIM SHARED d(-1 TO 9) AS INTEGER
'file tallies
DIM SHARED cd(9, 5) AS LONG
f1$ = "temp1.txt"
f2$ = "temp2.txt"
PRINT "sorting a file of 1M registers": PRINT
'reformat data to right-aligned
OPEN "qb1data.txt" FOR INPUT AS #1
OPEN f1$ FOR OUTPUT AS #2
'poke is used to avoid the horrible asc(mid$(a$,i,1))
DEF SEG = VARSEG(s1(0))
'preformat and tally the file offsets
WHILE NOT EOF(1)
f1 = getunf
tallybig (f1)
putstraight (f1)
WEND
acumbig
CLOSE
PRINT TIMER - t!
'radix sort
FOR i = 5 TO 0 STEP -1
OPEN f1$ FOR INPUT AS #1
OPEN f2$ FOR OUTPUT AS #2
WHILE NOT EOF(1)
f1 = getform
tallysmall f1, i
radix i, f1
putform (i)
WEND
CLOSE
PRINT "pass ";6-i;" of 6 ";TIMER - t!
SWAP f1$, f2$
NEXT
'format back
KILL f2$
OPEN f1$ FOR INPUT AS #1
OPEN "sort.txt" FOR OUTPUT AS #2
WHILE NOT EOF(1)
putunf getform
WEND
CLOSE
KILL f1$
PRINT : PRINT "File sorted in "; TIMER - t!; "seconds"
SYSTEM
'-------------------------------------------------
SUB acumbig
FOR i = 0 TO 5
i1& = 0
FOR j = 0 TO 9
i2& = cd(j, i): cd(j, i) = i1& * 8 + 1: i1& = i1& + i2&
NEXT
NEXT
END SUB
'--------------------------------------------
FUNCTION getform
FOR j = 0 TO maxi
LINE INPUT #1, s1(j)
IF EOF(1) THEN EXIT FOR
NEXT
IF j < maxi THEN getform = j ELSE getform = maxi
END FUNCTION
'--------------------------------------------
FUNCTION getunf
'gets to the output buffer!!!
FOR j = 0 TO maxi
LINE INPUT #1, t$
RSET s1(j) = LTRIM$(RTRIM$(t$))
IF EOF(1) THEN EXIT FOR
NEXT
'PRINT "R";
IF j < maxi THEN getunf = j ELSE getunf = maxi
END FUNCTION
'---------------------------------------------
SUB putform (digit)
'copy 10 mem buffers to 10 offsets of temp file
FOR i = 0 TO 9
SEEK #2, cd(i, digit)
FOR k = d(i) TO c(i) - 1
PRINT #2 + j, s2(k)
NEXT
cd(i, digit) = SEEK(2)
NEXT
END SUB
'----------------------------------------------
SUB putstraight (max)
'copy a sorted memory buffer to disk
FOR k = 0 TO max
PRINT #2, s1(k)
NEXT
END SUB
'----------------------------------------------
SUB putunf (max)
'copy a sorted memory buffer to disk
CONST sp$ = " "
FOR k = 0 TO max
PRINT #2, sp$ + LTRIM$(s1(k)) + sp$
NEXT
END SUB
'-----------------------------------------------------
SUB radix (digit, max)
offs& = VARPTR(s1(0)) + digit
FOR j = 0 TO max
t1 = PEEK(offs&)
IF t1 > 32 THEN t1 = t1 - 48 ELSE t1 = 0
s2(c(t1)) = s1(j)
c(t1) = c(t1) + 1
offs& = offs& + 6
NEXT
END SUB
'----------------------------------------------
SUB tallybig (max)
offs& = VARPTR(s1(0))
FOR j = 0 TO max
FOR k = 5 TO 0 STEP -1
t = PEEK(offs& + k)
IF t > 32 THEN t = t - 48 ELSE t = 0
cd(t, k) = cd(t, k) + 1
NEXT
offs& = offs& + 6
NEXT
END SUB
'--------------------------------------------------
SUB tallysmall (max, digit)
offs& = VARPTR(s1(0)) + digit
ERASE c
FOR j = 0 TO max
t = PEEK(offs&)
IF t > 32 THEN t = t - 48 ELSE t = 0
c(t) = c(t) + 1
offs& = offs& + 6
NEXT
i1 = 0
FOR j = 0 TO 9
i2 = c(j): c(j) = i1: d(j) = i1: i1 = i1 + i2
NEXT
END SUB
Antoni