Alright... if by "somewhere" you mean here, then here it is.
Code:
'NDEM Sort
'Sorts files internally on UIntegers, with filesizes up to the maximum file size of your file system.
'24 May 2006
'No-Comment version
'No-Optimization version
'Hideously bugged version
Option Dynamic
Option Explicit
Declare Sub PrepareIndex()
Declare Sub DestroyIndex()
Declare Sub CreateIndex()
Declare Sub ConvertIndex()
#include "crt.bi"
#define BUFFERSIZE_STD 131072
#define INTEGERSIZE_STD (BUFFERSIZE_STD\4)
#define WRITEBUFFER_STD 512
Type BufferType
size As Integer
data As UInteger Ptr
End Type
Dim Shared FileAccess As Integer
Dim Shared length As UInteger
Dim Shared Buffers(255) As BufferType
Dim timestarted As Single
timestarted = Timer
Cls
Call PrepareIndex()
Call CreateIndex()
Call ConvertIndex()
Call DestroyIndex()
Print "Process completed. Took "; Int(Timer - timestarted); " seconds."
'Sleep
End
Private Sub ConvertIndex()
Print "Converting index... ";
Dim starttime As Single
starttime = Timer
Dim i As UInteger, j As Integer, k As Integer, l As Integer, z As UInteger
Dim buffer(INTEGERSIZE_STD-1) As UInteger, got As UInteger = 0, buffersiz As Integer = BUFFERSIZE_STD, length As UInteger
Dim MasterIndexArray(16777215) As UInteger
For i = 0 To 255
j = FreeFile
Open ".\Index\" + Str$(i) + ".index" For Binary As #j
length = LoF(j)
got = 0
buffersiz = BUFFERSIZE_STD
ReDim buffer(INTEGERSIZE_STD-1) As UInteger
Do Until got >= length
If got + buffersiz > length Then
buffersiz = length - got
ReDim buffer(buffersiz\4-1) As UInteger
End If
Get #j, , buffer()
z = UBound(buffer)
For k = 0 To z
MasterIndexArray(buffer(k)) += 1
Next k
got += buffersiz
Loop
Close #j
Open "sort.dat" For Binary As #j
Seek #j, LoF(j)+1
For k = 0 To 16777215&
If MasterIndexArray(k) > 0 Then
z = k Xor (i Shl 24)
For got = 1 To MasterIndexArray(k)
Put #j, , z
Next got
MasterIndexArray(k) = 0
End If
Next k
Close #j
Locate 3,1: Print "Converting index... "; Int(i/255*1000)/10; "% "
Next i
Locate 3,1: Print "Converting index... 100%. Took "; Int(Timer - starttime); " seconds"
End Sub
Private Sub CreateIndex()
Print "Creating index... ";
Dim starttime As Single, lasttimed As Single
starttime = Timer
lasttimed = Timer
length = LoF(FileAccess)
Dim got As UInteger = 0, buffersiz As Integer = BUFFERSIZE_STD, i As Integer, z As UInteger
Dim buffer(INTEGERSIZE_STD-1) As UInteger, relevantbyte As UByte, lessrelevantbytes As UInteger
Dim writebuffer(INTEGERSIZE_STD-1) As UInteger, j As Integer
Do Until got >= length
If got + buffersiz > length Then
buffersiz = length - got
ReDim buffer(buffersiz\4-1) As UInteger
End If
Get #FileAccess, , buffer()
z = UBound(buffer)
For i = 0 To z
relevantbyte = ((buffer(i) And &HFF000000) Shr 24)
lessrelevantbytes = (buffer(i) And &HFFFFFF)
Buffers(relevantbyte).data[Buffers(relevantbyte).size] = lessrelevantbytes
Buffers(relevantbyte).size += 1
If Buffers(relevantbyte).size = INTEGERSIZE_STD Then
MemCpy(@writebuffer(0), Buffers(relevantbyte).data, BUFFERSIZE_STD)
j = FreeFile
Open ".\Index\" + Str$(relevantbyte) + ".index" For Binary As #j
Put #j, LoF(j)+1, writebuffer()
Close #j
Buffers(relevantbyte).size = 0
End If
Next i
got += buffersiz
If (Timer - lasttimed > 2.5) Then Locate 2,1: Print "Creating index... "; Str$(Int(got/length*1000)/10); "% ": lasttimed = Timer
Loop
For i = 0 To 255
If Buffers(i).size > 0 Then
ReDim writebuffer(Buffers(i).size - 1) As UInteger
MemCpy(@writebuffer(0), Buffers(i).data, Buffers(i).size * 4)
j = FreeFile
Open ".\Index\" + Str$(i) + ".index" For Binary As #j
Put #j, LoF(j)+1, writebuffer()
Close #j
Buffers(i).size = 0
End If
Next i
Close #FileAccess
Locate 2,1: Print "Creating index... 100%. Took "; Int(Timer - starttime); " seconds"
End Sub
Private Sub PrepareIndex()
Print "Preparing index... ";
MkDir "Index"
Dim i As Integer, j As Integer
j = FreeFile
For i = 0 To 255
Open ".\Index\" + Str$(i) + ".index" For Output As #j
Close #j
Buffers(i).data = CAllocate(BUFFERSIZE_STD)
Buffers(i).size = 0
Next i
j = FreeFile
Open "sort.dat" For Output As #j
Close #j
j = FreeFile
FileAccess = j
Open "unsortfb.dat" For Binary As #j
Print "100%."
End Sub
Private Sub DestroyIndex()
Print "Destroying index... ";
Dim i As Integer
For i = 0 To 255
Kill ".\Index\" + Str$(i) + ".index"
Deallocate Buffers(i).data
Buffers(i).size = 0
Next i
RmDir "Index"
Print "100%."
End Sub
As I said, the code is highly unoptimized, uncommented, and looks hideous
Anyway, as you might've noticed it doesn't really use a conventional sorting algorithm like bubble sort or so. And to be able to run it quickly, I hope you have a fast disk cache... ^^
The algorithm itself is probably really sucky, but I wanted to demonstrate another "unconventional" solution to the problem. It also works with files up to 2GB (didn't test though, dont have enough free space).
Hope it's at least worth 1 word (I'm so noobish!
),
- Neo