|
Language: VB5, VB6 Expertise: Advanced
Jan 13, 2003
Ternary QuickSort - A modification of QuickSort
' Ternary QuickSort. See the summary of QuickSort for background before
' reading this one. Ternary QuickSort (also called MultiKey QuickSort) differs
' from the original QuickSort by examining keys one byte at a time (like
' RadixSort), and by handling keys in three categories -- less than pivot,
' equal to pivot, and more than pivot -- instead of two. (Separate handling
' of equal keys is why this sort is stable while ordinary QuickSort is not.)
' As in QuickSort, the left and right sublists are processed recursively until
' they are short enough for InsertionSort. The equal-to-pivot sublist is also
' handled recursively, except that a pointer indicating the "depth" of the byte
' to be examined is advanced to the next position before the call. Since
' sublists handed off to InsertionSort consist of keys whose first bytes are
' identical up to DEPTH - 1, we use a special version of InsertionSort that
' compares keys only from DEPTH onward.
'
' Needs no extra arrays, but uses some stack space for recursion. In practice
' in VBA, I don't find this sort to be as fast as the original QuickSort,
' but results may differ in languages that offer faster access to bytes within
' strings. This algorithm is definitely oriented toward strings; see comments
' under MSD Radix Sort on possible adaptation to integers and longs.
'
' Reference: Jon Bentley and Robert Sedgewick, "Fast Algorithms for Sorting
' and Searching Strings", Proceedings of the 8th Annual ACM-SIAM Symposium on
' Discrete Algorithms, January 1997. See also http://www.cs.princeton.edu/~rs/
' strings/.
'
' Speed: TernaryQuickSort sorts 500,000 random strings in 39 sec; sorts 100186
' library call numbers in 14 sec; sorts 25479 dictionary words in 1.8 sec
' (random order), 1.9 sec (presorted) or 1.8 sec (reverse sorted). Timed in
' Excel 2001 on an 800 mhz PowerBook.
'
' Bottom line: a stable version of QuickSort good for strings,
' but not (at least in VBA) as fast as the original.
' Usage:
Dim S1(L To R) As Strings
Dim B1(1 To nChars) As Byte
Dim P1(L To R) As Long
For I = L To R
S1(I) = GetRandomString()
Next I
StrsToBytes S1, L, R, B1, P1 'a routine that stores the strings in 0-
' terminated byte
'arrays with P1() holding pointers to the first
' byte of
'each string
TernaryQuickSort L, R, B1, P1
' CODE:
Sub TernaryQuickSort(L As Long, R As Long, B() As Byte, P() As Long)
TernQuick L, R, B, P, 0
End Sub
Sub TernQuick(L As Long, R As Long, B() As Byte, P() As Long, _
ByVal DEPTH As Integer)
Dim TMP As Long
Dim I As Long
Dim J As Long
Dim pMED As Long
Dim Pivot As Integer
Dim OuterL As Long
Dim InnerL As Long
Dim InnerR As Long
Dim OuterR As Long
Dim DIF As Long
Dim N As Long
Dim SwapN As Long
Dim NLO As Long
Dim NHI As Long
Dim NEQ As Long
N = 1 + R - L
'Short sublists will be handled by lower overhead InsertionSort.
If N > 10 Then
'Get a pivot value from the median of three or nine keys.
pMED = BGetMed(B, P, L, N, DEPTH)
'Swap the median into the leftmost position.
TMP = P(L)
P(L) = P(pMED)
P(pMED) = TMP
'Our pivot will be the byte value at DEPTH.
Pivot = B(P(L) + DEPTH)
'Set up two pointers on the left and two on the right.
OuterL = L
InnerL = OuterL
OuterR = R
InnerR = OuterR
Do
'Look for a lefthand key/pointer to swap.
Do While InnerL <= InnerR
'DIF is the key's byte minus the Pivot byte.
DIF = B(P(InnerL) + DEPTH) - Pivot
'If the key's byte is greater, we've found a pointer to swap to the
' right side.
If DIF > 0 Then Exit Do
'If our byte is equal to the Pivot byte, we swap it to the extreme
' left end.
If DIF = 0 Then
TMP = P(OuterL)
P(OuterL) = P(InnerL)
P(InnerL) = TMP
OuterL = OuterL + 1
End If
'If our byte is less than Pivot, we just scan over it.
InnerL = InnerL + 1
Loop
'Now look for a righthand key/pointer to swap.
Do While InnerL <= InnerR
DIF = B(P(InnerR) + DEPTH) - Pivot
'If the key's byte is less, we've found a pointer to swap to the left
' side.
If DIF < 0 Then Exit Do
'If our byte is equal to the Pivot byte, we swap it to the extreme
' right end.
If DIF = 0 Then
TMP = P(OuterR)
P(OuterR) = P(InnerR)
P(InnerR) = TMP
OuterR = OuterR - 1
End If
InnerR = InnerR - 1
Loop
'If the inner pointers have crossed, we're done.
If InnerL > InnerR Then Exit Do
'Otherwise, we do the left/right swap we just set up.
TMP = P(InnerL)
P(InnerL) = P(InnerR)
P(InnerR) = TMP
InnerL = InnerL + 1
InnerR = InnerR - 1
Loop
'We've arranged pointers to equal bytes on the far left and right,
' pointers to lower bytes
'on the inner left, and pointers to higher bytes on the inner right. Now
' we will swap the
'equals to the center, between the lowers and the highers.
NLO = InnerL - OuterL
NHI = OuterR - InnerR
NEQ = N - (NLO + NHI)
If OuterL - L < NLO Then SwapN = OuterL - L Else SwapN = NLO
I = L
J = InnerL - SwapN
'Move the lefthand equals to center.
Do While SwapN > 0
TMP = P(I)
P(I) = P(J)
P(J) = TMP
I = I + 1
J = J + 1
SwapN = SwapN - 1
Loop
If R - OuterR < NHI Then SwapN = R - OuterR Else SwapN = NHI
I = InnerL
J = R + 1 - SwapN
'Move the righthand equals to center.
Do While SwapN > 0
TMP = P(I)
P(I) = P(J)
P(J) = TMP
I = I + 1
J = J + 1
SwapN = SwapN - 1
Loop
'If there are more bytes, we increment DEPTH and recurse on the equals.
If B(P(L+NLO) + DEPTH) <> 0 Then TernQuick L+NLO, L+NLO+NEQ-1, B, P, _
DEPTH+1
'Now we recurse on the lowers and highers; we do the shorter sublist first
' to hold stack
'depth to log N.
If NLO < NHI Then
TernQuick L, L + NLO - 1, B, P, DEPTH
TernQuick L + NLO + NEQ, L + NLO + NEQ + NHI - 1, B, P, DEPTH
Else
TernQuick L + NLO + NEQ, L + NLO + NEQ + NHI - 1, B, P, DEPTH
TernQuick L, L + NLO - 1, B, P, DEPTH
End If
Else
'A special version of InsertionSort that compares keys starting at depth
' (since the
'sublists we hand off to it will have identical prefixes.
DeepInsertS B, P, L, N, DEPTH
End If
End Sub
Function BGetMed(B() As Byte, P() As Long, L As Long, N As Long, _
DEPTH As Integer) As Long
Dim D As Long
Dim PL As Long
Dim PM As Long
Dim PN As Long
PL = L
PN = L + N - 1
PM = (PL + PN) \ 2
If N > 30 Then
D = N \ 8
PL = BMed3(B, P, PL, PL + D, PL + 2 * D, DEPTH)
PM = BMed3(B, P, PM - D, PM, PM + D, DEPTH)
PL = BMed3(B, P, PN - 2 * D, PN - D, PN, DEPTH)
End If
BGetMed = BMed3(B, P, PL, PM, PN, DEPTH)
End Function
Function BMed3(B() As Byte, P() As Long, I As Long, J As Long, K As Long, _
DEPTH As Integer) As Long
Dim CI As Byte
Dim CJ As Byte
Dim CK As Byte
CI = B(P(I) + DEPTH)
CJ = B(P(J) + DEPTH)
CK = B(P(K) + DEPTH)
If (CI <= CJ And CJ <= CK) Or (CI >= CJ And CJ >= CK) Then
BMed3 = J
ElseIf (CJ <= CI And CI <= CK) Or (CJ >= CI And CI >= CK) Then
BMed3 = I
ElseIf (CI <= CK And CK <= CJ) Or (CI >= CK And CK >= CJ) Then
BMed3 = K
End If
End Function
Sub DeepInsertS(B() As Byte, P() As Long, L As Long, N As Long, D As Integer)
Dim LP As Long
Dim RP As Long
Dim TMP As Long
Dim I As Long
Dim J As Long
For RP = L + 1 To L + N - 1
TMP = P(RP)
For LP = RP To L + 1 Step -1
I = TMP + D
J = P(LP - 1) + D
Do While B(I) = B(J)
If B(I) = 0 Or B(J) = 0 Then Exit Do
I = I + 1
J = J + 1
Loop
If CInt(B(I)) - CInt(B(J)) < 0 Then P(LP) = P(LP - 1) Else Exit For
Next LP
P(LP) = TMP
Next RP
End Sub
Sub Strs2Bytes(A() As String, L As Long, R As Long, B() As Byte, P() As Long)
Dim I As Long
Dim nPtrs As Long
Dim nBytes As Long
Dim DEPTH As Integer
nBytes = 0
nPtrs = 0
For I = L To R
nBytes = nBytes + Strings.Len(A(I)) + 1
nPtrs = nPtrs + 1
Next I
ReDim B(1 To nBytes)
ReDim P(1 To nPtrs)
nPtrs = 1
nBytes = 1
For I = L To R
P(nPtrs) = nBytes
For DEPTH = 1 To Strings.Len(A(I))
B(nBytes) = Asc(Strings.MID(A(I), DEPTH, 1))
nBytes = nBytes + 1
Next DEPTH
B(nBytes) = 0
nBytes = nBytes + 1
nPtrs = nPtrs + 1
Next I
End Sub
David B. Ring
|