Login | Register   
LinkedIn
Google+
Twitter
RSS Feed
Download our iPhone app
TODAY'S HEADLINES  |   ARTICLE ARCHIVE  |   FORUMS  |   TIP BANK
Browse DevX
Sign up for e-mail newsletters from DevX


Tip of the Day
Language: VB5, VB6
Expertise: Advanced
Jan 13, 2003

Ternary QuickSort - A modification of QuickSort

' Ternary QuickSort.  See the summary of QuickSort for background before 
' reading this one.  Ternary QuickSort (also called MultiKey QuickSort) differs 
' from the original QuickSort by examining keys one byte at a time (like 
' RadixSort), and by handling keys in three categories -- less than pivot,
'  equal to pivot, and more than pivot -- instead of two.  (Separate handling 
' of equal keys is why this sort is stable while ordinary QuickSort is not.)  
' As in QuickSort, the left and right sublists are processed recursively until 
' they are short enough for InsertionSort.  The equal-to-pivot sublist is also 
' handled recursively, except that a pointer indicating the "depth" of the byte 
' to be examined is advanced to the next position before the call.  Since 
' sublists handed off to InsertionSort consist of keys whose first bytes are 
' identical up to DEPTH - 1, we use a special version of InsertionSort that 
' compares keys only from DEPTH onward.
'
' Needs no extra arrays, but uses some stack space for recursion.  In practice 
' in VBA, I don't find this sort to be as fast as the original QuickSort,
'  but results may differ in languages that offer faster access to bytes within 
' strings.  This algorithm is definitely oriented toward strings; see comments 
' under MSD Radix Sort on possible adaptation to integers and longs.
'
' Reference:  Jon Bentley and Robert Sedgewick, "Fast Algorithms for Sorting 
' and Searching Strings", Proceedings of the 8th Annual ACM-SIAM Symposium on 
' Discrete Algorithms, January 1997.  See also http://www.cs.princeton.edu/~rs/
' strings/.
'
' Speed:  TernaryQuickSort sorts 500,000 random strings in 39 sec; sorts 100186 
' library call numbers in 14 sec; sorts 25479 dictionary words in 1.8 sec 
' (random order), 1.9 sec (presorted) or 1.8 sec (reverse sorted).  Timed in 
' Excel 2001 on an 800 mhz PowerBook.
'
' Bottom line:  a stable version of QuickSort good for strings,
'  but not (at least in VBA) as fast as the original.

' Usage:  

Dim S1(L To R) As Strings
Dim B1(1 To nChars) As Byte
Dim P1(L To R) As Long

For I = L To R
    S1(I) = GetRandomString()
Next I

StrsToBytes S1, L, R, B1, P1        'a routine that stores the strings in 0-
                                    ' terminated byte
                            'arrays with P1() holding pointers to the first 
                            ' byte of
                            'each string

TernaryQuickSort L, R, B1, P1

' CODE:

Sub TernaryQuickSort(L As Long, R As Long, B() As Byte, P() As Long)
    TernQuick L, R, B, P, 0
End Sub

Sub TernQuick(L As Long, R As Long, B() As Byte, P() As Long, _
    ByVal DEPTH As Integer)
    Dim TMP As Long
    Dim I As Long
    Dim J As Long
    Dim pMED As Long
    Dim Pivot As Integer
    Dim OuterL As Long
    Dim InnerL As Long
    Dim InnerR As Long
    Dim OuterR As Long
    Dim DIF As Long
    Dim N As Long
    Dim SwapN As Long
    Dim NLO As Long
    Dim NHI As Long
    Dim NEQ As Long

    N = 1 + R - L
    'Short sublists will be handled by lower overhead InsertionSort.
    If N > 10 Then
    'Get a pivot value from the median of three or nine keys.
        pMED = BGetMed(B, P, L, N, DEPTH)
    'Swap the median into the leftmost position.
        TMP = P(L)
        P(L) = P(pMED)
        P(pMED) = TMP
    'Our pivot will be the byte value at DEPTH.
        Pivot = B(P(L) + DEPTH)
    'Set up two pointers on the left and two on the right.
        OuterL = L
        InnerL = OuterL
        OuterR = R
        InnerR = OuterR
        Do
        'Look for a lefthand key/pointer to swap.
            Do While InnerL <= InnerR
         'DIF is the key's byte minus the Pivot byte.
                DIF = B(P(InnerL) + DEPTH) - Pivot
         'If the key's byte is greater, we've found a pointer to swap to the 
         ' right side.
                If DIF > 0 Then Exit Do
         'If our byte is equal to the Pivot byte, we swap it to the extreme 
         ' left end.
                If DIF = 0 Then
                    TMP = P(OuterL)
                    P(OuterL) = P(InnerL)
                    P(InnerL) = TMP
                    OuterL = OuterL + 1
                End If
         'If our byte is less than Pivot, we just scan over it.
                InnerL = InnerL + 1
            Loop
        'Now look for a righthand key/pointer to swap.
            Do While InnerL <= InnerR
                DIF = B(P(InnerR) + DEPTH) - Pivot
         'If the key's byte is less, we've found a pointer to swap to the left 
         ' side.
                If DIF < 0 Then Exit Do
         'If our byte is equal to the Pivot byte, we swap it to the extreme 
         ' right end.
                If DIF = 0 Then
                    TMP = P(OuterR)
                    P(OuterR) = P(InnerR)
                    P(InnerR) = TMP
                    OuterR = OuterR - 1
                End If
                InnerR = InnerR - 1
            Loop
        'If the inner pointers have crossed, we're done.
            If InnerL > InnerR Then Exit Do
        'Otherwise, we do the left/right swap we just set up.
            TMP = P(InnerL)
            P(InnerL) = P(InnerR)
            P(InnerR) = TMP
            InnerL = InnerL + 1
            InnerR = InnerR - 1
        Loop
    'We've arranged pointers to equal bytes on the far left and right,
    '  pointers to lower bytes 
    'on the inner left, and pointers to higher bytes on the inner right.  Now 
    ' we will swap the 
    'equals to the center, between the lowers and the highers.
        NLO = InnerL - OuterL
        NHI = OuterR - InnerR
        NEQ = N - (NLO + NHI)
        If OuterL - L < NLO Then SwapN = OuterL - L Else SwapN = NLO
        I = L
        J = InnerL - SwapN
    'Move the lefthand equals to center.
        Do While SwapN > 0
            TMP = P(I)
            P(I) = P(J)
            P(J) = TMP
            I = I + 1
            J = J + 1
            SwapN = SwapN - 1
        Loop
        If R - OuterR < NHI Then SwapN = R - OuterR Else SwapN = NHI
        I = InnerL
        J = R + 1 - SwapN
    'Move the righthand equals to center.
        Do While SwapN > 0
            TMP = P(I)
            P(I) = P(J)
            P(J) = TMP
            I = I + 1
            J = J + 1
            SwapN = SwapN - 1
        Loop
    'If there are more bytes, we increment DEPTH and recurse on the equals.
        If B(P(L+NLO) + DEPTH) <> 0 Then TernQuick L+NLO, L+NLO+NEQ-1, B, P, _
            DEPTH+1
    'Now we recurse on the lowers and highers; we do the shorter sublist first 
    ' to hold stack 
    'depth to log N.
        If NLO < NHI Then
            TernQuick L, L + NLO - 1, B, P, DEPTH
            TernQuick L + NLO + NEQ, L + NLO + NEQ + NHI - 1, B, P, DEPTH
        Else
            TernQuick L + NLO + NEQ, L + NLO + NEQ + NHI - 1, B, P, DEPTH
            TernQuick L, L + NLO - 1, B, P, DEPTH
        End If
    Else
    'A special version of InsertionSort that compares keys starting at depth 
    ' (since the
    'sublists we hand off to it will have identical prefixes.
        DeepInsertS B, P, L, N, DEPTH
    End If
End Sub

Function BGetMed(B() As Byte, P() As Long, L As Long, N As Long, _
    DEPTH As Integer) As Long
    Dim D As Long
    Dim PL As Long
    Dim PM As Long
    Dim PN As Long

        PL = L
        PN = L + N - 1
        PM = (PL + PN) \ 2
        If N > 30 Then
            D = N \ 8
            PL = BMed3(B, P, PL, PL + D, PL + 2 * D, DEPTH)
            PM = BMed3(B, P, PM - D, PM, PM + D, DEPTH)
            PL = BMed3(B, P, PN - 2 * D, PN - D, PN, DEPTH)
        End If
        BGetMed = BMed3(B, P, PL, PM, PN, DEPTH)
End Function

Function BMed3(B() As Byte, P() As Long, I As Long, J As Long, K As Long, _
    DEPTH As Integer) As Long
    Dim CI As Byte
    Dim CJ As Byte
    Dim CK As Byte
    
    CI = B(P(I) + DEPTH)
    CJ = B(P(J) + DEPTH)
    CK = B(P(K) + DEPTH)
    If (CI <= CJ And CJ <= CK) Or (CI >= CJ And CJ >= CK) Then
        BMed3 = J
    ElseIf (CJ <= CI And CI <= CK) Or (CJ >= CI And CI >= CK) Then
        BMed3 = I
    ElseIf (CI <= CK And CK <= CJ) Or (CI >= CK And CK >= CJ) Then
        BMed3 = K
    End If
End Function

Sub DeepInsertS(B() As Byte, P() As Long, L As Long, N As Long, D As Integer)
    Dim LP As Long
    Dim RP As Long
    Dim TMP As Long
    Dim I As Long
    Dim J As Long
    
    For RP = L + 1 To L + N - 1
        TMP = P(RP)
        For LP = RP To L + 1 Step -1
            I = TMP + D
            J = P(LP - 1) + D
            Do While B(I) = B(J)
                If B(I) = 0 Or B(J) = 0 Then Exit Do
                I = I + 1
                J = J + 1
            Loop
            If CInt(B(I)) - CInt(B(J)) < 0 Then P(LP) = P(LP - 1) Else Exit For
        Next LP
        P(LP) = TMP
    Next RP
End Sub

Sub Strs2Bytes(A() As String, L As Long, R As Long, B() As Byte, P() As Long)
    Dim I As Long
    Dim nPtrs As Long
    Dim nBytes As Long
    Dim DEPTH As Integer
    
    nBytes = 0
    nPtrs = 0
    For I = L To R
        nBytes = nBytes + Strings.Len(A(I)) + 1
        nPtrs = nPtrs + 1
    Next I
    ReDim B(1 To nBytes)
    ReDim P(1 To nPtrs)
    
    nPtrs = 1
    nBytes = 1
    For I = L To R
        P(nPtrs) = nBytes
        For DEPTH = 1 To Strings.Len(A(I))
            B(nBytes) = Asc(Strings.MID(A(I), DEPTH, 1))
            nBytes = nBytes + 1
        Next DEPTH
        B(nBytes) = 0
        nBytes = nBytes + 1
        nPtrs = nPtrs + 1
    Next I
End Sub


David B. Ring
 
Comment and Contribute

 

 

 

 

 


(Maximum characters: 1200). You have 1200 characters left.

 

 

Sitemap