dcsimg
Login | Register   
LinkedIn
Google+
Twitter
RSS Feed
Download our iPhone app
TODAY'S HEADLINES  |   ARTICLE ARCHIVE  |   FORUMS  |   TIP BANK
Browse DevX
Sign up for e-mail newsletters from DevX

By submitting your information, you agree that devx.com may send you DevX offers via email, phone and text message, as well as email offers about other products and services that DevX believes may be of interest to you. DevX will process your information in accordance with the Quinstreet Privacy Policy.


Tip of the Day
Language: VB5, VB6
Expertise: Advanced
Jan 13, 2003

WEBINAR:

On-Demand

Building the Right Environment to Support AI, Machine Learning and Deep Learning


Ternary QuickSort - A modification of QuickSort

' Ternary QuickSort.  See the summary of QuickSort for background before 
' reading this one.  Ternary QuickSort (also called MultiKey QuickSort) differs 
' from the original QuickSort by examining keys one byte at a time (like 
' RadixSort), and by handling keys in three categories -- less than pivot,
'  equal to pivot, and more than pivot -- instead of two.  (Separate handling 
' of equal keys is why this sort is stable while ordinary QuickSort is not.)  
' As in QuickSort, the left and right sublists are processed recursively until 
' they are short enough for InsertionSort.  The equal-to-pivot sublist is also 
' handled recursively, except that a pointer indicating the "depth" of the byte 
' to be examined is advanced to the next position before the call.  Since 
' sublists handed off to InsertionSort consist of keys whose first bytes are 
' identical up to DEPTH - 1, we use a special version of InsertionSort that 
' compares keys only from DEPTH onward.
'
' Needs no extra arrays, but uses some stack space for recursion.  In practice 
' in VBA, I don't find this sort to be as fast as the original QuickSort,
'  but results may differ in languages that offer faster access to bytes within 
' strings.  This algorithm is definitely oriented toward strings; see comments 
' under MSD Radix Sort on possible adaptation to integers and longs.
'
' Reference:  Jon Bentley and Robert Sedgewick, "Fast Algorithms for Sorting 
' and Searching Strings", Proceedings of the 8th Annual ACM-SIAM Symposium on 
' Discrete Algorithms, January 1997.  See also http://www.cs.princeton.edu/~rs/
' strings/.
'
' Speed:  TernaryQuickSort sorts 500,000 random strings in 39 sec; sorts 100186 
' library call numbers in 14 sec; sorts 25479 dictionary words in 1.8 sec 
' (random order), 1.9 sec (presorted) or 1.8 sec (reverse sorted).  Timed in 
' Excel 2001 on an 800 mhz PowerBook.
'
' Bottom line:  a stable version of QuickSort good for strings,
'  but not (at least in VBA) as fast as the original.

' Usage:  

Dim S1(L To R) As Strings
Dim B1(1 To nChars) As Byte
Dim P1(L To R) As Long

For I = L To R
    S1(I) = GetRandomString()
Next I

StrsToBytes S1, L, R, B1, P1        'a routine that stores the strings in 0-
                                    ' terminated byte
                            'arrays with P1() holding pointers to the first 
                            ' byte of
                            'each string

TernaryQuickSort L, R, B1, P1

' CODE:

Sub TernaryQuickSort(L As Long, R As Long, B() As Byte, P() As Long)
    TernQuick L, R, B, P, 0
End Sub

Sub TernQuick(L As Long, R As Long, B() As Byte, P() As Long, _
    ByVal DEPTH As Integer)
    Dim TMP As Long
    Dim I As Long
    Dim J As Long
    Dim pMED As Long
    Dim Pivot As Integer
    Dim OuterL As Long
    Dim InnerL As Long
    Dim InnerR As Long
    Dim OuterR As Long
    Dim DIF As Long
    Dim N As Long
    Dim SwapN As Long
    Dim NLO As Long
    Dim NHI As Long
    Dim NEQ As Long

    N = 1 + R - L
    'Short sublists will be handled by lower overhead InsertionSort.
    If N > 10 Then
    'Get a pivot value from the median of three or nine keys.
        pMED = BGetMed(B, P, L, N, DEPTH)
    'Swap the median into the leftmost position.
        TMP = P(L)
        P(L) = P(pMED)
        P(pMED) = TMP
    'Our pivot will be the byte value at DEPTH.
        Pivot = B(P(L) + DEPTH)
    'Set up two pointers on the left and two on the right.
        OuterL = L
        InnerL = OuterL
        OuterR = R
        InnerR = OuterR
        Do
        'Look for a lefthand key/pointer to swap.
            Do While InnerL <= InnerR
         'DIF is the key's byte minus the Pivot byte.
                DIF = B(P(InnerL) + DEPTH) - Pivot
         'If the key's byte is greater, we've found a pointer to swap to the 
         ' right side.
                If DIF > 0 Then Exit Do
         'If our byte is equal to the Pivot byte, we swap it to the extreme 
         ' left end.
                If DIF = 0 Then
                    TMP = P(OuterL)
                    P(OuterL) = P(InnerL)
                    P(InnerL) = TMP
                    OuterL = OuterL + 1
                End If
         'If our byte is less than Pivot, we just scan over it.
                InnerL = InnerL + 1
            Loop
        'Now look for a righthand key/pointer to swap.
            Do While InnerL <= InnerR
                DIF = B(P(InnerR) + DEPTH) - Pivot
         'If the key's byte is less, we've found a pointer to swap to the left 
         ' side.
                If DIF < 0 Then Exit Do
         'If our byte is equal to the Pivot byte, we swap it to the extreme 
         ' right end.
                If DIF = 0 Then
                    TMP = P(OuterR)
                    P(OuterR) = P(InnerR)
                    P(InnerR) = TMP
                    OuterR = OuterR - 1
                End If
                InnerR = InnerR - 1
            Loop
        'If the inner pointers have crossed, we're done.
            If InnerL > InnerR Then Exit Do
        'Otherwise, we do the left/right swap we just set up.
            TMP = P(InnerL)
            P(InnerL) = P(InnerR)
            P(InnerR) = TMP
            InnerL = InnerL + 1
            InnerR = InnerR - 1
        Loop
    'We've arranged pointers to equal bytes on the far left and right,
    '  pointers to lower bytes 
    'on the inner left, and pointers to higher bytes on the inner right.  Now 
    ' we will swap the 
    'equals to the center, between the lowers and the highers.
        NLO = InnerL - OuterL
        NHI = OuterR - InnerR
        NEQ = N - (NLO + NHI)
        If OuterL - L < NLO Then SwapN = OuterL - L Else SwapN = NLO
        I = L
        J = InnerL - SwapN
    'Move the lefthand equals to center.
        Do While SwapN > 0
            TMP = P(I)
            P(I) = P(J)
            P(J) = TMP
            I = I + 1
            J = J + 1
            SwapN = SwapN - 1
        Loop
        If R - OuterR < NHI Then SwapN = R - OuterR Else SwapN = NHI
        I = InnerL
        J = R + 1 - SwapN
    'Move the righthand equals to center.
        Do While SwapN > 0
            TMP = P(I)
            P(I) = P(J)
            P(J) = TMP
            I = I + 1
            J = J + 1
            SwapN = SwapN - 1
        Loop
    'If there are more bytes, we increment DEPTH and recurse on the equals.
        If B(P(L+NLO) + DEPTH) <> 0 Then TernQuick L+NLO, L+NLO+NEQ-1, B, P, _
            DEPTH+1
    'Now we recurse on the lowers and highers; we do the shorter sublist first 
    ' to hold stack 
    'depth to log N.
        If NLO < NHI Then
            TernQuick L, L + NLO - 1, B, P, DEPTH
            TernQuick L + NLO + NEQ, L + NLO + NEQ + NHI - 1, B, P, DEPTH
        Else
            TernQuick L + NLO + NEQ, L + NLO + NEQ + NHI - 1, B, P, DEPTH
            TernQuick L, L + NLO - 1, B, P, DEPTH
        End If
    Else
    'A special version of InsertionSort that compares keys starting at depth 
    ' (since the
    'sublists we hand off to it will have identical prefixes.
        DeepInsertS B, P, L, N, DEPTH
    End If
End Sub

Function BGetMed(B() As Byte, P() As Long, L As Long, N As Long, _
    DEPTH As Integer) As Long
    Dim D As Long
    Dim PL As Long
    Dim PM As Long
    Dim PN As Long

        PL = L
        PN = L + N - 1
        PM = (PL + PN) \ 2
        If N > 30 Then
            D = N \ 8
            PL = BMed3(B, P, PL, PL + D, PL + 2 * D, DEPTH)
            PM = BMed3(B, P, PM - D, PM, PM + D, DEPTH)
            PL = BMed3(B, P, PN - 2 * D, PN - D, PN, DEPTH)
        End If
        BGetMed = BMed3(B, P, PL, PM, PN, DEPTH)
End Function

Function BMed3(B() As Byte, P() As Long, I As Long, J As Long, K As Long, _
    DEPTH As Integer) As Long
    Dim CI As Byte
    Dim CJ As Byte
    Dim CK As Byte
    
    CI = B(P(I) + DEPTH)
    CJ = B(P(J) + DEPTH)
    CK = B(P(K) + DEPTH)
    If (CI <= CJ And CJ <= CK) Or (CI >= CJ And CJ >= CK) Then
        BMed3 = J
    ElseIf (CJ <= CI And CI <= CK) Or (CJ >= CI And CI >= CK) Then
        BMed3 = I
    ElseIf (CI <= CK And CK <= CJ) Or (CI >= CK And CK >= CJ) Then
        BMed3 = K
    End If
End Function

Sub DeepInsertS(B() As Byte, P() As Long, L As Long, N As Long, D As Integer)
    Dim LP As Long
    Dim RP As Long
    Dim TMP As Long
    Dim I As Long
    Dim J As Long
    
    For RP = L + 1 To L + N - 1
        TMP = P(RP)
        For LP = RP To L + 1 Step -1
            I = TMP + D
            J = P(LP - 1) + D
            Do While B(I) = B(J)
                If B(I) = 0 Or B(J) = 0 Then Exit Do
                I = I + 1
                J = J + 1
            Loop
            If CInt(B(I)) - CInt(B(J)) < 0 Then P(LP) = P(LP - 1) Else Exit For
        Next LP
        P(LP) = TMP
    Next RP
End Sub

Sub Strs2Bytes(A() As String, L As Long, R As Long, B() As Byte, P() As Long)
    Dim I As Long
    Dim nPtrs As Long
    Dim nBytes As Long
    Dim DEPTH As Integer
    
    nBytes = 0
    nPtrs = 0
    For I = L To R
        nBytes = nBytes + Strings.Len(A(I)) + 1
        nPtrs = nPtrs + 1
    Next I
    ReDim B(1 To nBytes)
    ReDim P(1 To nPtrs)
    
    nPtrs = 1
    nBytes = 1
    For I = L To R
        P(nPtrs) = nBytes
        For DEPTH = 1 To Strings.Len(A(I))
            B(nBytes) = Asc(Strings.MID(A(I), DEPTH, 1))
            nBytes = nBytes + 1
        Next DEPTH
        B(nBytes) = 0
        nBytes = nBytes + 1
        nPtrs = nPtrs + 1
    Next I
End Sub


David B. Ring
 
Comment and Contribute

 

 

 

 

 


(Maximum characters: 1200). You have 1200 characters left.

 

 

Sitemap
Thanks for your registration, follow us on our social networks to keep up-to-date