algorithms/sorting_algorithms.py at main · don-c-smith/algorithms · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
"""
Let's talk about sorting algorithms.
Sorts basically do 'what it says on the box' - they arrange data in a specified sorted order.
There are many different sorting algorithms, with different strengths, efficiencies, and use cases.
"""


def selection_sort(array: list):
    """
    This is an implementation of a selection sort.
    Selection sort iterates over each element in an array, finds the minimum element in the unsorted part of the array,
    and then swaps it with the iterated element.
    Complexity: O(n^2), where n is the array length. (The quadratic complexity is because of the two nested for loops.)
    """
    array_length = len(array)
    for i in range(array_length):  # For each index value in the array
        min_idx = i  # Initialize the minimum index value as the iterated element

        for j in range(i+1, array_length):  # For each element in the unsorted portion of the array
            if array[j] < array[min_idx]:  # If the iterated element is less than the current value of min_idx
                min_idx = j  # Assign the lower value to min_idx

        array[i], array[min_idx] = array[min_idx], array[i]  # Swap the new minimum element with the element at index i

    return array  # Return the sorted array


def insertion_sort(array: list):
    """
    This is an implementation of an insertion sort.
    Insertion sort iteratively inserts each element into its correct position in the sorted part of the array.
    Complexity: O(n^2), where n is the array length. (Quadratic complexity comes from the nested for and while loops.)
    """
    array_length = len(array)
    for i in range(1, array_length):
        key = array[i]  # Initialize a 'key' value as the iterated element, which will be 'inserted' into sorted order

        j = i-1  # Initialize a value j representing the last element of the sorted section of the array
        while j >= 0 and key < array[j]:  # Boundary check and key comparison
            array[j + 1] = array[j]  # Move elements that are greater than the key one position 'ahead' in the array
            j -= 1  # Assess the next element

        array[j + 1] = key  # Insert the key at the appropriate position

    return array  # Return the sorted array


def bubble_sort(array: list):
    """
    This is an implementation of a bubble sort.
    Bubble sort iterates through an array, compares adjacent elements, and swaps them if they are in the wrong order.
    This process is repeated until the list is fully sorted.
    Complexity: O(n^2), where n is the array length. (The quadratic complexity is because of the two nested for loops.)
    """
    array_length = len(array)

    for i in range(array_length):  # For each element index in the array

        for j in range(0, array_length-i-1):  # The last 'i' elements are already sorted, so we avoid them
            if array[j] > array[j+1]:  # If the iterated element is greater than the next element
                array[j], array[j+1] = array[j+1], array[j]  # Swap those two elements

    return array  # Return the sorted array


def merge_sort(array: list):
    """
    This is an implementation of a merge sort, which requires a recursive solution.
    A merge sort divides the input array into two halves, calls itself recursively for those halves, and then merges
    the two sorted halves. Note that this sort is input order-agnostic.
    Complexity: O(n log n), where n is the array length.
    """
    def merge(left, right):
        """
        This 'helper' function merges two sorted sub-arrays into a single sorted array.
        """
        merged = []  # Initialize an empty list
        i = j = 0  # Initialize index positions to zero

        # Iterate through both arrays and append the smallest of both elements to 'merged'
        while i < len(left) and j < len(right):
            if left[i] < right[j]:
                merged.append(left[i])
                i += 1

            else:
                merged.append(right[j])
                j += 1

        # When we run out of elements in the sub-arrays, add any remaining elements to 'merged'
        merged.extend(left[i:])
        merged.extend(right[j:])

        return merged

    # Base Case
    # If the array is of length 1 or less, it's already fully sorted
    if len(array) <= 1:
        return array

    # Recursive Case
    # Begin by dividing the array elements into 2 halves
    midpoint = len(array) // 2

    # Make the recursive calls
    left = merge_sort(array[:midpoint])
    right = merge_sort(array[midpoint:])

    # Merge the sorted sub-arrays by calling the 'merge' helper function and return the sorted list
    return merge(left, right)


def quick_sort(array: list):
    """
    This is an implementation of a quick sort.
    Quick sort works by selecting a 'pivot' element from the array and partitioning the other elements into
    two sub-arrays, according to whether they are less than or greater than the pivot.
    The sub-arrays are then sorted recursively. This sort therefore requires a recursive solution.
    This implementation uses random pivot selection. This method helps improve performance over the traditional choice
    of selecting the last element as the pivot value, especially in worst-case scenarios for the sort.
    Complexity: O(n log n) on average, O(n^2) in the worst case.
    """
    import random  # This library is required for random pivot selection

    def partition(low, high):
        """
        This 'helper' function partitions the array into two parts around a randomly chosen pivot.
        """
        # Randomly select a pivot and swap it with the last element
        pivot_index = random.randint(low, high)  # Choose a random index value from within the appropriate range
        array[pivot_index], array[high] = array[high], array[pivot_index]
        pivot = array[high]

        i = low - 1  # Initialize a pointer to track the position where the next smaller element should be placed

        # Iterate over the array and compare all elements with the pivot
        for j in range(low, high):
            if array[j] <= pivot:  # If current element is less than or equal to the pivot value
                i += 1  # Increment the index of the smaller element
                array[i], array[j] = array[j], array[i]  # And perform the swap using multiple assignment

        # Place the pivot in the correct position by swapping it with the element at index i+1
        array[i + 1], array[high] = array[high], array[i + 1]  # Perform the swap using multiple assignment
        return i + 1  # And return the partition index

    def quick_sort_main(low, high):
        """
        The main function that implements the quick sort by recursively sorting the partitions
        """
        if low < high:
            part_idx = partition(low, high)  # This is the index of the partition value

            # Recursive calls: sort elements before and after the partition value
            quick_sort_main(low, part_idx - 1)
            quick_sort_main(part_idx + 1, high)

    quick_sort_main(0, len(array) - 1)  # Initiate the quick sort
    return array  # Finally, return the sorted array


def heap_sort(array: list):
    """
    This is an implementation of a heap sort.
    This function first builds a max-heap and then repeatedly extracts the maximum element from the heap.
    It then rebuilds the heap until the entire array is sorted.
    Complexity: O(n log n), where n is the array length.
    """

    def heapify(heap_size, root_index):
        """
        Transforms a subtree rooted with a node at root_index into a max heap
        Note that heap_size is simply the number of elements in the heap
        """
        largest = root_index  # Initialize the largest value as the root index
        left_child = 2 * root_index + 1  # Create the left 'child' node
        right_child = 2 * root_index + 2  # Create the right 'child' node

        # Check if the left child exists and is greater than the root value
        if left_child < heap_size and array[left_child] > array[largest]:
            largest = left_child  # If so, reassign the largest value

        # Check if the right child exists and is greater than the largest value assessed thus far
        if right_child < heap_size and array[right_child] > array[largest]:
            largest = right_child  # If so, reassign the largest value

        if largest != root_index:  # If the largest value is not the root value,
            array[root_index], array[largest] = array[largest], array[root_index]  # Swap it with the root value
            heapify(heap_size, largest)  # And continue calling the function recursively

    array_length = len(array)

    # Build a max-heap iteratively
    for i in range(array_length // 2 - 1, -1, -1):  # Iterate over the *non-leaf* nodes in reverse order
        heapify(array_length, i)  # Ensure that the subtree rooted at index 'i' satisfies the max heap property

    # Extract elements one by one from the max-heap
    for i in range(array_length - 1, 0, -1):  # Iterate backwards through the array
        # Swap the maximum of the heap (at index 0) with the last element of the unsorted part of the array (at index i)
        array[i], array[0] = array[0], array[i]

        heapify(i, 0)  # Restore the max-heap property in the reduced heap

    return array  # Finally, return the sorted array


def tim_sort(array: list):
    """
    This is an implementation of a more advanced sort, the TimSort algorithm.
    This is the default algorithm behind Python's native .sort() method.
    A TimSort is a hybrid sort derived from combining the merge sort and insertion sort algorithms.
    It is designed to perform well on many kinds of real-world data. It divides the array into segments known as 'runs'
    and first sorts these runs using insertion sort, then merges the runs using a merge sort.
    Complexity: O(n log n) where n is the array length.
    """
    min_run_size = 32  # Defining the minimum size of the 'runs' - 32 is a well-tested by-convention value

    def tim_insertion_sort(start, end):
        """
        Sorts the elements between the start and end values (exclusive) using an insertion sort
        """
        for i in range(start + 1, end):  # Iterate from the second element to the end of the sub-array
            element = array[i]  # Set element to be repositioned equal to the element at the iterated index
            j = i - 1  # Find the position where the element should be inserted
            while j >= start and element < array[j]:  # Move elements greater than the iterated 'element'
                array[j + 1] = array[j]  # By shifting those elements to the right
                j -= 1  # Assess the next element
            array[j + 1] = element  # And place the iterated element in its correct sorted position

    def tim_merge(left, right):
        """
        Merges two sorted lists into a single sorted list
        """
        if not left or not right:  # If either of the two lists is empty
            return left or right  # Return the other list

        result = []  # Initialize the merged list
        i = j = 0  # Initialize pointers for the left and right lists, both equal to zero

        # Iterate until the merged list contains all the elements from both lists
        while len(result) < len(left) + len(right):
            # Append smaller elements to the result list
            if left[i] < right[j]:
                result.append(left[i])
                i += 1

            else:
                result.append(right[j])
                j += 1

            # Append the remaining elements if the end of one of the lists has been reached
            if i == len(left) or j == len(right):
                result.extend(left[i:] or right[j:])
                break  # Exit the loop if the condition is satisfied
        return result

    # Step 1 of the TimSort: Sort the individual 'runs' of size 'min_run_size' using an insertion sort
    array_length = len(array)
    for start in range(0, array_length, min_run_size):  # Iterate over the array in steps of size 'min_run_size'
        end = min(start + min_run_size, array_length)  # Define the end of each iterated 'run'
        tim_insertion_sort(start, end)  # Perform the insertion sort on each run

    # Step 2 of the TimSort: Merge the sorted 'runs' iteratively, doubling the size in each iteration
    size = min_run_size
    while size < array_length:  # Loop as long as the size of the run is less than the total array length
        for left in range(0, array_length, size * 2):  # Iterate over array in steps of 'size * 2'
            middle = min(array_length, left + size)  # Calculate the middle index value
            right = min(array_length, left + size * 2)  # Calculate the end index value
            if middle < right:  # If we can legitimately merge the two segments
                merged_list = tim_merge(array[left:middle], array[middle:right])  # Merge the two halves
                array[left:left + len(merged_list)] = merged_list  # Update the array with the newly-merged list
        size *= 2  # And double the value of 'size' for the next iteration

    return array  # Finally, return the sorted array


def radix_sort(array: list):
    """
    This is an implementation of a Radix sort that sorts integers.
    Radix sorts are non-comparative sorts insofar as they don't directly compare elements with each other. Instead, they
    process each 'digit' of the elements to be sorted, from the least significant to the most significant digit.
    Radix sort is very efficient for sorting large integers or strings of characters where the length of the keys is
    relatively small compared to the range of the key values.
    Example: sorting an unsorted array of 1,000,000 eight-digit social security numbers.
    Complexity: O(n*k) where n is array length (number of elements) and k is the number of digits in the maximum value.
    """
    # The first step is to define a 'counting sort' subroutine which will serve to sort the individual digits
    def counting_sort(count_array, exp):
        """
        A subroutine to perform counting sort on an array according to the digit represented by exp (exponent).
        """
        n = len(count_array)
        output = [0] * n  # Initialize the output array
        count = [0] * 10  # Count array for storing the count of occurrences of each possible digit

        # Store the count of occurrences of each digit in the count array
        for i in range(n):  # For each index value in the array
            index = (count_array[i] // exp) % 10  # Calculate the digit at each position of each element in the array
            count[index] += 1  # Update the count array

        # Change count[i] so that count[i] contains actual position of that digit in the output array
        for i in range(1, 10):  # For each digit from 1 to 9
            count[i] += count[i - 1]  # Update value at count[i] to accumulate the count

        # Build the output array - i.e. perform the actual sort
        i = n - 1  # Initialize i to the last index of the array
        while i >= 0:  # Iterate backward through the array, starting from the last element, as long as values remain
            index = (count_array[i] // exp) % 10  # For each element, isolate the digit at the current sorting position
            output[count[index] - 1] = count_array[i]  # Place each element into the correct position in output array
            count[index] -= 1  # Then decrement the count at count[index]
            i -= 1  # And decrement i

        # Copy the output array back to count_array
        for i in range(n):
            count_array[i] = output[i]

    # Find the maximum number to know the number of digits
    max_num = max(array)

    # Run counting sort for every digit. Instead of passing the digit number, pass 'exp.'
    # Note that exp is 10^i where i is current digit number
    exp = 1
    while max_num // exp > 0:
        counting_sort(array, exp)
        exp *= 10