-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathchapter_9.py
More file actions
279 lines (234 loc) · 9.47 KB
/
chapter_9.py
File metadata and controls
279 lines (234 loc) · 9.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
"""
The median-selection algorithm is generally not used in practice, as the overhead of pivot computation is significant.
But this technique is of theoretical interest in relating selection and sorting algorithms.
-- https://www.geeksforgeeks.org/selection-algorithms/
"""
import random
import math
"""
Get the minimum and the maximum elements of A simultaneously.
First we compare the elements in pairs,
then we search for the minimum among the smaller element of each pair
and for the maximum among the larger element of each pair.
Input: list A.
Output: the minimum & maximum elements in A.
"""
def min_max(A):
    """Return the minimum and maximum of A in a single pairwise pass.

    Elements are consumed two at a time: the pair is ordered with one
    comparison, then the smaller one is compared with the running minimum
    and the larger one with the running maximum (three comparisons per
    pair). When the length is odd, the first element seeds both the running
    minimum and maximum so that every element is considered for both.

    Args:
        A: sequence of mutually comparable values. It is not modified.

    Returns:
        A tuple (minimum, maximum). For an empty sequence the sentinels
        (float("inf"), float("-inf")) are returned.
    """
    n = len(A)
    if n == 0:
        # Nothing to compare; keep the historical sentinel result.
        return float("inf"), float("-inf")

    if n % 2:
        # Odd length: the first element is the initial min and max, and the
        # remaining elements form complete pairs.
        Amin = Amax = A[0]
        start = 1
    else:
        # Even length: the first pair provides the initial min and max.
        if A[0] <= A[1]:
            Amin, Amax = A[0], A[1]
        else:
            Amin, Amax = A[1], A[0]
        start = 2

    for i in range(start, n - 1, 2):
        small, large = A[i], A[i + 1]
        if small > large:
            small, large = large, small
        if small < Amin:
            Amin = small
        if large > Amax:
            Amax = large
    return Amin, Amax
def partition(A, p, r):
    """Partition A[p..r] in place around the pivot A[r] (Quicksort style).

    After the call, every element that compared <= pivot sits before the
    pivot and every other element of the range sits after it.

    Args:
        A: mutable sequence to rearrange.
        p: first index of the range.
        r: last index of the range; A[r] is the pivot.

    Returns:
        The index at which the pivot ends up.
    """
    pivot = A[r]
    # `boundary` is the slot where the next "small" element will be placed.
    boundary = p
    for idx in range(p, r):
        if A[idx] <= pivot:
            A[boundary], A[idx] = A[idx], A[boundary]
            boundary += 1
    # Drop the pivot between the small and the large elements.
    A[boundary], A[r] = A[r], A[boundary]
    return boundary
def random_partition(A, p, r):
    """Partition A[p..r] in place around a uniformly chosen pivot.

    A random index in [p, r] is drawn, the element there is moved to the
    end of the range, and the regular `partition` is applied.

    Args:
        A: mutable sequence to rearrange.
        p: first index of the range.
        r: last index of the range.

    Returns:
        The final index of the pivot, as returned by `partition`.
    """
    chosen = random.randint(p, r)
    # `partition` always pivots on A[r], so move the chosen element there.
    A[chosen], A[r] = A[r], A[chosen]
    return partition(A, p, r)
"""
Select the ith smallest element in array A recursively.
Input @para: list A, start index p and end index r; i is the 1-based rank of the wanted element within A[p,...,r].
Output: the ith smallest element in array A[p,...,r] and its index.
"""
def select_recur(A, p, r, i):
    """Return the i-th smallest element of A[p..r] and its index (recursive).

    The range is split with a randomized partition; the search then
    continues only in the side that contains the wanted rank. A is
    rearranged in place while searching.

    Args:
        A: mutable sequence of mutually comparable values.
        p: first index of the range.
        r: last index of the range.
        i: 1-based rank inside A[p..r] (1 selects the smallest element).

    Returns:
        A tuple (value, index): the selected element and the position it
        occupies in A when the search ends.

    Raises:
        ValueError: if i is not within 1..(r - p + 1). The check happens
            before A is touched.
    """
    n = r - p + 1
    if not 1 <= i <= n:
        # Fail early with a clear message instead of an "empty range" error
        # raised by the random pivot choice deep inside the recursion.
        raise ValueError(
            "i must be between 1 and {} (the length of A[p..r]), got {}.".format(n, i)
        )

    q = random_partition(A, p, r)
    # Rank of the pivot inside A[p..r].
    k = q - p + 1
    if i == k:
        return A[q], q
    if i < k:
        # The wanted element is among the elements left of the pivot.
        return select_recur(A, p, q - 1, i)
    # The wanted element is right of the pivot; its rank there is reduced
    # by the k elements that were discarded.
    return select_recur(A, q + 1, r, i - k)
"""
Select the ith smallest element in array A iteratively.
Input @para: list A; i is the 1-based rank of the wanted element.
Output: the ith smallest element in array A.
"""
def select_iter(A, i):
    """Return the i-th smallest element of A (iterative randomized select).

    The current search window is split with a randomized partition and
    then narrowed to the side that contains the wanted rank. A is
    rearranged in place while searching.

    Args:
        A: mutable sequence of mutually comparable values.
        i: 1-based rank of the wanted element (1 selects the smallest).

    Returns:
        The i-th smallest element of A.

    Raises:
        AssertionError: if i is greater than len(A).
        ValueError: if i is smaller than 1.
    """
    n = len(A)
    # Raised explicitly rather than through `assert` so the check is still
    # performed under `python -O`; exception type and message are unchanged.
    if i > n:
        raise AssertionError("i must be less than or equal to n.")
    if i < 1:
        # Without this guard the window shrinks to nothing and the random
        # pivot choice fails with an unrelated "empty range" error.
        raise ValueError("i must be at least 1, got {}.".format(i))

    p, r = 0, n - 1
    while True:
        q = random_partition(A, p, r)
        # Rank of the pivot inside the current window A[p..r].
        k = q - p + 1
        if i == k:
            return A[q]
        if i < k:
            r = q - 1
        else:
            p = q + 1
            # k elements were discarded on the left of the new window.
            i -= k
"""
The kth quantiles of an n-element set are the k - 1 order statistics that
divide the sorted set into k equal-sized sets (to within 1).
Input @para: list A, p & r are indices of A that A[p,...,r], k means split the array into k segments.
Output @para: the k - 1 quantiles.
"""
def kquantiles(A, p, r, k):
    """Return the k-quantiles of A[p..r] by divide and conquer.

    The middle quantile(s) are located with `select_recur`, then the
    quantiles of the part below and the part above are computed
    recursively. A is rearranged in place.

    Args:
        A: mutable sequence of mutually comparable values.
        p: first index of the range.
        r: last index of the range.
        k: number of segments to split the range into; must be > 1.

    Returns:
        * k == 2: the (lower) median as a single value.
        * k == 3: the list [first, second] of the two quantiles.
        * even k > 2: [left, median, right].
        * odd k > 3: [left, q1, q2, right].
        `left` and `right` are the results of the recursive calls, so they
        are single values or lists themselves (the result is nested for
        k >= 6). This shape is the same as in earlier versions.

    Raises:
        AssertionError: if k <= 1.
        ValueError: propagated from the selection when the range holds too
            few elements for a requested rank.
    """
    assert k > 1, "It's meaningless otherwise."
    n = r - p + 1

    # Base case 1: the only 2-quantile is the median.
    if k == 2:
        return select_recur(A, p, r, math.ceil(n / 2))[0]

    # Base case 2: the two points that cut the range into thirds.
    if k == 3:
        third = round(n / 3)
        first = select_recur(A, p, r, third)[0]
        second = select_recur(A, p, r, third * 2)[0]
        return [first, second]

    if k % 2 == 0:
        # Even k: the median is itself a quantile; each half then needs the
        # (k / 2)-quantiles. Floor division keeps k an integer.
        q_value, q = select_recur(A, p, r, math.ceil(n / 2))
        half = k // 2
        t1 = kquantiles(A, p, q - 1, half)
        t2 = kquantiles(A, q + 1, r, half)
        return [t1, q_value, t2]

    # Odd k: the two middle quantiles are number m and m + 1, m = (k - 1) / 2.
    m = (k - 1) // 2
    # Integer ceiling of m * n / k and (m + 1) * n / k.
    rank1 = (m * n + k - 1) // k
    rank2 = ((m + 1) * n + k - 1) // k
    # Select the upper one first, then the lower one only inside the part
    # left of it: that second selection cannot move the upper quantile, so
    # both indices are still valid afterwards.
    q2_value, q2 = select_recur(A, p, r, rank2)
    q1_value, q1 = select_recur(A, p, q2 - 1, rank1)
    # The parts outside the two middle quantiles each hold m segments.
    t1 = kquantiles(A, p, q1 - 1, m)
    t2 = kquantiles(A, q2 + 1, r, m)
    return [t1, q1_value, q2_value, t2]
"""
Find the closest k elements around median inclusively of array A.
Input @para: array A and parameter k.
Output: the closest k elements.
"""
def kclosest(A, k):
    """Return the k elements of A that are closest to its (lower) median.

    Closeness is the absolute difference to the median, so the median
    itself (distance 0) is always part of a non-empty result. The k-th
    smallest distance is found with `select_recur` and used as a cut-off.

    Args:
        A: sequence of numbers. It is not modified; selection runs on copies.
        k: how many elements to return. Values above len(A) are treated as
            len(A).

    Returns:
        A list of min(k, len(A)) elements in the order they appear in A.
        When several elements share the cut-off distance, the ones that
        appear first in A are kept. An empty list is returned when A is
        empty or k <= 0.
    """
    n = len(A)
    if n == 0 or k <= 0:
        return []
    k = min(k, n)

    # Lower median of A; the index returned by the selection is not needed.
    median, _ = select_recur(list(A), 0, n - 1, math.ceil(n / 2))

    # Distance of every element to the median, aligned with A.
    distances = [abs(x - median) for x in A]
    # k-th smallest distance; selection reorders its input, hence the copy.
    threshold, _ = select_recur(distances[:], 0, n - 1, k)

    # Everything strictly closer than the cut-off is taken; the remaining
    # places are filled with elements that lie exactly at the cut-off.
    strictly_closer = sum(1 for d in distances if d < threshold)
    ties_left = k - strictly_closer

    res = []
    for x, d in zip(A, distances):
        if d < threshold:
            res.append(x)
        elif d == threshold and ties_left > 0:
            res.append(x)
            ties_left -= 1
    return res
"""
Get the median of array X and array Y; X & Y are already sorted and are expected to have the same length.
Input @para: the sorted array X[xp, ..., xr], the sorted array Y[yp, ..., yr]
Output: the lower median of X and Y combined.
"""
def get_median(X, xp, xr, Y, yp, yr):
    """Return the lower median of two sorted ranges of equal length.

    Each step compares the lower medians of the two current ranges and
    discards the half of each range that cannot contain the overall
    median, until one element is left in each range.

    Args:
        X: sorted sequence.
        xp: first index of the range in X.
        xr: last index of the range in X.
        Y: sorted sequence.
        yp: first index of the range in Y.
        yr: last index of the range in Y.

    Returns:
        The lower median of the elements of X[xp..xr] and Y[yp..yr] taken
        together.

    Raises:
        ValueError: if the two ranges are empty or differ in length. The
            halving scheme is only valid for equal lengths; other inputs
            would give wrong answers or never terminate.
    """
    xn = xr - xp + 1
    yn = yr - yp + 1
    if xn < 1 or xn != yn:
        raise ValueError(
            "X[xp..xr] and Y[yp..yr] must be non-empty and of equal length "
            "(got lengths {} and {}).".format(xn, yn)
        )

    # Both ranges always shrink to the same length, so testing X suffices.
    while xr > xp:
        odd = (xr - xp + 1) % 2 == 1
        # Indices of the lower medians of the current ranges.
        xq = (xp + xr) // 2
        yq = (yp + yr) // 2
        if X[xq] < Y[yq]:
            # The overall median lies in the upper part of X and the lower
            # part of Y. With an even length X[xq] itself is ruled out;
            # with an odd length both medians have to stay.
            xp = xq if odd else xq + 1
            yr = yq
        else:
            # Mirror case: lower part of X, upper part of Y.
            xr = xq
            yp = yq if odd else yq + 1

    # One element left on each side: the smaller one is the lower median.
    return min(X[xp], Y[yp])
"""
Compute the weighted median of array A; each element of A serves as its own weight.
Input @para: A[p,...,r]; leftkey and rightkey are the weight limits for the left and right side, both 0.5 initially according to the definition.
Output: the weighted median of A.
"""
def weighted_median(A, p, r, leftkey, rightkey):
    """Return the weighted median of A[p..r], each element being its own weight.

    The median of the current window is selected with `select_recur`
    (which rearranges A in place). The weights on both sides of it are
    summed and compared with the remaining limits; the window is then
    narrowed to the side that is too heavy, and that side's limit is
    reduced by the weight that was left behind.

    Args:
        A: mutable sequence of numbers used both as values and as weights.
        p: first index of the range.
        r: last index of the range.
        leftkey: weight limit for the elements left of the answer.
        rightkey: weight limit for the elements right of the answer.

    Returns:
        The element whose left weight is below `leftkey` and whose right
        weight does not exceed `rightkey`.
    """
    lo, hi = p, r
    need_left, need_right = leftkey, rightkey
    while True:
        # Median of the current window and the position it ended up at.
        pivot_value, pivot_at = select_recur(A, lo, hi, math.ceil((hi - lo + 1) / 2))
        # Total weight on each side of the median.
        below = sum(A[lo:pivot_at])
        above = sum(A[pivot_at + 1:hi + 1])

        left_is_light = below < need_left
        if left_is_light and above <= need_right:
            # Both sides are within their limits.
            return pivot_value
        if left_is_light and above > need_right:
            # Too much weight on the right: continue there. The discarded
            # left part and the median now count against the left limit.
            need_left = need_left - below - pivot_value
            lo = pivot_at + 1
        else:
            # Too much weight on the left: continue there. The discarded
            # right part and the median now count against the right limit.
            need_right = need_right - above - pivot_value
            hi = pivot_at - 1
# Demo driver: runs every algorithm of this module once on sample data.
if __name__ == "__main__":
    # Sample inputs.
    A0 = [2, 5, 3, 0, 2, 3, 0, 3]
    A1 = [6, 0, 2, 0, 1, 3, 4, 6, 1, 3, 2]
    A2 = [329, 457, 657, 839, 436, 720, 355]
    A3 = [10000, 329, 9923, 457, 12, 657, 68, 839, 54921, 436, 2849, 720, 3, 355]
    A5 = [4321, 399, 28, 5]
    A6 = [5, 10, 7, 2, 3, 1, 4, 9, 8, 6]
    A7 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    A8 = [2, 4, 6, 8, 10, 12]
    A9 = [1, 3, 5, 7, 9, 11]
    A10 = [0.1, 0.35, 0.05, 0.1, 0.15, 0.05, 0.2]

    # Simultaneous minimum and maximum (P214).
    extremes = min_max(A3)
    print("Test for the simultaneous minimum and maximum algorithm in P214: ", extremes, '\n')

    # Recursive select (P216); works on a copy so A2 stays untouched.
    fifth_recur = select_recur(A2[:], 0, len(A2) - 1, 5)
    print("Test for recursive select in P216: ", fifth_recur)

    # Iterative select (9.2-3, P219).
    fifth_iter = select_iter(A2[:], 5)
    print("Test for iterative select in 9.2-3 P219: ", fifth_iter, '\n')

    # k-quantiles (9.3-6, P223).
    quantiles = kquantiles(A7[:], 0, len(A7) - 1, 5)
    print("Test for the k-quantiles in 9.3-6 P223: ", quantiles, '\n')

    # k elements closest to the median (9.3-7, P223).
    nearest = kclosest(A6[:], 4)
    print("Test for the k closest to median func in 9.3-7 P223: ", nearest, '\n')

    # Median of two sorted arrays (9.3-8, P223).
    joint_median = get_median(A8, 0, len(A8) - 1, A9, 0, len(A9) - 1)
    print("Test for find the median in X & Y in 9.3-8 P223: ", joint_median, '\n')

    # Weighted median (problem 9-2, P225).
    w_median = weighted_median(A10, 0, len(A10) - 1, 0.5, 0.5)
    print("Test for the weighted median in problems 9-2 P225: ", w_median, '\n')