for key in self.get_keys(simhash):
dups = self.bucket[key]
self.log.debug('key:%s', key)
if len(dups) > 200:
self.log.warning('Big bucket found. key:%s, len:%s', key, len(dups))
for dup in dups:
sim2, obj_id = dup.split(',', 1)
sim2 = Simhash(long(sim2, 16), self.f)
d = simhash.distance(sim2)
if d <= self.k:
ans.put((d, obj_id))
res = []
tmp = {}
while not ans.empty():
d, obj_id = ans.get()
if obj_id not in tmp:
res.append(str(obj_id))
tmp[obj_id] = 1`
I customized it as shown below:
`
ans = PriorityQueue()