Skip to content
Snippets Groups Projects
Commit 93073fb0 authored by ph's avatar ph
Browse files

Refactoring, got rid of globals, objectified for simpler reuse

parent 85e2022e
No related branches found
No related tags found
No related merge requests found
bitop.py 0 → 100644
#!/usr/bin/python3
class BitOp(object):
    """Precomputed bit masks for integers that are ``bits`` wide.

    Attributes:
        bits: width of the masks, in bits.
        full_mask: integer with the low ``bits`` bits set.
        bit_prefix: list of ``bits + 1`` masks; entry ``i`` has the top
            ``bits - i`` bits set and the low ``i`` bits clear.
        bit_postfix: list of ``bits + 1`` masks; entry ``i`` has only the
            low ``i`` bits set.
    """

    def __init__(self, bits):
        """Build all masks for a width of ``bits`` bits.

        Raises:
            ValueError: if ``bits`` is negative.
            TypeError: if ``bits`` is not an integer.
        """
        self.bits = bits
        self.gen_bit_prefix()
        # Shift arithmetic instead of int("1" * bits, 2): the string form
        # raises ValueError on the empty string produced by bits == 0.
        self.full_mask = (1 << self.bits) - 1

    def gen_bit_prefix(self):
        """(Re)build ``bit_prefix`` and ``bit_postfix`` from ``self.bits``."""
        # we'll overshoot for bintree_search: indices run 0..bits inclusive
        postfix = [(1 << i) - 1 for i in range(self.bits + 1)]
        # The last postfix entry has every one of the ``bits`` bits set, so
        # XOR-ing it with entry i clears exactly the low i bits.
        self.bit_prefix = [postfix[-1] ^ low for low in postfix]
        self.bit_postfix = postfix
......@@ -7,7 +7,10 @@ class Morton(object):
self.dim_zeroed = dim - 1
self.bits = bits
self.bits_half = bits >> 1
self.gen_masks()
def gen_masks(self):
self.mask_to = {}
n = self.bits_half
while n > 0:
......@@ -16,8 +19,8 @@ class Morton(object):
self.mask_from = {}
n = 1
while n <= bits:
self.mask_from[n>>1] = int(("0" * self.dim_zeroed * n + "1" * n) * (bits//n), 2)
while n <= self.bits:
self.mask_from[n>>1] = int(("0" * self.dim_zeroed * n + "1" * n) * (self.bits//n), 2)
n = n << 1
......
......@@ -4,6 +4,7 @@ import time
from types import SimpleNamespace
from kyoto import database
from morton import Morton
from bitop import BitOp
from random import randrange
def gen_dim_data(n, morton, valbits=8):
......@@ -23,22 +24,31 @@ def gen_dim_index(n, db, morton, data):
bar.update(enum+1, enum+1)
enum += 1
bit_prefix = []
bit_postfix = []
full_mask = 0
def gen_bit_prefix(bits):
for i in range(0, bits+1): # we'll overshoot for bintree_search
bit_prefix.append(int("1"*(bits-i) + "0"*(i), 2))
bit_postfix.append(int("0"*(bits-i) + "1"*i, 2))
def naive_search(db, lo, hi, morton):
los = morton.decompose(lo)
his = morton.decompose(hi)
#l = list(db.get_all_keys())
#print(len(l))
for val in db.get_all_keys():
vals = morton.decompose(val)
class Searcher(object):
    """Base class for the search strategies.

    Stores the three collaborators handed to the constructor (``db``,
    ``morton``, ``bitop``) as attributes of the same name. Subclasses are
    expected to override ``search`` and ``space``; both raise
    ``NotImplementedError`` here.
    """

    def __init__(self, db=None, morton=None, bitop=None):
        self.db, self.morton, self.bitop = db, morton, bitop

    @property
    def space(self):
        """Size of the space a search walks; must be provided by subclasses."""
        raise NotImplementedError()

    def search(self, lo, hi):
        """Search between the bounds ``lo`` and ``hi``; must be provided by subclasses."""
        raise NotImplementedError()
class NaiveSearcher(Searcher):
@property
def space(self):
    """Return the count reported by the store wrapped in ``self.db`` (``self.db.db.count()``)."""
    store = self.db.db
    return store.count()
def search(self, lo, hi):
los = self.morton.decompose(lo)
his = self.morton.decompose(hi)
for val in self.db.get_all_keys():
vals = self.morton.decompose(val)
match = True
for l, h, v in zip(los, his, vals):
if v < l or v > h:
......@@ -49,27 +59,34 @@ def naive_search(db, lo, hi, morton):
else:
yield None, 1
class BinTreeSearcher(Searcher):
State = SimpleNamespace # mutable makes things simpler (instead of NamedTuple)
RState = SimpleNamespace
Range = SimpleNamespace
def bintree_search(db, lo, hi, morton):
@property
def space(self):
    """Return 2 raised to ``morton.bits * morton.dim``, i.e. the number of values of that bit width."""
    key_width = self.morton.bits * self.morton.dim
    return 2 ** key_width
def search(self, lo, hi):
#print(bin(lo))
#print(bin(hi))
bit_pos = morton.bits * morton.dim - 1
bit_pos = self.morton.bits * self.morton.dim - 1
mask = 1 << bit_pos
depth = 0
states = []
zero_bits = 0
one_bits = int("1" * morton.bits * morton.dim, 2)
one_bits = int("1" * self.morton.bits * self.morton.dim, 2)
state_bits = 0
dim = 0
states.append(
State(
self.State(
bit=-1, # -1 = not yet processed
rstate=list(
RState(lo_inside=False, hi_inside=False) for i in range(morton.dim)
self.RState(lo_inside=False, hi_inside=False) for i in range(self.morton.dim)
)
)
)
......@@ -84,10 +101,10 @@ def bintree_search(db, lo, hi, morton):
if states[depth].bit > 1:
states.pop()
bit_pos += 1
state_bits &= bit_prefix[bit_pos]
state_bits &= self.bitop.bit_prefix[bit_pos]
depth -= 1
mask <<= 1
dim = (dim + morton.dim - 1) % morton.dim
dim = (dim + self.morton.dim - 1) % self.morton.dim
else:
state_bits |= states[depth].bit<<bit_pos
# here goes bound checking
......@@ -117,8 +134,8 @@ def bintree_search(db, lo, hi, morton):
# now for db
lower = state_bits
upper = state_bits | bit_postfix[bit_pos]
res = db.get_range_key(lower, upper)
upper = state_bits | self.bitop.bit_postfix[bit_pos]
res = self.db.get_range_key(lower, upper)
if res is None:
#print("nothing found")
yield None, 2**bit_pos
......@@ -131,44 +148,53 @@ def bintree_search(db, lo, hi, morton):
yield [res], 1
else:
new_rstate = states[depth].rstate[:]
new_rstate[dim] = RState(lo_inside=lo_inside, hi_inside=hi_inside)
new_rstate[dim] = self.RState(lo_inside=lo_inside, hi_inside=hi_inside)
if (all((r.lo_inside for r in new_rstate)) and all((r.hi_inside for r in new_rstate))):
yield db.get_range_keys(lower, upper), 2**bit_pos
yield self.db.get_range_keys(lower, upper), 2**bit_pos
else:
states.append(State(bit=-1, rstate=new_rstate)) # -1 = not yet processed
states.append(self.State(bit=-1, rstate=new_rstate)) # -1 = not yet processed
#print("descent")
bit_pos -= 1
depth += 1
mask >>= 1
dim = (dim + 1) % morton.dim
dim = (dim + 1) % self.morton.dim
def clip_up(bit_pos, val, morton):
mask = (morton.mask_from[0] >> (morton.dim * morton.bits - bit_pos)) & bit_postfix[bit_pos]
bit_zero = (1 << bit_pos) ^ full_mask
class SkipSearcher(Searcher):
@property
def space(self):
    """Return 2 raised to ``morton.bits * morton.dim``, i.e. the number of values of that bit width."""
    key_width = self.morton.bits * self.morton.dim
    return 2 ** key_width
def clip_up(self, bit_pos, val):
    """Return ``val`` with extra low bits switched on below ``bit_pos``.

    The bits added come from ``morton.mask_from[0]`` shifted right by
    ``dim * bits - bit_pos`` and restricted by ``bitop.bit_postfix[bit_pos]``;
    bit ``bit_pos`` itself is additionally masked out before OR-ing.
    NOTE(review): presumably ``mask_from[0]`` is the one-bit-per-dimension
    pattern built by Morton.gen_masks, so this raises one dimension to its
    maximum below ``bit_pos`` -- confirm against Morton.
    """
    morton, bitop = self.morton, self.bitop
    shift = morton.dim * morton.bits - bit_pos
    below = (morton.mask_from[0] >> shift) & bitop.bit_postfix[bit_pos]
    all_but_pivot = (1 << bit_pos) ^ bitop.full_mask
    # `&` binds tighter than `|`; the parentheses only make that explicit.
    return val | (below & all_but_pivot)
def clip_down(bit_pos, val, morton):
mask = (morton.mask_from[0] >> (morton.dim * morton.bits - bit_pos)) ^ full_mask
def clip_down(self, bit_pos, val):
    """Return ``val`` with selected low bits cleared and bit ``bit_pos`` set.

    The bits cleared are those set in ``morton.mask_from[0]`` shifted right
    by ``dim * bits - bit_pos``; every other bit of ``val`` is kept (the
    keep-mask is that shifted pattern XOR ``bitop.full_mask``).
    NOTE(review): presumably ``mask_from[0]`` is the one-bit-per-dimension
    pattern built by Morton.gen_masks, so this drops one dimension to its
    minimum below ``bit_pos`` -- confirm against Morton.
    """
    morton = self.morton
    shift = morton.dim * morton.bits - bit_pos
    keep = (morton.mask_from[0] >> shift) ^ self.bitop.full_mask
    # `&` binds tighter than `|`; the parentheses only make that explicit.
    return (val & keep) | (1 << bit_pos)
def get_closest_alignment(val, lo, hi, morton):
def get_closest_alignment(self, val, lo, hi):
# lo > hi must not happen
align = None
for bit_pos in reversed(range(morton.dim * morton.bits)):
for bit_pos in reversed(range(self.morton.dim * self.morton.bits)):
mask = 1 << bit_pos
val_bit = val & mask
lo_bit = lo & mask
hi_bit = hi & mask
if not val_bit:
if lo_bit < hi_bit: # boundaries 0..1
# at this bit the closest boundary for this dimension is next half of the space
# but maybe closer will be found at less significant bits
align = clip_down(bit_pos, lo, morton)
align = self.clip_down(bit_pos, lo)
# next bits must not leave this quadrant, so clip them to
# top border
hi = clip_up(bit_pos, hi, morton)
hi = self.clip_up(bit_pos, hi)
elif lo_bit: # both boundaries 1
# val is strictly lower, so clip to the lower bound (which could have
# gotten adjusted by higher bits
......@@ -180,7 +206,7 @@ def get_closest_alignment(val, lo, hi, morton):
else:
if lo_bit < hi_bit: # boundaries 0..1
# We have left the top part of Z, clip search to the bottom half of the space
lo = clip_down(bit_pos, lo, morton)
lo = self.clip_down(bit_pos, lo)
elif not lo_bit: # both boundaries 0
# val is strictly higher, end of search
break
......@@ -189,17 +215,17 @@ def get_closest_alignment(val, lo, hi, morton):
continue
return align
def skip_search(db, lo, hi, morton):
los = morton.decompose(lo)
his = morton.decompose(hi)
#l = list(db.get_all_keys())
#print(len(l))
class sentinel(object): pass
it = db.get_all_keys()
class Sentinel(object):
    """Marker type; the class object itself is passed as the default to ``next()`` to detect exhaustion."""
def search(self, lo, hi):
los = self.morton.decompose(lo)
his = self.morton.decompose(hi)
it = self.db.get_all_keys()
oldval = 0
val = next(it, sentinel)
while val is not sentinel:
vals = morton.decompose(val)
val = next(it, self.Sentinel)
while val is not self.Sentinel:
vals = self.morton.decompose(val)
match = True
for l, h, v in zip(los, his, vals):
if v < l or v > h:
......@@ -209,16 +235,16 @@ def skip_search(db, lo, hi, morton):
yield [val], val - oldval + 1
oldval = val
else:
align = get_closest_alignment(val, lo, hi, morton)
align = self.get_closest_alignment(val, lo, hi)
if align is None:
break
else:
yield None, align - oldval
oldval = align
#print(bin(align))
db.jump(align)
val = next(it, sentinel)
yield None, full_mask - oldval
self.db.jump(align)
val = next(it, self.Sentinel)
yield None, self.bitop.full_mask - oldval
class pbar(object):
......@@ -273,8 +299,7 @@ if __name__=="__main__":
valbits = 8
dims = 6
morton = Morton(dims, keybits)
gen_bit_prefix(keybits*dims)
full_mask = int("1" * keybits * dims, 2)
bitop = BitOp(dims * keybits)
#lo = morton.compose((0, 0))
#hi = morton.compose((128, 128))
lo = morton.compose((0, 0, 0, 0, 0, 0))
......@@ -284,8 +309,8 @@ if __name__=="__main__":
#hi = morton.compose((2**keybits-1,)*dims) # full range bits 128
#hi = morton.compose((303157124495624664821668372333924974592,)*dims) # 1/2 bits 128
#hi = morton.compose((270082290000000000000000000000000000000,)*dims) # 1/4 bits 128
hi = morton.compose((231831680000000000000000000000000000000,)*dims) # 1/10 bits 128
#hi = morton.compose((157945080000000000000000000000000000000,)*dims) # 1/100 bits 128
#hi = morton.compose((231831680000000000000000000000000000000,)*dims) # 1/10 bits 128
hi = morton.compose((157945080000000000000000000000000000000,)*dims) # 1/100 bits 128
#hi = morton.compose((2**keybits-1,)*dims) # 100% bits 128
db = database("morton", (keybits * dims + 7) // 8, (valbits + 7) // 8)
#gen_len = 100000
......@@ -297,13 +322,17 @@ if __name__=="__main__":
#db.dump()
print("DB.count", db.db.count())
print("Naive search")
results = naive_search(db, lo, hi, morton)
res_list = []
for stype in NaiveSearcher, BinTreeSearcher, SkipSearcher:
# for stype in BinTreeSearcher, SkipSearcher:
# for stype in SkipSearcher,:
print(stype.__name__)
s = stype(db, morton, bitop)
results = s.search(lo, hi)
num = 0
proc = 0
space = db.db.count()
plain_res = []
bar = pbar(space)
bar = pbar(s.space)
for res, processed in results:
proc += processed
if res is not None:
......@@ -313,41 +342,8 @@ if __name__=="__main__":
# deco = morton.decompose(r)
# print(deco)
print()
res_list.append(plain_res)
print("Bintree search")
results = bintree_search(db, lo, hi, morton)
num = 0
proc = 0
space = 2**(keybits*dims)
plain_res_2 = []
bar = pbar(space)
for res, processed in results:
proc += processed
if res is not None:
plain_res_2.extend(list(res))
bar.update(proc, len(plain_res_2))
#for r in plain_res:
# deco = morton.decompose(r)
# print(deco)
print()
assert set(plain_res) == set(plain_res_2)
print("Skip search")
results = skip_search(db, lo, hi, morton)
num = 0
proc = 0
space = 2**(keybits*dims)
plain_res_3 = []
bar = pbar(space)
for res, processed in results:
proc += processed
if res is not None:
plain_res_3.extend(list(res))
bar.update(proc, len(plain_res_3))
#for r in plain_res_3:
# deco = morton.decompose(r)
# print(deco)
print()
assert set(plain_res) == set(plain_res_3)
assert res_list.count(res_list[0]) == len(res_list)
#db.drop()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment