I recently was looking at a making predictions on a graph transaction and required maintaining a sorted collection to implement. After spining my wheels (wasting time and failing to build it myself), I google for a bit and found this amazing library called sortedcontainers that is pure python to boot.

If you didn't know about it previously, check it out. It will make your life easier in the future.

!conda install memory_profiler -n data-structures -y
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

!conda install sortedcontainers -n data-structures -y
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

%load_ext autoreload
%load_ext memory_profiler
%autoreload 2
from sortedcontainers import SortedList, SortedDict, SortedSet

Sorted List

l = ['e', 'a', 'c', 'd', 'b']
print(f"List({l})")
sl = SortedList(l)
print(f"{sl}")
sl *= 10_000_000
print(f"count of c: {sl.count('c'):,}")
del sl
List(['e', 'a', 'c', 'd', 'b'])
SortedList(['a', 'b', 'c', 'd', 'e'])
count of c: 10,000,000
%%timeit -n 1 -r 5
%memit
sl = SortedList(['e', 'a', 'c', 'd', 'b'])
sl *= 10_000_000
sl.count('c')
del sl
peak memory: 51.81 MiB, increment: 0.15 MiB
peak memory: 52.68 MiB, increment: 0.01 MiB
peak memory: 53.14 MiB, increment: -0.00 MiB
peak memory: 53.18 MiB, increment: 0.02 MiB
peak memory: 53.34 MiB, increment: 0.01 MiB
15.9 s ± 254 ms per loop (mean ± std. dev. of 5 runs, 1 loop each)

Sorted Dict

d = {'c': 3, 'a': 1, 'b': 2}
print(f"Dict({d})")
sd = SortedDict(d)
print(f"{sd}")
print(f"pop last item: {sd.popitem(index=-1)}")
del sd
Dict({'c': 3, 'a': 1, 'b': 2})
SortedDict({'a': 1, 'b': 2, 'c': 3})
pop last item: ('c', 3)
%%timeit -n 1 -r 5
%memit
sd = SortedDict({'c': 3, 'a': 1, 'b': 2})
sd.popitem(index=-1)
del sd
peak memory: 53.62 MiB, increment: 0.00 MiB
peak memory: 53.62 MiB, increment: 0.00 MiB
peak memory: 53.62 MiB, increment: 0.00 MiB
peak memory: 53.63 MiB, increment: 0.00 MiB
peak memory: 53.63 MiB, increment: 0.00 MiB
7.84 s ± 248 ms per loop (mean ± std. dev. of 5 runs, 1 loop each)

Sorted Set

s = set('abracadabra')
print(f"Set({s})")
ss = SortedSet(s)
print(f"{ss}")
print(f"index of c: {ss.bisect_left('c')}")
del ss
Set({'d', 'c', 'r', 'b', 'a'})
SortedSet(['a', 'b', 'c', 'd', 'r'])
index of c: 2
%%timeit -n 1 -r 5
%memit
ss = SortedSet('abracadabra')
ss.bisect_left('c')
del ss
peak memory: 53.68 MiB, increment: 0.00 MiB
peak memory: 53.68 MiB, increment: 0.00 MiB
peak memory: 53.68 MiB, increment: 0.00 MiB
peak memory: 53.68 MiB, increment: 0.00 MiB
peak memory: 53.68 MiB, increment: 0.00 MiB
7.73 s ± 200 ms per loop (mean ± std. dev. of 5 runs, 1 loop each)