Skip to content

Commit fde34e9

Browse files
Merge pull request #131 from Theodus/master
Dtrie
2 parents ee781de + fcad795 commit fde34e9

File tree

5 files changed

+631
-1
lines changed

5 files changed

+631
-1
lines changed

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,14 @@ Most concurrent data structures do not support snapshots, instead opting for
156156
locks or requiring a quiescent state. This allows Ctries to have O(1) iterator
157157
creation and clear operations and O(logn) size retrieval.
158158

159+
#### Dtrie
160+
161+
A persistent hash trie that dynamically expands or shrinks to provide efficient
162+
memory allocation. Being persistent, the Dtrie is immutable and any modification
163+
yields a new version of the Dtrie rather than changing the original. Bitmapped
164+
nodes allow for O(log32(n)) get, remove, and update operations. Insertions are
165+
O(n) and iteration is O(1).
166+
159167
#### Persistent List
160168

161169
A persistent, immutable linked list. All write operations yield a new, updated
@@ -204,4 +212,3 @@ Requirements to commit here:
204212

205213
- Dustin Hiatt <[dustin.hiatt@workiva.com](mailto:dustin.hiatt@workiva.com)>
206214
- Alexander Campbell <[alexander.campbell@workiva.com](mailto:alexander.campbell@workiva.com)>
207-

trie/dtrie/dtrie.go

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*
2+
Copyright (c) 2016, Theodore Butler
3+
All rights reserved.
4+
5+
Redistribution and use in source and binary forms, with or without
6+
modification, are permitted provided that the following conditions are met:
7+
8+
* Redistributions of source code must retain the above copyright notice, this
9+
list of conditions and the following disclaimer.
10+
11+
* Redistributions in binary form must reproduce the above copyright notice,
12+
this list of conditions and the following disclaimer in the documentation
13+
and/or other materials provided with the distribution.
14+
15+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25+
*/
26+
27+
// Package dtrie provides an implementation of the dtrie data structure, which
28+
// is a persistent hash trie that dynamically expands or shrinks to provide
29+
// efficient memory allocation. This data structure is based on the papers
30+
// "Ideal Hash Trees" by Phil Bagwell and "Optimizing Hash-Array Mapped Tries for
31+
// Fast and Lean Immutable JVM Collections" by Michael J. Steindorfer and
32+
// Jurgen J. Vinju.
33+
package dtrie
34+
35+
// Dtrie is a persistent hash trie that dynamically expands or shrinks
36+
// to provide efficient memory allocation.
37+
type Dtrie struct {
38+
root *node
39+
hasher func(v interface{}) uint32
40+
}
41+
42+
// New creates an empty DTrie with the given hashing function.
43+
// If nil is passed in, the default hashing function will be used.
44+
func New(hasher func(v interface{}) uint32) *Dtrie {
45+
if hasher == nil {
46+
hasher = defaultHasher
47+
}
48+
return &Dtrie{
49+
root: emptyNode(0, 32),
50+
hasher: hasher,
51+
}
52+
}
53+
54+
// Size returns the number of entries in the Dtrie.
55+
func (d *Dtrie) Size() (size int) {
56+
for _ = range iterate(d.root, nil) {
57+
size++
58+
}
59+
return size
60+
}
61+
62+
// Get returns the Entry for the associated key or returns nil if the
63+
// key does not exist.
64+
func (d *Dtrie) Get(key interface{}) Entry {
65+
return get(d.root, d.hasher(key), key)
66+
}
67+
68+
// Insert adds an entry to the Dtrie, replacing the existing value if
69+
// the key already exists and returns the resulting Dtrie.
70+
func (d *Dtrie) Insert(entry Entry) *Dtrie {
71+
root := insert(d.root, entry)
72+
return &Dtrie{root, d.hasher}
73+
}
74+
75+
// Remove deletes the value for the associated key if it exists and returns
76+
// the resulting Dtrie.
77+
func (d *Dtrie) Remove(key interface{}) *Dtrie {
78+
root := remove(d.root, d.hasher(key), key)
79+
return &Dtrie{root, d.hasher}
80+
}
81+
82+
// Iterator returns a read-only channel of Entries from the Dtrie. If a stop
83+
// channel is provided, closing it will terminate and close the iterator
84+
// channel. Note that if a cancel channel is not used and not every entry is
85+
// read from the iterator, a goroutine will leak.
86+
func (d *Dtrie) Iterator(stop <-chan struct{}) <-chan Entry {
87+
return iterate(d.root, stop)
88+
}

trie/dtrie/dtrie_test.go

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
/*
2+
Copyright (c) 2016, Theodore Butler
3+
All rights reserved.
4+
5+
Redistribution and use in source and binary forms, with or without
6+
modification, are permitted provided that the following conditions are met:
7+
8+
* Redistributions of source code must retain the above copyright notice, this
9+
list of conditions and the following disclaimer.
10+
11+
* Redistributions in binary form must reproduce the above copyright notice,
12+
this list of conditions and the following disclaimer in the documentation
13+
and/or other materials provided with the distribution.
14+
15+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25+
*/
26+
27+
package dtrie
28+
29+
import (
30+
"fmt"
31+
"sync/atomic"
32+
"testing"
33+
34+
"github.com/stretchr/testify/assert"
35+
)
36+
37+
func TestPopCount(t *testing.T) {
38+
b := []uint32{
39+
uint32(0x55555555), // 0x55555555 = 01010101 01010101 01010101 01010101
40+
uint32(0x33333333), // 0x33333333 = 00110011 00110011 00110011 00110011
41+
uint32(0x0F0F0F0F), // 0x0F0F0F0F = 00001111 00001111 00001111 00001111
42+
uint32(0x00FF00FF), // 0x00FF00FF = 00000000 11111111 00000000 11111111
43+
uint32(0x0000FFFF), // 0x0000FFFF = 00000000 00000000 11111111 11111111
44+
}
45+
for _, x := range b {
46+
assert.Equal(t, 16, popCount(x))
47+
}
48+
}
49+
50+
func TestDefaultHasher(t *testing.T) {
51+
assert.Equal(t,
52+
defaultHasher(map[int]string{11234: "foo"}),
53+
defaultHasher(map[int]string{11234: "foo"}))
54+
assert.NotEqual(t, defaultHasher("foo"), defaultHasher("bar"))
55+
}
56+
57+
// testEntry is a minimal entry used by the tests: an int key and value plus
// a precomputed hash. The field order matters because the tests construct it
// with positional literals of the form testEntry{hash, key, value}.
type testEntry struct {
	hash  uint32
	key   int
	value int
}

// KeyHash returns the hash the entry was created with.
func (te *testEntry) KeyHash() uint32 { return te.hash }

// Key returns the entry's int key.
func (te *testEntry) Key() interface{} { return te.key }

// Value returns the entry's int value.
func (te *testEntry) Value() interface{} { return te.value }

// String renders the entry's value in decimal.
func (te *testEntry) String() string {
	return fmt.Sprintf("%d", te.value)
}
78+
79+
// collisionHash ignores its argument and always returns the all-ones 32-bit
// value, so every key hashes identically. It is used for testing collisions.
func collisionHash(key interface{}) uint32 {
	const allOnes = ^uint32(0)
	return allOnes
}
82+
83+
func TestInsert(t *testing.T) {
84+
insertTest(t, defaultHasher, 10000)
85+
insertTest(t, collisionHash, 1000)
86+
}
87+
88+
func insertTest(t *testing.T, hashfunc func(interface{}) uint32, count int) *node {
89+
n := emptyNode(0, 32)
90+
for i := 0; i < count; i++ {
91+
n = insert(n, &testEntry{hashfunc(i), i, i})
92+
}
93+
return n
94+
}
95+
96+
func TestGet(t *testing.T) {
97+
getTest(t, defaultHasher, 10000)
98+
getTest(t, collisionHash, 1000)
99+
}
100+
101+
func getTest(t *testing.T, hashfunc func(interface{}) uint32, count int) {
102+
n := insertTest(t, hashfunc, count)
103+
for i := 0; i < count; i++ {
104+
x := get(n, hashfunc(i), i)
105+
assert.Equal(t, i, x.Value())
106+
}
107+
}
108+
109+
func TestRemove(t *testing.T) {
110+
removeTest(t, defaultHasher, 10000)
111+
removeTest(t, collisionHash, 1000)
112+
}
113+
114+
func removeTest(t *testing.T, hashfunc func(interface{}) uint32, count int) {
115+
n := insertTest(t, hashfunc, count)
116+
for i := 0; i < count; i++ {
117+
n = remove(n, hashfunc(i), i)
118+
}
119+
for _, e := range n.entries {
120+
if e != nil {
121+
t.Fatal("final node is not empty")
122+
}
123+
}
124+
}
125+
126+
func TestUpdate(t *testing.T) {
127+
updateTest(t, defaultHasher, 10000)
128+
updateTest(t, collisionHash, 1000)
129+
}
130+
131+
func updateTest(t *testing.T, hashfunc func(interface{}) uint32, count int) {
132+
n := insertTest(t, hashfunc, count)
133+
for i := 0; i < count; i++ {
134+
n = insert(n, &testEntry{hashfunc(i), i, -i})
135+
}
136+
}
137+
138+
func TestIterate(t *testing.T) {
139+
n := insertTest(t, defaultHasher, 10000)
140+
echan := iterate(n, nil)
141+
var c int64
142+
for _ = range echan {
143+
c++
144+
}
145+
assert.Equal(t, int64(10000), c)
146+
// test with stop chan
147+
c = 0
148+
stop := make(chan struct{})
149+
echan = iterate(n, stop)
150+
go func() {
151+
for _ = range echan {
152+
atomic.AddInt64(&c, 1)
153+
}
154+
}()
155+
for atomic.LoadInt64(&c) < 100 {
156+
}
157+
close(stop)
158+
cf := atomic.LoadInt64(&c)
159+
assert.True(t, cf > 99 && cf < 1000)
160+
// test with collisions
161+
n = insertTest(t, collisionHash, 1000)
162+
atomic.StoreInt64(&c, 0)
163+
echan = iterate(n, nil)
164+
for _ = range echan {
165+
atomic.AddInt64(&c, 1)
166+
}
167+
assert.Equal(t, int64(1000), atomic.LoadInt64(&c))
168+
}
169+
170+
func TestSize(t *testing.T) {
171+
n := insertTest(t, defaultHasher, 10000)
172+
d := &Dtrie{n, defaultHasher}
173+
assert.Equal(t, 10000, d.Size())
174+
}
175+
176+
func BenchmarkInsert(b *testing.B) {
177+
b.ReportAllocs()
178+
n := emptyNode(0, 32)
179+
b.ResetTimer()
180+
for i := b.N; i > 0; i-- {
181+
n = insert(n, &testEntry{defaultHasher(i), i, i})
182+
}
183+
}
184+
185+
func BenchmarkGet(b *testing.B) {
186+
b.ReportAllocs()
187+
n := insertTest(nil, defaultHasher, b.N)
188+
b.ResetTimer()
189+
for i := b.N; i > 0; i-- {
190+
get(n, defaultHasher(i), i)
191+
}
192+
}
193+
194+
func BenchmarkRemove(b *testing.B) {
195+
b.ReportAllocs()
196+
n := insertTest(nil, defaultHasher, b.N)
197+
b.ResetTimer()
198+
for i := b.N; i > 0; i-- {
199+
n = remove(n, defaultHasher(i), i)
200+
}
201+
}
202+
203+
func BenchmarkUpdate(b *testing.B) {
204+
b.ReportAllocs()
205+
n := insertTest(nil, defaultHasher, b.N)
206+
b.ResetTimer()
207+
for i := b.N; i > 0; i-- {
208+
n = insert(n, &testEntry{defaultHasher(i), i, -i})
209+
}
210+
}

0 commit comments

Comments
 (0)