pytries · EliFinkelshteyn · Apr 13, 2015 · Apr 13, 2015 · Apr 14, 2015 · Apr 14, 2015
diff --git a/dawg_python/dawgs.py b/dawg_python/dawgs.py
@@ -141,6 +141,39 @@ def keys(self, prefix=""):
 
         return res
 
+    def edges(self, prefix=""):
+        """
+        Return the edges leaving ``prefix`` as a list of the tuples
+        produced by ``EdgeFollower.get_cur_edge()``.
+
+        The list is empty when ``prefix`` cannot be followed from the root
+        or when the follower finds no edge to start from.
+        """
+        found = []
+        raw_prefix = prefix.encode('utf8')
+
+        start_index = self.dct.follow_bytes(raw_prefix, self.dct.ROOT)
+        if start_index is None:
+            return found
+
+        follower = wrapper.EdgeFollower(self.dct, self.guide)
+        more = follower.start(start_index, raw_prefix)
+        while more:
+            found.append(follower.get_cur_edge())
+            more = follower.next()
+
+        return found
+
+    def iteredges(self, prefix=""):
+        """
+        Lazily yield the edges leaving ``prefix``, one
+        ``EdgeFollower.get_cur_edge()`` tuple at a time.
+
+        Nothing is yielded when ``prefix`` cannot be followed from the root
+        or when the follower finds no edge to start from.
+        """
+        raw_prefix = prefix.encode('utf8')
+
+        start_index = self.dct.follow_bytes(raw_prefix, self.dct.ROOT)
+        if start_index is None:
+            return
+
+        follower = wrapper.EdgeFollower(self.dct, self.guide)
+        more = follower.start(start_index, raw_prefix)
+        while more:
+            yield follower.get_cur_edge()
+            more = follower.next()
+
     def iterkeys(self, prefix=""):
         b_prefix = prefix.encode('utf8')
         index = self.dct.follow_bytes(b_prefix, self.dct.ROOT)
@@ -279,15 +312,14 @@ def iterkeys(self, prefix=""):
             yield u_key
 
     def items(self, prefix=""):
+        index = self.dct.ROOT
         if not isinstance(prefix, bytes):
             prefix = prefix.encode('utf8')
-        res = []
-
-        index = self.dct.ROOT
         if prefix:
             index = self.dct.follow_bytes(prefix, index)
             if not index:
-                return res
+                return []
+        res = []
 
         completer = wrapper.Completer(self.dct, self.guide)
         completer.start(index, prefix)
@@ -301,10 +333,9 @@ def items(self, prefix=""):
         return res
 
     def iteritems(self, prefix=""):
+        index = self.dct.ROOT
         if not isinstance(prefix, bytes):
             prefix = prefix.encode('utf8')
-
-        index = self.dct.ROOT
         if prefix:
             index = self.dct.follow_bytes(prefix, index)
             if not index:
@@ -315,9 +346,52 @@ def iteritems(self, prefix=""):
 
         while completer.next():
             key, value = completer.key.split(self._payload_separator)
-            item = (key.decode('utf8'), a2b_base64(bytes(value))) # bytes() cast is a python 2.6 fix
+            # bytes() cast is a python 2.6 fix
+            item = (key.decode('utf8'), a2b_base64(bytes(value)))
             yield item
 
+    def edges(self, prefix=""):
+        """
+        Return the edges leaving ``prefix`` as a list of ``(key, value)``
+        tuples.
+
+        ``prefix`` may be text or bytes; text is encoded as UTF-8. For each
+        edge one tuple is produced per item returned by
+        ``self.b_get_value()`` for the edge key, or a single
+        ``(key, False)`` when that call returns nothing truthy.
+
+        An empty list is returned when ``prefix`` cannot be followed or has
+        no outgoing edges.
+        """
+        index = self.dct.ROOT
+        if not isinstance(prefix, bytes):
+            prefix = prefix.encode('utf8')
+        if prefix:
+            index = self.dct.follow_bytes(prefix, index)
+            if not index:
+                # Always hand back a list (never None), like items() does.
+                return []
+        res = []
+
+        edge_follower = wrapper.EdgeFollower(self.dct, self.guide)
+        has_edge = edge_follower.start(index, prefix)
+        while has_edge:
+            vals = self.b_get_value(bytes(edge_follower.key)) or [False]
+            res.extend((edge_follower.decoded_key, val) for val in vals)
+            has_edge = edge_follower.next()
+
+        return res
+
+    def iteredges(self, prefix=""):
+        """
+        Lazily yield ``(key, value)`` tuples for the edges leaving
+        ``prefix`` (text or bytes; text is encoded as UTF-8).
+
+        For each edge, one tuple is yielded per item returned by
+        ``self.b_get_value()``; falsy items, or a falsy result as a whole,
+        are reported as ``False``. Nothing is yielded when ``prefix``
+        cannot be followed.
+        """
+        if not isinstance(prefix, bytes):
+            prefix = prefix.encode('utf8')
+        start_index = self.dct.ROOT
+        if prefix:
+            start_index = self.dct.follow_bytes(prefix, start_index)
+            if not start_index:
+                return
+
+        follower = wrapper.EdgeFollower(self.dct, self.guide)
+        more = follower.start(start_index, prefix)
+        while more:
+            payloads = self.b_get_value(bytes(follower.key)) or [False]
+            for payload in payloads:
+                yield (follower.decoded_key, payload or False)
+            more = follower.next()
 
     def _has_value(self, index):
+        # Try to follow PAYLOAD_SEPARATOR from ``index`` and return whatever
+        # follow_bytes() gives back; callers use the result as a truth value.
         return self.dct.follow_bytes(PAYLOAD_SEPARATOR, index)
@@ -464,6 +538,43 @@ class IntCompletionDAWG(CompletionDAWG, IntDAWG):
     Dict-like class based on DAWG.
     It can store integer values for unicode keys and support key completion.
     """
+    def edges(self, prefix=""):
+        """
+        Return the edges leaving ``prefix`` as a list of ``(key, value)``
+        tuples.
+
+        ``prefix`` may be text or bytes; text is encoded as UTF-8. ``value``
+        is whatever ``EdgeFollower.value()`` reports for the edge: the
+        stored value, or ``False`` when the edge does not end a key.
+
+        An empty list is returned when ``prefix`` cannot be followed or has
+        no outgoing edges.
+        """
+        index = self.dct.ROOT
+        if not isinstance(prefix, bytes):
+            prefix = prefix.encode('utf8')
+        if prefix:
+            index = self.dct.follow_bytes(prefix, index)
+            if not index:
+                # Always hand back a list (never None) so callers can
+                # iterate or compare the result unconditionally.
+                return []
+        res = []
+
+        edge_follower = wrapper.EdgeFollower(self.dct, self.guide)
+        has_edge = edge_follower.start(index, prefix)
+        while has_edge:
+            res.append((edge_follower.decoded_key, edge_follower.value()))
+            has_edge = edge_follower.next()
+
+        return res
+
+    def iteredges(self, prefix=""):
+        """
+        Lazily yield ``(key, value)`` tuples for the edges leaving
+        ``prefix`` (text or bytes; text is encoded as UTF-8).
+
+        ``value`` comes from ``EdgeFollower.value()``. Nothing is yielded
+        when ``prefix`` cannot be followed.
+        """
+        if not isinstance(prefix, bytes):
+            prefix = prefix.encode('utf8')
+        start_index = self.dct.ROOT
+        if prefix:
+            start_index = self.dct.follow_bytes(prefix, start_index)
+            if not start_index:
+                return
+
+        follower = wrapper.EdgeFollower(self.dct, self.guide)
+        more = follower.start(start_index, prefix)
+        while more:
+            yield (follower.decoded_key, follower.value())
+            more = follower.next()
+
     def items(self, prefix=""):
         if not isinstance(prefix, bytes):
             prefix = prefix.encode('utf8')

diff --git a/dawg_python/wrapper.py b/dawg_python/wrapper.py
@@ -17,29 +17,29 @@ def __init__(self):
     "Root index"
 
     def has_value(self, index):
-        "Checks if a given index is related to the end of a key."
+        """Check whether a given index is related to the end of a key."""
         return units.has_leaf(self._units[index])
 
     def value(self, index):
-        "Gets a value from a given index."
+        """Get the value associated with a given index."""
+        # The value is read from a different unit, whose position is
+        # derived from this unit's offset.
         offset = units.offset(self._units[index])
         value_index = (index ^ offset) & units.PRECISION_MASK
         return units.value(self._units[value_index])
 
     def read(self, fp):
-        "Reads a dictionary from an input stream."
+        """Read a dictionary from the binary input stream ``fp``."""
+        # A 4-byte unsigned int (struct format "=I") gives the number of
+        # units, which are then loaded from the stream into self._units.
         base_size = struct.unpack(str("=I"), fp.read(4))[0]
         self._units.fromfile(fp, base_size)
 
     def contains(self, key):
-        "Exact matching."
+        """Exact matching: return whether ``key`` ends at a node with a value.
+
+        Returns False when ``key`` cannot be followed from the root.
+        """
         index = self.follow_bytes(key, self.ROOT)
         if index is None:
             return False
         return self.has_value(index)
 
     def find(self, key):
-        "Exact matching (returns value)"
+        #Exact matching (returns value)
         index = self.follow_bytes(key, self.ROOT)
         if index is None:
             return -1
@@ -48,7 +48,7 @@ def find(self, key):
         return self.value(index)
 
     def follow_char(self, label, index):
-        "Follows a transition"
+        #Follows a transition
         offset = units.offset(self._units[index])
         next_index = (index ^ offset ^ label) & units.PRECISION_MASK
 
@@ -58,7 +58,7 @@ def follow_char(self, label, index):
         return next_index
 
     def follow_bytes(self, s, index):
-        "Follows transitions."
+        #Follows transitions.
         for ch in s:
             index = self.follow_char(int_from_byte(ch), index)
             if index is None:
@@ -95,16 +95,96 @@ def size(self):
         return len(self._units)
 
 
+class EdgeFollower(object):
+    """
+    Walks the edges that leave a single dictionary node.
+
+    ``start()`` positions the follower on the first child of a node and
+    ``next()`` advances to the following sibling. After either call
+    returns True:
+
+    * ``key`` is the edge key as a bytearray (prefix + edge bytes),
+    * ``decoded_key`` is the same key decoded from UTF-8,
+    * ``value()`` / ``get_cur_edge()`` describe the node the edge leads to.
+
+    When an edge label is the lead byte of a multibyte UTF-8 character,
+    the follower keeps descending through ``guide.child()`` until the key
+    decodes.
+    NOTE(review): only that one child chain is followed, so different
+    characters sharing a lead byte appear to be reported once - confirm
+    whether this is intended.
+    """
+
+    def __init__(self, dic=None, guide=None):
+        self._dic = dic
+        self._guide = guide
+
+    def value(self):
+        """Return the value at the current index, or False if it has none."""
+
+        if self._dic.has_value(self._cur_index):
+            return self._dic.value(self._cur_index)
+        return False
+
+    def start(self, index, prefix=b""):
+        """
+        Set up iteration over the edges leaving ``index``.
+
+        ``prefix`` is the byte string that leads to ``index``. Returns True
+        when positioned on a first edge, False when there is none (in which
+        case a following ``next()`` also returns False).
+        """
+
+        self.key = bytearray(prefix)
+        self.base_key_len = len(self.key)
+        self._parent_index = index
+        self._sib_index = None
+        self._cur_index = None
+
+        if not self._guide.size():
+            return False
+
+        child_label = self._guide.child(index)  # UCharType
+        if not child_label:
+            return False
+
+        # Follows a transition to the first child.
+        next_index = self._dic.follow_char(child_label, index)
+        if next_index is None:
+            return False
+
+        self._sib_index = next_index
+        self._cur_index = next_index
+        self.key.append(child_label)
+        return self._decode_key()
+
+    def next(self):
+        """
+        Move to the next edge (not necessarily a terminal).
+
+        Returns True when positioned on another edge, False when the
+        siblings are exhausted or a multibyte edge cannot be completed.
+        """
+
+        if not self._sib_index:
+            return False
+
+        sibling_label = self._guide.sibling(self._sib_index)
+        self._sib_index = self._dic.follow_char(sibling_label,
+                                                self._parent_index)
+        self._cur_index = self._sib_index
+        if not self._sib_index:
+            return False
+
+        # Drop the previous edge's bytes before appending the new label.
+        self.key = self.key[:self.base_key_len]
+        self.key.append(sibling_label)
+        return self._decode_key()
+
+    def _decode_key(self):
+        """
+        Decode ``key`` into ``decoded_key``, extending it while it ends in
+        the middle of a multibyte character.
+
+        Each failed decode appends the guide's child label of the *current*
+        node and moves ``_cur_index`` there; ``_sib_index`` is left alone so
+        sibling iteration continues from the lead byte. Returns True once
+        the key decodes, False when no child can be followed.
+        """
+
+        while True:
+            try:
+                self.decoded_key = self.key.decode('utf8')
+                return True
+            except UnicodeDecodeError:
+                pass
+
+            child_label = self._guide.child(self._cur_index)
+            self._cur_index = self._dic.follow_char(child_label,
+                                                    self._cur_index)
+            if not self._cur_index:
+                return False
+            self.key.append(child_label)
+
+    def get_cur_edge(self):
+        """
+        Return ``(decoded_key, is_terminal)`` for the current edge, where
+        ``is_terminal`` tells whether the edge leads to a node with a value.
+        """
+
+        return (self.decoded_key, self._dic.has_value(self._cur_index))
+
+
 class Completer(object):
 
     def __init__(self, dic=None, guide=None):
         self._dic = dic
         self._guide = guide
 
     def value(self):
+        "provides list of values at current index"
+
         return self._dic.value(self._last_index)
 
     def start(self, index, prefix=b""):
+        "initial setup for a completer next() action on some prefix"
+
         self.key = bytearray(prefix)
 
         if self._guide.size():
@@ -113,7 +193,6 @@ def start(self, index, prefix=b""):
         else:
             self._index_stack = []
 
-
     def next(self):
         "Gets the next key"
 
@@ -153,7 +232,6 @@ def next(self):
 
         return self._find_terminal(index)
 
-
     def _follow(self, label, index):
         next_index = self._dic.follow_char(label, index)
         if next_index is None:

diff --git a/dev_data/small/int_completion_dawg.dawg b/dev_data/small/int_completion_dawg.dawg
diff --git a/dev_data/small/int_dawg.dawg b/dev_data/small/int_dawg.dawg
diff --git a/tests/test_dawg.py b/tests/test_dawg.py
@@ -46,10 +46,22 @@ def test_keys(self):
         d = self.dawg()
         assert d.keys() == sorted(self.keys)
 
+    def test_edges(self):
+        """edges() lists the edges below a prefix with their terminal flag."""
+        dawg = self.dawg()
+        cases = [
+            ((), [('b', False), ('f', True)]),
+            (('b',), [('ba', False)]),
+            (('fo',), [('foo', True)]),
+        ]
+        for args, expected in cases:
+            assert dawg.edges(*args) == expected
+
     def test_iterkeys(self):
+        # iterkeys() must produce exactly what keys() returns, in order.
         d = self.dawg()
         assert list(d.iterkeys()) == d.keys()
 
+    def test_iter_edges(self):
+        """iteredges() yields the same edges that edges() returns."""
+        d = self.dawg()
+        assert list(d.iteredges()) == [('b', False), ('f', True)]
+        assert list(d.iteredges('b')) == [('ba', False)]
+        # Exercise the generator here too (this used to call edges()).
+        assert list(d.iteredges('fo')) == [('foo', True)]
+
     def test_completion(self):
         d = self.dawg()
 
@@ -79,7 +91,7 @@ def test_prefixes(self):
 
 
 class TestIntDAWG(object):
-    payload = {'foo': 1, 'bar': 5, 'foobar': 3}
+    payload = {'foo': 1, 'bar': 5, 'foobar': 30}
 
     def dawg(self):
+        # Load the prebuilt IntDAWG fixture from the "small" dev data set.
         return dawg_python.IntDAWG().load(data_path('small', 'int_dawg.dawg'))
@@ -119,3 +131,13 @@ def test_completion_keys_with_prefix(self):
 
     def test_completion_items(self):
+        # items() must equal the payload's (key, value) pairs sorted by key.
         assert self.dawg().items() == sorted(self.payload.items(), key=lambda r: r[0])
+
+    def test_completion_edges(self):
+        """edges() reports the stored int for terminals, False otherwise."""
+        cases = [
+            ('ba', [('bar', 5)]),
+            ('foob', [('fooba', False)]),
+            ('fooba', [('foobar', 30)]),
+        ]
+        for prefix, expected in cases:
+            assert self.dawg().edges(prefix) == expected
+
+    def test_completion_iteredges(self):
+        """iteredges() yields the stored int for terminals, False otherwise."""
+        cases = [
+            ('ba', [('bar', 5)]),
+            ('foob', [('fooba', False)]),
+            ('fooba', [('foobar', 30)]),
+        ]
+        for prefix, expected in cases:
+            assert list(self.dawg().iteredges(prefix)) == expected