Improve docstrings and naming. - stoichiograph - Spell words with elemental symbols from the periodic table.

commit 458d0c1e83aa3e30ad154b8d174e942333b151b1
parent 0890827b23182eda96881b5e2401c9650fb9d2cd
Author: amin <dev@aminmesbah.com>
Date:   Sat, 25 Feb 2017 02:06:11 +0000

Improve docstrings and naming.

FossilOrigin-Name: d7ddc98a2d8aa8802d5080980fd44f3fe35c59709b9fe6eff4c89fa82ef7b0f3
Diffstat:
M speller.py  | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------

1 file changed, 77 insertions(+), 45 deletions(-)
diff --git a/speller.py b/speller.py
@@ -1,5 +1,4 @@
 from collections import defaultdict, namedtuple
-from functools import lru_cache
 import logging
 
 # TODO(amin): Convert symbol tuple to element name or atomic number tuple
@@ -25,8 +24,8 @@ ELEMENTS = {
 # TODO(amin): Support appostrophies
 # TODO(amin): Add option to require no repeated symbols
 def spell(word, symbols=ELEMENTS):
-    """Return a list of any possible ways to spell a word
-    with a given set of symbols.
+    """Return a list of any possible ways to spell a word with a given
+    set of symbols.
 
     Example:
     >>> spell('amputation')
@@ -36,7 +35,7 @@ def spell(word, symbols=ELEMENTS):
 
     log.debug('Using graph speller')
     g = Graph()
-    build_graph(word, g)
+    build_spelling_graph(word, g)
 
     spellings = list()
     for first in g.firsts():
@@ -54,13 +53,9 @@ def spell(word, symbols=ELEMENTS):
     return elemental_spellings
 
 
-# A single node of the graph.
-Node = namedtuple('Node', ['value', 'position'])
-
-
 class Graph():
-    """A directed acyclic graph that stores all possible
-    elemental spellings of a word.
+    """A directed acyclic graph that stores all possible elemental
+    spellings of a word.
     """
     def __init__(self):
         self._parents_of = defaultdict(set)
@@ -75,8 +70,8 @@ class Graph():
         return self._parents_of[None]
 
     def add_edge(self, parent, child):
-        """Add a parent-child relatonship to the graph.
-        None is ok as a key, but not a value.
+        """Add a parent-child relatonship to the graph. None is ok as a
+        key, but not a value.
         """
         if parent is not None:
             self._parents_of[child].add(parent)
@@ -92,8 +87,7 @@ class Graph():
         ]
 
     def export(self):
-        """Print a string to stdout that can be interpreted by
-        Graphviz.
+        """Print a string to stdout that can be interpreted by Graphviz.
         """
         print('digraph G {')
         for (parent, child) in self.edges():
@@ -103,53 +97,91 @@ class Graph():
         print('}')
 
 
-def find_all_paths(graph, start, end, path=[]):
-    """Return a list of all paths through the graph from start
-    to end.
-    Based on https://www.python.org/doc/essays/graphs/
-    """
-    path = path + [start]
-    if start == end:
-        return [path]
-    if start not in graph.keys():
-        return []
-    paths = []
-    for node in graph[start]:
-        if node not in path:
-            newpaths = find_all_paths(graph, node, end, path)
-            for newpath in newpaths:
-                paths.append(tuple(newpath))
-    return paths
+# A single node of the graph.
+Node = namedtuple('Node', ['value', 'position'])
 
 
-def build_graph(word, graph, symbols=ELEMENTS):
-    """Given a word and a graph, recursively find all single and
-    double-character tokens in the word and add them to the graph.
+def build_spelling_graph(word, graph, symbols=ELEMENTS):
+    """Given a word and a graph, find all single and double-character
+    tokens in the word. Add them to the graph only if they are present
+    within the given set of allowed symbols.
     """
 
-    def segments(word, position=0, previous_root=None):
-        if word == '':
+    def pop_root(remaining, position=0, previous_root=None):
+        """Pop the single and double-character roots off the front of a
+        given string, then recurse into what remains.
+
+        For the word 'because', the roots and remainders look something
+        like:
+
+            'b' 'ecause'
+                'e' 'cause'
+                    'c' 'ause'
+                        'a' 'use'
+                            'u' 'se'
+                                's' 'e'
+                                    'e' ''
+                                'se' ''
+                            'us' 'e'
+                        'au' 'se'
+                    'ca' 'use'
+                'ec' 'ause'
+            'be' 'cause'
+
+        For each root present in the set of allowed symbols, add an edge
+        to the graph:
+
+            previous root --> current root.
+
+        Keep track of processed values for `remaining` and do not
+        evaluate a given value more than once.
+
+        Keep track of the position of each root so that repeated roots
+        are distinct nodes.
+        """
+        if remaining == '':
             graph.add_edge(previous_root, None)
             return
 
-        single_root = Node(word[0], position)
+        single_root = Node(remaining[0], position)
         if single_root.value.capitalize() in symbols:
             graph.add_edge(previous_root, single_root)
 
-            if word not in processed:
-                segments(word[1:], position + 1, previous_root=single_root)
+            if remaining not in processed:
+                pop_root(remaining[1:], position + 1, previous_root=single_root)
 
-        if len(word) >= 2:
-            double_root = Node(word[0:2], position)
+        if len(remaining) >= 2:
+            double_root = Node(remaining[0:2], position)
             if double_root.value.capitalize() in symbols:
                 graph.add_edge(previous_root, double_root)
 
-                if word not in processed:
-                    segments(word[2:], position + 2, previous_root=double_root)
-        processed.add(word)
+                if remaining not in processed:
+                    pop_root(remaining[2:], position + 2, previous_root=double_root)
+        processed.add(remaining)
 
     processed = set()
-    segments(word)
+    pop_root(word)
+
+
+def find_all_paths(parents_to_children, start, end, path=[]):
+    """Return a list of all paths through a graph from start to end.
+    `parents_to_children` is a dict with parent nodes as keys and sets
+    of child nodes as values.
+
+    Based on https://www.python.org/doc/essays/graphs/
+    """
+    path = path + [start]
+    if start == end:
+        return [path]
+    if start not in parents_to_children.keys():
+        return []
+    paths = []
+    for node in parents_to_children[start]:
+        if node not in path:
+            newpaths = find_all_paths(parents_to_children, node, end, path)
+            for newpath in newpaths:
+                paths.append(tuple(newpath))
+    return paths
 
 
 if __name__ == '__main__':

	stoichiograph Spell words with elemental symbols from the periodic table.
	git clone git://git.amin.space/stoichiograph.git
	Log \| Files \| Refs \| LICENSE