stoichiograph

Spell words with elemental symbols from the periodic table.
git clone git://git.amin.space/stoichiograph.git
Log | Files | Refs | LICENSE

commit 458d0c1e83aa3e30ad154b8d174e942333b151b1
parent 0890827b23182eda96881b5e2401c9650fb9d2cd
Author: amin <dev@aminmesbah.com>
Date:   Sat, 25 Feb 2017 02:06:11 +0000

Improve docstrings and naming.

FossilOrigin-Name: d7ddc98a2d8aa8802d5080980fd44f3fe35c59709b9fe6eff4c89fa82ef7b0f3
Diffstat:
Mspeller.py | 122++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
1 file changed, 77 insertions(+), 45 deletions(-)

diff --git a/speller.py b/speller.py @@ -1,5 +1,4 @@ from collections import defaultdict, namedtuple -from functools import lru_cache import logging # TODO(amin): Convert symbol tuple to element name or atomic number tuple @@ -25,8 +24,8 @@ ELEMENTS = { # TODO(amin): Support appostrophies # TODO(amin): Add option to require no repeated symbols def spell(word, symbols=ELEMENTS): - """Return a list of any possible ways to spell a word - with a given set of symbols. + """Return a list of any possible ways to spell a word with a given + set of symbols. Example: >>> spell('amputation') @@ -36,7 +35,7 @@ def spell(word, symbols=ELEMENTS): log.debug('Using graph speller') g = Graph() - build_graph(word, g) + build_spelling_graph(word, g) spellings = list() for first in g.firsts(): @@ -54,13 +53,9 @@ def spell(word, symbols=ELEMENTS): return elemental_spellings -# A single node of the graph. -Node = namedtuple('Node', ['value', 'position']) - - class Graph(): - """A directed acyclic graph that stores all possible - elemental spellings of a word. + """A directed acyclic graph that stores all possible elemental + spellings of a word. """ def __init__(self): self._parents_of = defaultdict(set) @@ -75,8 +70,8 @@ class Graph(): return self._parents_of[None] def add_edge(self, parent, child): - """Add a parent-child relatonship to the graph. - None is ok as a key, but not a value. + """Add a parent-child relatonship to the graph. None is ok as a + key, but not a value. """ if parent is not None: self._parents_of[child].add(parent) @@ -92,8 +87,7 @@ class Graph(): ] def export(self): - """Print a string to stdout that can be interpreted by - Graphviz. + """Print a string to stdout that can be interpreted by Graphviz. """ print('digraph G {') for (parent, child) in self.edges(): @@ -103,53 +97,91 @@ class Graph(): print('}') -def find_all_paths(graph, start, end, path=[]): - """Return a list of all paths through the graph from start - to end. - Based on https://www.python.org/doc/essays/graphs/ - """ - path = path + [start] - if start == end: - return [path] - if start not in graph.keys(): - return [] - paths = [] - for node in graph[start]: - if node not in path: - newpaths = find_all_paths(graph, node, end, path) - for newpath in newpaths: - paths.append(tuple(newpath)) - return paths +# A single node of the graph. +Node = namedtuple('Node', ['value', 'position']) -def build_graph(word, graph, symbols=ELEMENTS): - """Given a word and a graph, recursively find all single and - double-character tokens in the word and add them to the graph. +def build_spelling_graph(word, graph, symbols=ELEMENTS): + """Given a word and a graph, find all single and double-character + tokens in the word. Add them to the graph only if they are present + within the given set of allowed symbols. """ - def segments(word, position=0, previous_root=None): - if word == '': + def pop_root(remaining, position=0, previous_root=None): + """Pop the single and double-character roots off the front of a + given string, then recurse into what remains. + + For the word 'because', the roots and remainders look something + like: + + 'b' 'ecause' + 'e' 'cause' + 'c' 'ause' + 'a' 'use' + 'u' 'se' + 's' 'e' + 'e' '' + 'se' '' + 'us' 'e' + 'au' 'se' + 'ca' 'use' + 'ec' 'ause' + 'be' 'cause' + + For each root present in the set of allowed symbols, add an edge + to the graph: + + previous root --> current root. + + Keep track of processed values for `remaining` and do not + evaluate a given value more than once. + + Keep track of the position of each root so that repeated roots + are distinct nodes. + """ + if remaining == '': graph.add_edge(previous_root, None) return - single_root = Node(word[0], position) + single_root = Node(remaining[0], position) if single_root.value.capitalize() in symbols: graph.add_edge(previous_root, single_root) - if word not in processed: - segments(word[1:], position + 1, previous_root=single_root) + if remaining not in processed: + pop_root(remaining[1:], position + 1, previous_root=single_root) - if len(word) >= 2: - double_root = Node(word[0:2], position) + if len(remaining) >= 2: + double_root = Node(remaining[0:2], position) if double_root.value.capitalize() in symbols: graph.add_edge(previous_root, double_root) - if word not in processed: - segments(word[2:], position + 2, previous_root=double_root) - processed.add(word) + if remaining not in processed: + pop_root(remaining[2:], position + 2, previous_root=double_root) + processed.add(remaining) processed = set() - segments(word) + pop_root(word) + + +def find_all_paths(parents_to_children, start, end, path=[]): + """Return a list of all paths through a graph from start to end. + `parents_to_children` is a dict with parent nodes as keys and sets + of child nodes as values. + + Based on https://www.python.org/doc/essays/graphs/ + """ + path = path + [start] + if start == end: + return [path] + if start not in parents_to_children.keys(): + return [] + paths = [] + for node in parents_to_children[start]: + if node not in path: + newpaths = find_all_paths(parents_to_children, node, end, path) + for newpath in newpaths: + paths.append(tuple(newpath)) + return paths if __name__ == '__main__':