stoichiograph

Spell words with elemental symbols from the periodic table.
git clone git://git.amin.space/stoichiograph.git
Log | Files | Refs | LICENSE

commit affb5199928d5504a6afbda7d86fb32bb56817c1
parent 3d73dfb5f62edf6fa50c3c63852628dca1a51a94
Author: amin <dev@aminmesbah.com>
Date:   Wed, 22 Feb 2017 01:04:09 +0000

Find spellings by building a graph.

FossilOrigin-Name: 624537ac7f7dfbb382640ae461ca19022cea14eed0a16da4173b2b0928017fc9
Diffstat:
Adag.py | 112+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mspeller.py | 19++++++++++++++++---
Mstoichiograph.py | 11++++++++---
3 files changed, 136 insertions(+), 6 deletions(-)

diff --git a/dag.py b/dag.py @@ -0,0 +1,112 @@ +from collections import defaultdict, namedtuple + + +# A single node of the graph. +Node = namedtuple('Node', ['value', 'position']) + + +class Graph(): + """A directed acyclic graph that stores all possible + elemental spellings of a word. + """ + def __init__(self): + self._parents_of = defaultdict(set) + self._children_of = defaultdict(set) + + def firsts(self): + """Return nodes with no parents.""" + return self._children_of[None] + + def lasts(self): + """Return nodes with no children.""" + return self._parents_of[None] + + def add_edge(self, parent, child): + """Add a parent-child to child relatonship to the graph. + None is ok as a key, but not a value. + """ + if parent is not None: + self._parents_of[child].add(parent) + if child is not None: + self._children_of[parent].add(child) + + def edges(self): + """Return a list of all parent-child relationships.""" + return [ + (parent, child) + for parent in self._children_of + for child in self._children_of[parent] + ] + + def export(self): + """Print a string to stdout that can be interpreted by + Graphviz. + """ + print('digraph G {') + for (parent, child) in self.edges(): + print('\t{} -> {}'.format(parent.value, child.value)) + print('}') + + +def find_all_paths(graph, start, end, path=[]): + """Return a list of all paths through the graph from start + to end. + Based on https://www.python.org/doc/essays/graphs/ + """ + path = path + [start] + if start == end: + return [path] + if start not in graph.keys(): + return [] + paths = [] + for node in graph[start]: + if node not in path: + newpaths = find_all_paths(graph, node, end, path) + for newpath in newpaths: + paths.append(tuple(newpath)) + return paths + + +def build_graph(word, graph): + """Given a word and a graph, recursively find all single and + double-character tokens in the word and add them to the graph. + """ + + def segments(word, position=0, previous_root=None): + if word == '': + graph.add_edge(previous_root, None) + return + + single_root = Node(word[0], position) + graph.add_edge(previous_root, single_root) + + if word not in processed: + single_stem = word[1:] + segments(single_stem, position + 1, previous_root=single_root) + + if len(word) >= 2: + double_root = Node(word[0:2], position) + graph.add_edge(previous_root, double_root) + + if word not in processed: + double_stem = word[2:] + segments(double_stem, position + 2, previous_root=double_root) + processed.add(word) + + processed = set() + segments(word) + + +if __name__ == '__main__': + from pprint import pprint + w = 'inconspicuous' + g = Graph() + build_graph(w, g) + + spellings = list() + for first in g._children_of[None]: + for last in g._parents_of[None]: + for path in find_all_paths(g._children_of, first, last): + spellings.append(tuple(node.value for node in path)) + + pprint(spellings) diff --git a/speller.py b/speller.py @@ -1,6 +1,7 @@ from functools import lru_cache from itertools import chain, product import logging +import dag # TODO(amin): Profile and optimize # TODO(amin): Add performance reporting to log @@ -27,7 +28,7 @@ ELEMENTS = ( # TODO(amin): Use optional caching/memoization to improve performance # TODO(amin): Support appostrophies # TODO(amin): Add option to require no repeated symbols -def spell(word, symbols=ELEMENTS): +def spell(word, use_graph=False, symbols=ELEMENTS): """Return a list of any possible ways to spell a word with a given set of symbols. @@ -36,9 +37,21 @@ def spell(word, symbols=ELEMENTS): [('Am', 'Pu', 'Ta', 'Ti', 'O', 'N'), ('Am', 'P', 'U', 'Ta', 'Ti', 'O', 'N')] """ log.info('Word: {}'.format(word)) - groupings = generate_groupings(len(word)) - spellings = [map_word(word, grouping) for grouping in groupings] + if use_graph: + log.debug('Using graph speller') + g = dag.Graph() + dag.build_graph(word, g) + + spellings = list() + for first in g.firsts(): + for last in g.lasts(): + for path in dag.find_all_paths(g._children_of, first, last): + spellings.append(tuple(node.value for node in path)) + + else: + groupings = generate_groupings(len(word)) + spellings = [map_word(word, grouping) for grouping in groupings] elemental_spellings = [ tuple(token.capitalize() for token in spelling) diff --git a/stoichiograph.py b/stoichiograph.py @@ -42,7 +42,7 @@ def get_args(): help='print list of elemental symbols and exit' ) parser.add_argument( - '-o', '--output_file', + '-o', '--output-file', help='path of output json file' ) parser.add_argument( @@ -61,6 +61,11 @@ def get_args(): '-V', '--version', action='store_true', help='print version info and exit' ) + # TODO(amin): Remove this + parser.add_argument( + '--use-graph', action='store_true', + help='use the graph-based speller' + ) return parser.parse_args() @@ -116,9 +121,9 @@ def main(): for word in words: if TUPLES: - spellings = speller.spell(word) + spellings = speller.spell(word, use_graph=args.use_graph) else: - spellings = [''.join(s) for s in speller.spell(word)] + spellings = [''.join(s) for s in speller.spell(word, use_graph=args.use_graph)] if spellings: spellable[word] = spellings