commit affb5199928d5504a6afbda7d86fb32bb56817c1
parent 3d73dfb5f62edf6fa50c3c63852628dca1a51a94
Author: amin <dev@aminmesbah.com>
Date: Wed, 22 Feb 2017 01:04:09 +0000
Find spellings by building a graph.
FossilOrigin-Name: 624537ac7f7dfbb382640ae461ca19022cea14eed0a16da4173b2b0928017fc9
Diffstat:
A | dag.py | | | 112 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | speller.py | | | 19 | ++++++++++++++++--- |
M | stoichiograph.py | | | 11 | ++++++++--- |
3 files changed, 136 insertions(+), 6 deletions(-)
diff --git a/dag.py b/dag.py
@@ -0,0 +1,112 @@
+from collections import defaultdict, namedtuple
+
+
+# A single node of the graph.
+Node = namedtuple('Node', ['value', 'position'])
+
+
+class Graph():
+ """A directed acyclic graph that stores all possible
+ elemental spellings of a word.
+ """
+ def __init__(self):
+ self._parents_of = defaultdict(set)
+ self._children_of = defaultdict(set)
+
+ def firsts(self):
+ """Return nodes with no parents."""
+ return self._children_of[None]
+
+ def lasts(self):
+ """Return nodes with no children."""
+ return self._parents_of[None]
+
+ def add_edge(self, parent, child):
+ """Add a parent-child to child relatonship to the graph.
+ None is ok as a key, but not a value.
+ """
+ if parent is not None:
+ self._parents_of[child].add(parent)
+ if child is not None:
+ self._children_of[parent].add(child)
+
+ def edges(self):
+ """Return a list of all parent-child relationships."""
+ return [
+ (parent, child)
+ for parent in self._children_of
+ for child in self._children_of[parent]
+ ]
+
+ def export(self):
+ """Print a string to stdout that can be interpreted by
+ Graphviz.
+ """
+ print('digraph G {')
+ for (parent, child) in self.edges():
+ print('\t{} -> {}'.format(parent.value, child.value))
+ print('}')
+
+
+def find_all_paths(graph, start, end, path=[]):
+ """Return a list of all paths through the graph from start
+ to end.
+ Based on https://www.python.org/doc/essays/graphs/
+ """
+ path = path + [start]
+ if start == end:
+ return [path]
+ if start not in graph.keys():
+ return []
+ paths = []
+ for node in graph[start]:
+ if node not in path:
+ newpaths = find_all_paths(graph, node, end, path)
+ for newpath in newpaths:
+ paths.append(tuple(newpath))
+ return paths
+
+
+def build_graph(word, graph):
+ """Given a word and a graph, recursively find all single and
+ double-character tokens in the word and add them to the graph.
+ """
+
+ def segments(word, position=0, previous_root=None):
+ if word == '':
+ graph.add_edge(previous_root, None)
+ return
+
+ single_root = Node(word[0], position)
+ graph.add_edge(previous_root, single_root)
+
+ if word not in processed:
+ single_stem = word[1:]
+ segments(single_stem, position + 1, previous_root=single_root)
+
+ if len(word) >= 2:
+ double_root = Node(word[0:2], position)
+ graph.add_edge(previous_root, double_root)
+
+ if word not in processed:
+ double_stem = word[2:]
+ segments(double_stem, position + 2, previous_root=double_root)
+ processed.add(word)
+
+ processed = set()
+ segments(word)
+
+
+if __name__ == '__main__':
+ from pprint import pprint
+ w = 'inconspicuous'
+ g = Graph()
+ build_graph(w, g)
+
+ spellings = list()
+ for first in g._children_of[None]:
+ for last in g._parents_of[None]:
+ for path in find_all_paths(g._children_of, first, last):
+ spellings.append(tuple(node.value for node in path))
+
+ pprint(spellings)
diff --git a/speller.py b/speller.py
@@ -1,6 +1,7 @@
from functools import lru_cache
from itertools import chain, product
import logging
+import dag
# TODO(amin): Profile and optimize
# TODO(amin): Add performance reporting to log
@@ -27,7 +28,7 @@ ELEMENTS = (
# TODO(amin): Use optional caching/memoization to improve performance
# TODO(amin): Support appostrophies
# TODO(amin): Add option to require no repeated symbols
-def spell(word, symbols=ELEMENTS):
+def spell(word, use_graph=False, symbols=ELEMENTS):
"""Return a list of any possible ways to spell a word
with a given set of symbols.
@@ -36,9 +37,21 @@ def spell(word, symbols=ELEMENTS):
[('Am', 'Pu', 'Ta', 'Ti', 'O', 'N'), ('Am', 'P', 'U', 'Ta', 'Ti', 'O', 'N')]
"""
log.info('Word: {}'.format(word))
- groupings = generate_groupings(len(word))
- spellings = [map_word(word, grouping) for grouping in groupings]
+ if use_graph:
+ log.debug('Using graph speller')
+ g = dag.Graph()
+ dag.build_graph(word, g)
+
+ spellings = list()
+ for first in g.firsts():
+ for last in g.lasts():
+ for path in dag.find_all_paths(g._children_of, first, last):
+ spellings.append(tuple(node.value for node in path))
+
+ else:
+ groupings = generate_groupings(len(word))
+ spellings = [map_word(word, grouping) for grouping in groupings]
elemental_spellings = [
tuple(token.capitalize() for token in spelling)
diff --git a/stoichiograph.py b/stoichiograph.py
@@ -42,7 +42,7 @@ def get_args():
help='print list of elemental symbols and exit'
)
parser.add_argument(
- '-o', '--output_file',
+ '-o', '--output-file',
help='path of output json file'
)
parser.add_argument(
@@ -61,6 +61,11 @@ def get_args():
'-V', '--version', action='store_true',
help='print version info and exit'
)
+ # TODO(amin): Remove this
+ parser.add_argument(
+ '--use-graph', action='store_true',
+ help='use the graph-based speller'
+ )
return parser.parse_args()
@@ -116,9 +121,9 @@ def main():
for word in words:
if TUPLES:
- spellings = speller.spell(word)
+ spellings = speller.spell(word, use_graph=args.use_graph)
else:
- spellings = [''.join(s) for s in speller.spell(word)]
+ spellings = [''.join(s) for s in speller.spell(word, use_graph=args.use_graph)]
if spellings:
spellable[word] = spellings