speller.py (7570B)
1 from collections import defaultdict, namedtuple 2 from io import StringIO 3 import logging 4 5 # TODO(amin): Convert symbol tuple to element name or atomic number tuple 6 7 log = logging.getLogger(__name__) 8 log.addHandler(logging.NullHandler()) 9 10 ELEMENTS = { 11 'H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 12 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 13 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 14 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 15 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 16 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 17 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 18 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 19 'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', 'Cn', 'Nh', 'Fl', 20 'Mc', 'Lv', 'Ts', 'Og' 21 } 22 23 24 # TODO(amin): Use optional caching/memoization to improve performance 25 # TODO(amin): Support appostrophies 26 # TODO(amin): Add option to require no repeated symbols 27 def spell(word, symbols=ELEMENTS): 28 """Return a list of any possible ways to spell a word with a given 29 set of symbols. 30 31 Example: 32 >>> spell('amputation') 33 [('Am', 'Pu', 'Ta', 'Ti', 'O', 'N'), ('Am', 'P', 'U', 'Ta', 'Ti', 'O', 'N')] 34 """ 35 log.info('Word: {}'.format(word)) 36 37 g = Graph() 38 build_spelling_graph(word, g) 39 40 elemental_spellings = sorted( 41 [ 42 tuple(node.value.capitalize() for node in path) 43 # There will only ever be at most 2 firsts and 2 lasts. 44 for first in g.firsts() 45 for last in g.lasts() 46 for path in find_all_paths(g._children_of, first, last) 47 ], 48 reverse=True 49 ) 50 51 log.info('Spellings: {}'.format(elemental_spellings)) 52 53 return elemental_spellings 54 55 56 class Graph(): 57 """A directed acyclic graph that stores all possible elemental 58 spellings of a word. 59 """ 60 def __init__(self): 61 self._parents_of = defaultdict(set) 62 self._children_of = defaultdict(set) 63 64 def firsts(self): 65 """Return nodes with no parents.""" 66 return self._children_of[None] 67 68 def lasts(self): 69 """Return nodes with no children.""" 70 return self._parents_of[None] 71 72 def add_edge(self, parent, child): 73 """Add a parent-child relatonship to the graph. None is ok as a 74 key, but not a value. 75 """ 76 log.debug('add_edge({}, {})'.format(parent, child)) 77 if parent is not None: 78 self._parents_of[child].add(parent) 79 if child is not None: 80 self._children_of[parent].add(child) 81 82 def nodes(self, connected_only=True): 83 """Return a list of all nodes.""" 84 if connected_only: 85 return set( 86 node for node in 87 set(self._children_of.keys()) | set(self._parents_of.keys()) 88 if node is not None 89 ) 90 else: 91 return set( 92 node for node in 93 set(self._children_of.keys()) 94 | set(self._parents_of.keys()) 95 | set(v for s in self._children_of.values() for v in s) 96 | set(v for s in self._parents_of.values() for v in s) 97 if node is not None 98 ) 99 100 def edges(self): 101 """Return a list of all parent-child relationships.""" 102 return [ 103 (parent, child) 104 for parent in self._children_of 105 for child in self._children_of[parent] 106 ] 107 108 # TODO(amin): Eliminate dead-end nodes from exported graph. 109 def export(self, connected_only=True): 110 """Print a string to stdout that can be piped to Graphviz to 111 generate a graph diagram. 112 """ 113 export = StringIO() 114 export.write('digraph G {\n') 115 export.write(' graph [rankdir=LR];\n') 116 export.write(' node [width=0.75 shape=circle];\n') 117 118 edges = [ 119 (p, c) 120 for p, c in self.edges() 121 if p is not None and c is not None 122 ] 123 for parent, child in sorted(edges): 124 export.write(' "{}" -> "{}";\n'.format(parent, child)) 125 126 for node in sorted(self.nodes(connected_only=connected_only)): 127 export.write(' "{}" [label="{}"];\n'.format(node, node.value.capitalize())) 128 export.write('}') 129 return export.getvalue() 130 131 132 # A single node of the graph. A glyph and its position in the word. 133 Node = namedtuple('Node', ['value', 'position']) 134 135 136 def build_spelling_graph(word, graph, symbols=ELEMENTS): 137 """Given a word and a graph, find all single and double-character 138 glyphs in the word. Add them to the graph only if they are present 139 within the given set of allowed symbols. 140 """ 141 142 def pop_root(remaining, position=0, previous_root=None): 143 """Pop the single and double-character roots off the front of a 144 given string, then recurse into what remains. 145 146 For the word 'because', the roots and remainders for each call 147 look something like: 148 149 'b' 'ecause' 150 'e' 'cause' 151 'c' 'ause' 152 'a' 'use' 153 'u' 'se' 154 's' 'e' 155 'e' '' 156 'se' '' 157 'us' 'e' 158 'au' 'se' 159 'ca' 'use' 160 'ec' 'ause' 161 'be' 'cause' 162 163 For each root present in the set of allowed symbols, add an edge 164 to the graph: 165 166 previous root --> current root. 167 168 Keep track of processed values for `remaining` and do not 169 evaluate a given value more than once. 170 171 Keep track of the position of each root so that repeated roots 172 are distinct nodes. 173 """ 174 if remaining == '': 175 graph.add_edge(previous_root, None) 176 return 177 178 single_root = Node(remaining[0], position) 179 if single_root.value.capitalize() in symbols: 180 graph.add_edge(previous_root, single_root) 181 182 if remaining not in processed: 183 pop_root(remaining[1:], position + 1, previous_root=single_root) 184 185 if len(remaining) >= 2: 186 double_root = Node(remaining[0:2], position) 187 if double_root.value.capitalize() in symbols: 188 graph.add_edge(previous_root, double_root) 189 190 if remaining not in processed: 191 pop_root(remaining[2:], position + 2, previous_root=double_root) 192 processed.add(remaining) 193 194 processed = set() 195 pop_root(word) 196 197 198 def find_all_paths(parents_to_children, start, end, path=[]): 199 """Return a list of all paths through a graph from start to end. 200 `parents_to_children` is a dict with parent nodes as keys and sets 201 of child nodes as values. 202 203 Based on https://www.python.org/doc/essays/graphs/ 204 """ 205 path = path + [start] 206 if start == end: 207 return [path] 208 if start not in parents_to_children.keys(): 209 return [] 210 paths = [] 211 for node in parents_to_children[start]: 212 if node not in path: 213 newpaths = find_all_paths(parents_to_children, node, end, path) 214 for newpath in newpaths: 215 paths.append(tuple(newpath)) 216 return paths 217 218 219 if __name__ == '__main__': 220 test_word = 'Mockery' 221 print('{}:\n{}'.format(test_word, spell(test_word)))