stoichiograph

Spell words with elemental symbols from the periodic table.
git clone git://git.amin.space/stoichiograph.git
Log | Files | Refs | LICENSE

speller.py (7570B)


      1 from collections import defaultdict, namedtuple
      2 from io import StringIO
      3 import logging
      4 
      5 # TODO(amin): Convert symbol tuple to element name or atomic number tuple
      6 
      7 log = logging.getLogger(__name__)
      8 log.addHandler(logging.NullHandler())
      9 
     10 ELEMENTS = {
     11     'H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al',
     12     'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe',
     13     'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y',
     14     'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb',
     15     'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd',
     16     'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir',
     17     'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac',
     18     'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No',
     19     'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', 'Cn', 'Nh', 'Fl',
     20     'Mc', 'Lv', 'Ts', 'Og'
     21 }
     22 
     23 
     24 # TODO(amin): Use optional caching/memoization to improve performance
     25 # TODO(amin): Support appostrophies
     26 # TODO(amin): Add option to require no repeated symbols
     27 def spell(word, symbols=ELEMENTS):
     28     """Return a list of any possible ways to spell a word with a given
     29     set of symbols.
     30 
     31     Example:
     32     >>> spell('amputation')
     33     [('Am', 'Pu', 'Ta', 'Ti', 'O', 'N'), ('Am', 'P', 'U', 'Ta', 'Ti', 'O', 'N')]
     34     """
     35     log.info('Word: {}'.format(word))
     36 
     37     g = Graph()
     38     build_spelling_graph(word, g)
     39 
     40     elemental_spellings = sorted(
     41         [
     42             tuple(node.value.capitalize() for node in path)
     43             # There will only ever be at most 2 firsts and 2 lasts.
     44             for first in g.firsts()
     45             for last in g.lasts()
     46             for path in find_all_paths(g._children_of, first, last)
     47         ],
     48         reverse=True
     49     )
     50 
     51     log.info('Spellings: {}'.format(elemental_spellings))
     52 
     53     return elemental_spellings
     54 
     55 
     56 class Graph():
     57     """A directed acyclic graph that stores all possible elemental
     58     spellings of a word.
     59     """
     60     def __init__(self):
     61         self._parents_of = defaultdict(set)
     62         self._children_of = defaultdict(set)
     63 
     64     def firsts(self):
     65         """Return nodes with no parents."""
     66         return self._children_of[None]
     67 
     68     def lasts(self):
     69         """Return nodes with no children."""
     70         return self._parents_of[None]
     71 
     72     def add_edge(self, parent, child):
     73         """Add a parent-child relatonship to the graph. None is ok as a
     74         key, but not a value.
     75         """
     76         log.debug('add_edge({}, {})'.format(parent, child))
     77         if parent is not None:
     78             self._parents_of[child].add(parent)
     79         if child is not None:
     80             self._children_of[parent].add(child)
     81 
     82     def nodes(self, connected_only=True):
     83         """Return a list of all nodes."""
     84         if connected_only:
     85             return set(
     86                 node for node in
     87                 set(self._children_of.keys()) | set(self._parents_of.keys())
     88                 if node is not None
     89             )
     90         else:
     91             return set(
     92                 node for node in
     93                 set(self._children_of.keys())
     94                 | set(self._parents_of.keys())
     95                 | set(v for s in self._children_of.values() for v in s)
     96                 | set(v for s in self._parents_of.values() for v in s)
     97                 if node is not None
     98             )
     99 
    100     def edges(self):
    101         """Return a list of all parent-child relationships."""
    102         return [
    103             (parent, child)
    104             for parent in self._children_of
    105             for child in self._children_of[parent]
    106         ]
    107 
    108     # TODO(amin): Eliminate dead-end nodes from exported graph.
    109     def export(self, connected_only=True):
    110         """Print a string to stdout that can be piped to Graphviz to
    111         generate a graph diagram.
    112         """
    113         export = StringIO()
    114         export.write('digraph G {\n')
    115         export.write('    graph [rankdir=LR];\n')
    116         export.write('    node [width=0.75 shape=circle];\n')
    117 
    118         edges = [
    119             (p, c)
    120             for p, c in self.edges()
    121             if p is not None and c is not None
    122         ]
    123         for parent, child in sorted(edges):
    124             export.write('    "{}" -> "{}";\n'.format(parent, child))
    125 
    126         for node in sorted(self.nodes(connected_only=connected_only)):
    127             export.write('    "{}" [label="{}"];\n'.format(node, node.value.capitalize()))
    128         export.write('}')
    129         return export.getvalue()
    130 
    131 
    132 # A single node of the graph. A glyph and its position in the word.
    133 Node = namedtuple('Node', ['value', 'position'])
    134 
    135 
    136 def build_spelling_graph(word, graph, symbols=ELEMENTS):
    137     """Given a word and a graph, find all single and double-character
    138     glyphs in the word. Add them to the graph only if they are present
    139     within the given set of allowed symbols.
    140     """
    141 
    142     def pop_root(remaining, position=0, previous_root=None):
    143         """Pop the single and double-character roots off the front of a
    144         given string, then recurse into what remains.
    145 
    146         For the word 'because', the roots and remainders for each call
    147         look something like:
    148 
    149             'b' 'ecause'
    150                 'e' 'cause'
    151                     'c' 'ause'
    152                         'a' 'use'
    153                             'u' 'se'
    154                                 's' 'e'
    155                                     'e' ''
    156                                 'se' ''
    157                             'us' 'e'
    158                         'au' 'se'
    159                     'ca' 'use'
    160                 'ec' 'ause'
    161             'be' 'cause'
    162 
    163         For each root present in the set of allowed symbols, add an edge
    164         to the graph:
    165 
    166             previous root --> current root.
    167 
    168         Keep track of processed values for `remaining` and do not
    169         evaluate a given value more than once.
    170 
    171         Keep track of the position of each root so that repeated roots
    172         are distinct nodes.
    173         """
    174         if remaining == '':
    175             graph.add_edge(previous_root, None)
    176             return
    177 
    178         single_root = Node(remaining[0], position)
    179         if single_root.value.capitalize() in symbols:
    180             graph.add_edge(previous_root, single_root)
    181 
    182             if remaining not in processed:
    183                 pop_root(remaining[1:], position + 1, previous_root=single_root)
    184 
    185         if len(remaining) >= 2:
    186             double_root = Node(remaining[0:2], position)
    187             if double_root.value.capitalize() in symbols:
    188                 graph.add_edge(previous_root, double_root)
    189 
    190                 if remaining not in processed:
    191                     pop_root(remaining[2:], position + 2, previous_root=double_root)
    192         processed.add(remaining)
    193 
    194     processed = set()
    195     pop_root(word)
    196 
    197 
    198 def find_all_paths(parents_to_children, start, end, path=[]):
    199     """Return a list of all paths through a graph from start to end.
    200     `parents_to_children` is a dict with parent nodes as keys and sets
    201     of child nodes as values.
    202 
    203     Based on https://www.python.org/doc/essays/graphs/
    204     """
    205     path = path + [start]
    206     if start == end:
    207         return [path]
    208     if start not in parents_to_children.keys():
    209         return []
    210     paths = []
    211     for node in parents_to_children[start]:
    212         if node not in path:
    213             newpaths = find_all_paths(parents_to_children, node, end, path)
    214             for newpath in newpaths:
    215                 paths.append(tuple(newpath))
    216     return paths
    217 
    218 
    219 if __name__ == '__main__':
    220     test_word = 'Mockery'
    221     print('{}:\n{}'.format(test_word, spell(test_word)))