stoichiograph

Spell words with elemental symbols from the periodic table.
git clone git://git.amin.space/stoichiograph.git
Log | Files | Refs | LICENSE

commit b5231a079f27c9cf4061314b67be0b48a38a517a
parent a0f9ebae95c7485f207ff67d3603d90133f0fd33
Author: amin <dev@aminmesbah.com>
Date:   Wed,  7 Sep 2016 23:31:13 +0000

Add function to map a word to a grouping scheme. Simplify control flow.

FossilOrigin-Name: 0ede8311bbcec66575422d99bb203bd6a322e729876c6cc38085cba7b87f5d09
Diffstat:
M speller.py | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------
M tests.py | 37 ++++++++++++++++++++++++++++---------
2 files changed, 121 insertions(+), 58 deletions(-)

diff --git a/speller.py b/speller.py @@ -1,21 +1,35 @@ # TODO: -# test that all letters in word are present in some element -# generate group_maps only for the exact number of chars in word -# - +# - eliminate unnecessary functions +# - simplify +# - use consistent terminology +import csv from collections import namedtuple from itertools import chain, product -import csv -import sys - - -def get_csv_data(file_name, column): +from pprint import pprint + +ELEMENTS = ( + 'Ac', 'Ag', 'Al', 'Am', 'Ar', 'As', 'At', 'Au', 'B', 'Ba', 'Be', 'Bh', + 'Bi', 'Bk', 'Br', 'C', 'Ca', 'Cd', 'Ce', 'Cf', 'Cl', 'Cm', 'Co', 'Cr', + 'Cs', 'Cu', 'Db', 'Ds', 'Dy', 'Er', 'Es', 'Eu', 'F', 'Fe', 'Fm', 'Fr', + 'Ga', 'Gd', 'Ge', 'H', 'He', 'Hf', 'Hg', 'Ho', 'Hs', 'I', 'In', 'Ir', + 'K', 'Kr', 'La', 'Li', 'Lr', 'Lu', 'Md', 'Mg', 'Mn', 'Mo', 'Mt', 'N', + 'Na', 'Nb', 'Nd', 'Ne', 'Ni', 'No', 'Np', 'O', 'Os', 'P', 'Pa', 'Pb', + 'Pd', 'Pm', 'Po', 'Pr', 'Pt', 'Pu', 'Ra', 'Rb', 'Re', 'Rf', 'Rg', 'Rh', + 'Rn', 'Ru', 'S', 'Sb', 'Sc', 'Se', 'Sg', 'Si', 'Sm', 'Sn', 'Sr', 'Ta', + 'Tb', 'Tc', 'Te', 'Th', 'Ti', 'Tl', 'Tm', 'U', 'Uub', 'Uuh', 'Uuo', + 'Uup', 'Uuq', 'Uus', 'Uut', 'V', 'W', 'Xe', 'Y', 'Yb', 'Zn', 'Zr' +) + + +def get_csv_data(file_name, column, header=True): """Return in a list all data from a given column of a .csv file""" + data = [] with open(file_name) as infile: csv_reader = csv.reader(infile, skipinitialspace=True, delimiter=',') - next(csv_reader, None) + if header: + next(csv_reader, None) # skip header row for row in csv_reader: data.append(row[column]) @@ -24,64 +38,94 @@ def get_csv_data(file_name, column): def tokenize_sequence(sequence): """Return a list each of all single and double character tokens.""" - t = namedtuple('Tokens', (['single', 'pair'])) - single = [sequence[i:i+1] for i in range(0, len(sequence))] - pair = [sequence[i:i+2] for i in range(0, len(sequence) - 1)] - tokens = t(single, pair) + Tokens = namedtuple('Tokens', (['singles', 'doubles'])) + + singles = 
tuple(sequence[i] for i in range(0, len(sequence))) + doubles = tuple(sequence[i:i+2] for i in range(0, len(sequence) - 1)) - return tokens + return Tokens(singles, doubles) def find_matches(sequence, symbols): - """Return a list of all element symbols matching - an item in the given sequence. + """Return a dictionary of symbols and indices for all + symbols that match an item in the given sequence. """ - matches = [] - indices = [] - lower_symbols = [i.lower() for i in symbols] - lower_sequence = [i.lower() for i in sequence] - for i in lower_sequence: - matches += (x for x in lower_symbols if x == i) - indices += (lower_symbols.index(x) for x in lower_symbols if x == i) + return { + symbol: index + for character in sequence + for index, symbol in enumerate(symbols) + if symbol.lower() == character.lower() + } - return matches +def groupings(word, token_sizes=(1, 2)): + """Return a tuple of all permutations of possible character + grouping arrangements of a word. -def groupings(word, group_sizes = [1,2]): - """Return a list of all permutations of possible character grouping - arrangements of a word. group_sizes defines the possible sizes of - character groups, and by default allows only singles and pairs. + token_sizes defines the possible sizes of character groups, + and by default allows only singles and pairs. 
""" - group_maps = [] - length = len(word) - cartesian_product = (product(group_sizes, repeat=r) - for r in range(1, length + 1)) - products = chain.from_iterable(cartesian_product) - # include only products that represent the correct number of chars - for p in products: - if sum(p) == length: - p = [tuple(x for x in p)] - for x in p: - if x not in group_maps: - group_maps.append(x) + cartesian_products = ( + product(token_sizes, repeat=r) + for r in range(1, len(word) + 1) + ) + + # include only groupings that represent the correct number of chars + groupings = tuple( + grouping + for grouping in chain.from_iterable(cartesian_products) + if sum(grouping) == len(word) + ) - return group_maps + return groupings -def main(): +def map_word(word, grouping): + """Given a word and a grouping, map the characters of the word + to match the distribution defined in the grouping. + + example: + >>> map_word('because', (1, 2, 1, 1, 2)) + ['b', 'ec', 'a', 'u', 'se'] + """ + + word_chars = (c for c in word) + + mapped = [] + for char_group_size in grouping: + char_group = "" + for _ in range(char_group_size): + char_group += next(word_chars) + mapped.append(char_group) + + return tuple(mapped) + + +if __name__ == '__main__': symbols = get_csv_data('elements.csv', 1) - test_word = "Because" + test_word = 'Osiris' tokens = tokenize_sequence(test_word) - single_matches = find_matches(tokens.single, symbols) - pair_matches = find_matches(tokens.pair, symbols) - print(single_matches, pair_matches) + single_matches = find_matches(tokens.singles, symbols) + pair_matches = find_matches(tokens.doubles, symbols) + letter_groupings = groupings(test_word) -if __name__ == '__main__': - main() + spellings = [map_word(test_word, g) for g in letter_groupings] + + elemental_spellings = [ + [l.capitalize() for l in spelling] + for spelling in spellings + if set(c.lower() for c in spelling) <= set(s.lower() for s in symbols) + ] + + pprint(tokens) + pprint(single_matches) + pprint(pair_matches) + 
pprint(list(zip(letter_groupings, spellings))) + pprint(elemental_spellings) diff --git a/tests.py b/tests.py @@ -1,20 +1,38 @@ import speller import unittest +# TODO: change to py.test syntax + +ELEMENTS = ( + 'Ac', 'Ag', 'Al', 'Am', 'Ar', 'As', 'At', 'Au', 'B', 'Ba', 'Be', 'Bh', + 'Bi', 'Bk', 'Br', 'C', 'Ca', 'Cd', 'Ce', 'Cf', 'Cl', 'Cm', 'Co', 'Cr', + 'Cs', 'Cu', 'Db', 'Ds', 'Dy', 'Er', 'Es', 'Eu', 'F', 'Fe', 'Fm', 'Fr', + 'Ga', 'Gd', 'Ge', 'H', 'He', 'Hf', 'Hg', 'Ho', 'Hs', 'I', 'In', 'Ir', + 'K', 'Kr', 'La', 'Li', 'Lr', 'Lu', 'Md', 'Mg', 'Mn', 'Mo', 'Mt', 'N', + 'Na', 'Nb', 'Nd', 'Ne', 'Ni', 'No', 'Np', 'O', 'Os', 'P', 'Pa', 'Pb', + 'Pd', 'Pm', 'Po', 'Pr', 'Pt', 'Pu', 'Ra', 'Rb', 'Re', 'Rf', 'Rg', 'Rh', + 'Rn', 'Ru', 'S', 'Sb', 'Sc', 'Se', 'Sg', 'Si', 'Sm', 'Sn', 'Sr', 'Ta', + 'Tb', 'Tc', 'Te', 'Th', 'Ti', 'Tl', 'Tm', 'U', 'Uub', 'Uuh', 'Uuo', + 'Uup', 'Uuq', 'Uus', 'Uut', 'V', 'W', 'Xe', 'Y', 'Yb', 'Zn', 'Zr' +) class MatchingTest(unittest.TestCase): test_singles = ['B', 'e', 'c', 'a', 'u', 's', 'e'] test_pairs = ['Be', 'ec', 'ca', 'au', 'se'] - #TODO: change this so it never fails - symbols = speller.get_csv_data('elements.csv', 1) def test_match_singles(self): - matches = speller.find_matches(self.test_singles, self.symbols) - self.assertEqual(matches, ['b', 'c', 'u', 's']) + matches = speller.find_matches(self.test_singles, ELEMENTS) + self.assertEqual( + matches, + {'S': 86, 'B': 8, 'U': 103, 'C': 15} + ) def test_match_pairs(self): - matches = speller.find_matches(self.test_pairs, self.symbols) - self.assertEqual(matches, ['be', 'ca', 'au', 'se']) + matches = speller.find_matches(self.test_pairs, ELEMENTS) + self.assertEqual( + matches, + {'Au': 7, 'Be': 10, 'Ca': 16, 'Se': 89} + ) class TokensTest(unittest.TestCase): @@ -22,24 +40,25 @@ class TokensTest(unittest.TestCase): def test_single_chars(self): tokens = speller.tokenize_sequence(self.test_word) - self.assertEqual(tokens.single, ["O", "s", "i", "r", "i", "s"]) + self.assertEqual(tokens.singles, 
("O", "s", "i", "r", "i", "s")) def test_pair_chars(self): tokens = speller.tokenize_sequence(self.test_word) - self.assertEqual(tokens.pair, ["Os", "si", "ir", "ri", "is"]) + self.assertEqual(tokens.doubles, ("Os", "si", "ir", "ri", "is")) class GroupingTest(unittest.TestCase): word = "that" def test_singles_and_pairs(self): - expected_maps = [(2, 2), (1, 1, 2), (1, 2, 1), (2, 1, 1), (1,1,1,1)] + expected_maps = ((2, 2), (1, 1, 2), (1, 2, 1), (2, 1, 1), (1, 1, 1, 1)) group_maps = speller.groupings(self.word) self.assertEqual(group_maps, expected_maps) class FileTest(unittest.TestCase): file_name = "elements.csv" + proper_data = ['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',