stoichiograph

Spell words with elemental symbols from the periodic table.
git clone git://git.amin.space/stoichiograph.git

commit 72a9c67c17da70ce7e15c3f1e75b6753bddad512
parent b5231a079f27c9cf4061314b67be0b48a38a517a
Author: amin <dev@aminmesbah.com>
Date:   Thu,  8 Sep 2016 03:14:08 +0000

Remove all unnecessary functionality. Use py.test.

FossilOrigin-Name: c67d46c068b7619f31297d387b0bf65af828c936f7373fc389828e8d81de5909
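
Context for the py.test switch: the rewritten tests below are plain assert-based functions that pytest discovers by name. A minimal sketch of driving them from Python, assuming the pinned pytest from dev_requirements.txt is installed (pytest.main mirrors the `py.test tests.py` and `py.test -k` invocations noted in the new Makefile):

    import pytest

    # Run the whole rewritten suite, as the Makefile's `test` target does.
    pytest.main(['tests.py'])

    # Run a single test, mirroring the "py.test -k the_test_path" comment.
    pytest.main(['tests.py', '-k', 'test_groupings'])
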
Diffstat:
A Makefile             |  21 +++++++++++++++++++++
A dev_requirements.txt |   6 ++++++
A elemental_speller.py |  98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D speller.py           | 131 -------------------------------------------------------------------------------
M tests.py             |  78 ++++++++++++++++++++----------------------------------------------------------
5 files changed, 145 insertions(+), 189 deletions(-)

diff --git a/Makefile b/Makefile
@@ -0,0 +1,21 @@
+DATADIR = data/
+DATE = `date +%Y-%m-%d`
+
+init:
+	pip install -r dev_requirements.txt
+
+test:
+	# To run individual tests, use "py.test -k the_test_path"
+	py.test tests.py
+
+lint:
+	flake8 *.py
+
+watch-log:
+	tail -f debug.log
+
+loc:
+	cloc --by-file --include-lang=Python .
+
+todo:
+	grep -FR --ignore-case --binary-file=without-match todo *.py
diff --git a/dev_requirements.txt b/dev_requirements.txt
@@ -0,0 +1,6 @@
+flake8==3.0.4
+mccabe==0.5.2
+py==1.4.31
+pycodestyle==2.0.0
+pyflakes==1.2.3
+pytest==3.0.2
diff --git a/elemental_speller.py b/elemental_speller.py
@@ -0,0 +1,98 @@
+# TODO: add logging
+
+from collections import namedtuple
+from itertools import chain, product
+
+ELEMENTS = (
+    'Ac', 'Ag', 'Al', 'Am', 'Ar', 'As', 'At', 'Au', 'B', 'Ba', 'Be', 'Bh',
+    'Bi', 'Bk', 'Br', 'C', 'Ca', 'Cd', 'Ce', 'Cf', 'Cl', 'Cm', 'Co', 'Cr',
+    'Cs', 'Cu', 'Db', 'Ds', 'Dy', 'Er', 'Es', 'Eu', 'F', 'Fe', 'Fm', 'Fr',
+    'Ga', 'Gd', 'Ge', 'H', 'He', 'Hf', 'Hg', 'Ho', 'Hs', 'I', 'In', 'Ir',
+    'K', 'Kr', 'La', 'Li', 'Lr', 'Lu', 'Md', 'Mg', 'Mn', 'Mo', 'Mt', 'N',
+    'Na', 'Nb', 'Nd', 'Ne', 'Ni', 'No', 'Np', 'O', 'Os', 'P', 'Pa', 'Pb',
+    'Pd', 'Pm', 'Po', 'Pr', 'Pt', 'Pu', 'Ra', 'Rb', 'Re', 'Rf', 'Rg', 'Rh',
+    'Rn', 'Ru', 'S', 'Sb', 'Sc', 'Se', 'Sg', 'Si', 'Sm', 'Sn', 'Sr', 'Ta',
+    'Tb', 'Tc', 'Te', 'Th', 'Ti', 'Tl', 'Tm', 'U', 'Uub', 'Uuh', 'Uuo',
+    'Uup', 'Uuq', 'Uus', 'Uut', 'V', 'W', 'Xe', 'Y', 'Yb', 'Zn', 'Zr'
+)
+
+
+def elemental_spelling(word, symbols=ELEMENTS):
+    """Given a word and a sequence of symbols (tokens),
+    return a list of any possible ways to spell that word
+    with those symbols.
+
+    Example:
+    >>> elemental_spelling('amputation')
+    [(('Am', 'Pu', 'Ta', 'Ti', 'O', 'N'), ('Am', 'P', 'U', 'Ta', 'Ti', 'O', 'N')]
+    """
+    letter_groupings = _groupings(len(word))
+
+    spellings = [_map_word(word, grouping) for grouping in letter_groupings]
+
+    elemental_spellings = [
+        tuple(token.capitalize() for token in spelling)
+        for spelling in spellings
+        # set operation: set of chars in spelling is subset of set of symbols
+        if set(s.lower() for s in spelling) <= set(s.lower() for s in symbols)
+    ]
+
+    return elemental_spellings
+
+
+def _groupings(word_length, token_sizes=(1, 2, 3)):
+    """Return a tuple of all character groupings for a word
+    of a given length.
+
+    A character grouping is a tuple representing the distribution
+    of characters in a tokenized word.
+
+    The word 'canary', if mapped to the grouping (1, 3, 2), would
+    be broken down into ['c', 'ana', 'ry'].
+
+    token_sizes defines the possible sizes of character groups,
+    and by default allows only singles, pairs, and triplets.
+
+    Example:
+    >>> _groupings(4, token_sizes=(1, 2))
+    ((2, 2), (1, 1, 2), (1, 2, 1), (2, 1, 1), (1, 1, 1, 1))
+    """
+
+    cartesian_products = (
+        product(token_sizes, repeat=r)
+        for r in range(1, word_length + 1)
+    )
+
+    # include only groupings that represent the correct number of chars
+    groupings = tuple(
+        grouping
+        for grouping in chain.from_iterable(cartesian_products)
+        if sum(grouping) == word_length
+    )
+
+    return groupings
+
+
+def _map_word(word, grouping):
+    """Return a tuple of tokens: word mapped to a grouping.
+
+    Example:
+    >>> _map_word('because', (1, 2, 1, 1, 2))
+    ('b', 'ec', 'a', 'u', 'se')
+    """
+
+    word_chars = (c for c in word)
+
+    mapped = []
+    for char_group_size in grouping:
+        char_group = ""
+        for _ in range(char_group_size):
+            char_group += next(word_chars)
+        mapped.append(char_group)
+
+    return tuple(mapped)
+
+
+if __name__ == '__main__':
+    test_word = 'Mockery'
+    print('{}:\n{}'.format(test_word, elemental_spelling(test_word)))
diff --git a/speller.py b/speller.py
@@ -1,131 +0,0 @@
-# TODO:
-#    - eliminate unnecessary functions
-#    - simplify
-#    - use consistent terminology
-import csv
-from collections import namedtuple
-from itertools import chain, product
-from pprint import pprint
-
-ELEMENTS = (
-    'Ac', 'Ag', 'Al', 'Am', 'Ar', 'As', 'At', 'Au', 'B', 'Ba', 'Be', 'Bh',
-    'Bi', 'Bk', 'Br', 'C', 'Ca', 'Cd', 'Ce', 'Cf', 'Cl', 'Cm', 'Co', 'Cr',
-    'Cs', 'Cu', 'Db', 'Ds', 'Dy', 'Er', 'Es', 'Eu', 'F', 'Fe', 'Fm', 'Fr',
-    'Ga', 'Gd', 'Ge', 'H', 'He', 'Hf', 'Hg', 'Ho', 'Hs', 'I', 'In', 'Ir',
-    'K', 'Kr', 'La', 'Li', 'Lr', 'Lu', 'Md', 'Mg', 'Mn', 'Mo', 'Mt', 'N',
-    'Na', 'Nb', 'Nd', 'Ne', 'Ni', 'No', 'Np', 'O', 'Os', 'P', 'Pa', 'Pb',
-    'Pd', 'Pm', 'Po', 'Pr', 'Pt', 'Pu', 'Ra', 'Rb', 'Re', 'Rf', 'Rg', 'Rh',
-    'Rn', 'Ru', 'S', 'Sb', 'Sc', 'Se', 'Sg', 'Si', 'Sm', 'Sn', 'Sr', 'Ta',
-    'Tb', 'Tc', 'Te', 'Th', 'Ti', 'Tl', 'Tm', 'U', 'Uub', 'Uuh', 'Uuo',
-    'Uup', 'Uuq', 'Uus', 'Uut', 'V', 'W', 'Xe', 'Y', 'Yb', 'Zn', 'Zr'
-)
-
-
-def get_csv_data(file_name, column, header=True):
-    """Return in a list all data from a given column of a .csv file"""
-
-    data = []
-
-    with open(file_name) as infile:
-        csv_reader = csv.reader(infile, skipinitialspace=True, delimiter=',')
-        if header:
-            next(csv_reader, None)  # skip header row
-        for row in csv_reader:
-            data.append(row[column])
-
-    return data
-
-
-def tokenize_sequence(sequence):
-    """Return a list each of all single and double character tokens."""
-
-    Tokens = namedtuple('Tokens', (['singles', 'doubles']))
-
-    singles = tuple(sequence[i] for i in range(0, len(sequence)))
-    doubles = tuple(sequence[i:i+2] for i in range(0, len(sequence) - 1))
-
-    return Tokens(singles, doubles)
-
-
-def find_matches(sequence, symbols):
-    """Return a dictionary of symbols and indices for all
-    symbols that match an item in the given sequence.
-    """
-
-    return {
-        symbol: index
-        for character in sequence
-        for index, symbol in enumerate(symbols)
-        if symbol.lower() == character.lower()
-    }
-
-
-def groupings(word, token_sizes=(1, 2)):
-    """Return a tuple of all permutations of possible character
-    grouping arrangements of a word.
-
-    token_sizes defines the possible sizes of character groups,
-    and by default allows only singles and pairs.
-    """
-
-    cartesian_products = (
-        product(token_sizes, repeat=r)
-        for r in range(1, len(word) + 1)
-    )
-
-    # include only groupings that represent the correct number of chars
-    groupings = tuple(
-        grouping
-        for grouping in chain.from_iterable(cartesian_products)
-        if sum(grouping) == len(word)
-    )
-
-    return groupings
-
-
-def map_word(word, grouping):
-    """Given a word and a grouping, map the characters of the word
-    to match the distribution defined in the grouping.
-
-    example:
-    >>> map_word('because', (1, 2, 1, 1, 2))
-    ['b', 'ec', 'a', 'u', 'se']
-    """
-
-    word_chars = (c for c in word)
-
-    mapped = []
-    for char_group_size in grouping:
-        char_group = ""
-        for _ in range(char_group_size):
-            char_group += next(word_chars)
-        mapped.append(char_group)
-
-    return tuple(mapped)
-
-
-if __name__ == '__main__':
-    symbols = get_csv_data('elements.csv', 1)
-
-    test_word = 'Osiris'
-
-    tokens = tokenize_sequence(test_word)
-
-    single_matches = find_matches(tokens.singles, symbols)
-    pair_matches = find_matches(tokens.doubles, symbols)
-
-    letter_groupings = groupings(test_word)
-
-    spellings = [map_word(test_word, g) for g in letter_groupings]
-
-    elemental_spellings = [
-        [l.capitalize() for l in spelling]
-        for spelling in spellings
-        if set(c.lower() for c in spelling) <= set(s.lower() for s in symbols)
-    ]
-
-    pprint(tokens)
-    pprint(single_matches)
-    pprint(pair_matches)
-    pprint(list(zip(letter_groupings, spellings)))
-    pprint(elemental_spellings)
diff --git a/tests.py b/tests.py
@@ -1,7 +1,5 @@
-import speller
-import unittest
-
-# TODO: change to py.test syntax
+import elemental_speller as es
+import pytest
 
 ELEMENTS = (
     'Ac', 'Ag', 'Al', 'Am', 'Ar', 'As', 'At', 'Au', 'B', 'Ba', 'Be', 'Bh',
@@ -16,66 +14,30 @@ ELEMENTS = (
     'Uup', 'Uuq', 'Uus', 'Uut', 'V', 'W', 'Xe', 'Y', 'Yb', 'Zn', 'Zr'
 )
 
-class MatchingTest(unittest.TestCase):
-    test_singles = ['B', 'e', 'c', 'a', 'u', 's', 'e']
-    test_pairs = ['Be', 'ec', 'ca', 'au', 'se']
-
-    def test_match_singles(self):
-        matches = speller.find_matches(self.test_singles, ELEMENTS)
-        self.assertEqual(
-            matches,
-            {'S': 86, 'B': 8, 'U': 103, 'C': 15}
-        )
-
-    def test_match_pairs(self):
-        matches = speller.find_matches(self.test_pairs, ELEMENTS)
-        self.assertEqual(
-            matches,
-            {'Au': 7, 'Be': 10, 'Ca': 16, 'Se': 89}
-        )
-
-
-class TokensTest(unittest.TestCase):
-    test_word = "Osiris"
-
-    def test_single_chars(self):
-        tokens = speller.tokenize_sequence(self.test_word)
-        self.assertEqual(tokens.singles, ("O", "s", "i", "r", "i", "s"))
-
-    def test_pair_chars(self):
-        tokens = speller.tokenize_sequence(self.test_word)
-        self.assertEqual(tokens.doubles, ("Os", "si", "ir", "ri", "is"))
+def test_verify_data():
+    assert es.ELEMENTS == ELEMENTS
 
 
-class GroupingTest(unittest.TestCase):
-    word = "that"
-
-    def test_singles_and_pairs(self):
-        expected_maps = ((2, 2), (1, 1, 2), (1, 2, 1), (2, 1, 1), (1, 1, 1, 1))
-        group_maps = speller.groupings(self.word)
-        self.assertEqual(group_maps, expected_maps)
+def test_groupings():
+    assert es._groupings(4, token_sizes=()) == ()
 
+    expected = ((2, 2), (1, 1, 2), (1, 2, 1), (2, 1, 1), (1, 1, 1, 1))
+    assert es._groupings(4, token_sizes=(1, 2)) == expected
 
-class FileTest(unittest.TestCase):
-    file_name = "elements.csv"
+    expected = (
+        (1, 3), (2, 2), (3, 1), (1, 1, 2), (1, 2, 1), (2, 1, 1), (1, 1, 1, 1)
+    )
+    assert es._groupings(4, token_sizes=(1, 2, 3)) == expected
 
-    proper_data = ['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne',
-                   'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca',
-                   'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',
-                   'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr',
-                   'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn',
-                   'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd',
-                   'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb',
-                   'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg',
-                   'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th',
-                   'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm',
-                   'Md', 'No', 'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds',
-                   'Rg', 'Uub', 'Uut', 'Uuq', 'Uup', 'Uuh', 'Uus', 'Uuo']
 
-    def test_file_contains_proper_data(self):
-        data = speller.get_csv_data(self.file_name, 1)
-        self.assertEqual(data, self.proper_data)
+def test_map_word():
+    assert es._map_word('because', (1, 2, 1, 1, 2)) == ('b', 'ec', 'a', 'u', 'se')
+    assert es._map_word('osiris', (1, 3, 2)) == ('o', 'sir', 'is')
 
-if __name__ == '__main__':
-    unittest.main(warnings='ignore')
+def test_elemental_spelling():
+    assert es.elemental_spelling('amputation') == [
+        ('Am', 'Pu', 'Ta', 'Ti', 'O', 'N'),
+        ('Am', 'P', 'U', 'Ta', 'Ti', 'O', 'N')
+    ]
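
Taken together, the docstring and test examples in this commit imply the following behaviour for the new module; a hypothetical interpreter session, assuming elemental_speller.py is importable from the repository root:

    >>> import elemental_speller as es
    >>> es.elemental_spelling('amputation')
    [('Am', 'Pu', 'Ta', 'Ti', 'O', 'N'), ('Am', 'P', 'U', 'Ta', 'Ti', 'O', 'N')]
    >>> es._map_word('osiris', (1, 3, 2))
    ('o', 'sir', 'is')
    >>> es._groupings(4, token_sizes=(1, 2))
    ((2, 2), (1, 1, 2), (1, 2, 1), (2, 1, 1), (1, 1, 1, 1))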