stoichiograph

Spell words with elemental symbols from the periodic table.
git clone git://git.amin.space/stoichiograph.git
Log | Files | Refs | LICENSE

commit a0f9ebae95c7485f207ff67d3603d90133f0fd33
parent 9cf2430e9df092935b513b1be6b28ff72020a839
Author: amin <dev@aminmesbah.com>
Date:   Wed, 23 Mar 2016 19:55:42 +0000

add function to return possible char grouping arrangements

FossilOrigin-Name: b48ac857506028725e989a7affc496f1cace505c232d10b57eae4c21498b5566
Diffstat:
A .gitignore | 2 ++
M speller.py | 62 +++++++++++++++++++++++++++++++++++++++++++++-----------------
M tests.py | 10 ++++++++++
3 files changed, 57 insertions(+), 17 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,2 @@ +__pycache__/* +*.swp diff --git a/speller.py b/speller.py @@ -1,18 +1,25 @@ +# TODO: +# test that all letters in word are present in some element +# generate group_maps only for the exact number of chars in word +# + from collections import namedtuple +from itertools import chain, product import csv import sys -def main(): - symbols = get_csv_data('elements.csv', 1) - - test_word = "Because" +def get_csv_data(file_name, column): + """Return in a list all data from a given column of a .csv file""" + data = [] - tokens = tokenize_sequence(test_word) - single_matches = find_matches(tokens.single, symbols) - pair_matches = find_matches(tokens.pair, symbols) + with open(file_name) as infile: + csv_reader = csv.reader(infile, skipinitialspace=True, delimiter=',') + next(csv_reader, None) + for row in csv_reader: + data.append(row[column]) - print(single_matches, pair_matches) + return data def tokenize_sequence(sequence): @@ -42,17 +49,38 @@ def find_matches(sequence, symbols): return matches -def get_csv_data(file_name, column): - """Return in a list all data from a given column of a .csv file""" - data = [] +def groupings(word, group_sizes = [1,2]): + """Return a list of all permutations of possible character grouping + arrangements of a word. group_sizes defines the possible sizes of + character groups, and by default allows only singles and pairs. 
+ """ + group_maps = [] + length = len(word) + cartesian_product = (product(group_sizes, repeat=r) + for r in range(1, length + 1)) + products = chain.from_iterable(cartesian_product) - with open(file_name) as infile: - csv_reader = csv.reader(infile, skipinitialspace=True, delimiter=',') - next(csv_reader, None) - for row in csv_reader: - data.append(row[column]) + # include only products that represent the correct number of chars + for p in products: + if sum(p) == length: + p = [tuple(x for x in p)] + for x in p: + if x not in group_maps: + group_maps.append(x) - return data + return group_maps + + +def main(): + symbols = get_csv_data('elements.csv', 1) + + test_word = "Because" + + tokens = tokenize_sequence(test_word) + single_matches = find_matches(tokens.single, symbols) + pair_matches = find_matches(tokens.pair, symbols) + + print(single_matches, pair_matches) if __name__ == '__main__': diff --git a/tests.py b/tests.py @@ -5,6 +5,7 @@ import unittest class MatchingTest(unittest.TestCase): test_singles = ['B', 'e', 'c', 'a', 'u', 's', 'e'] test_pairs = ['Be', 'ec', 'ca', 'au', 'se'] + #TODO: change this so it never fails symbols = speller.get_csv_data('elements.csv', 1) def test_match_singles(self): @@ -28,6 +29,15 @@ class TokensTest(unittest.TestCase): self.assertEqual(tokens.pair, ["Os", "si", "ir", "ri", "is"]) +class GroupingTest(unittest.TestCase): + word = "that" + + def test_singles_and_pairs(self): + expected_maps = [(2, 2), (1, 1, 2), (1, 2, 1), (2, 1, 1), (1,1,1,1)] + group_maps = speller.groupings(self.word) + self.assertEqual(group_maps, expected_maps) + + class FileTest(unittest.TestCase): file_name = "elements.csv" proper_data = ['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne',