commit a0f9ebae95c7485f207ff67d3603d90133f0fd33
parent 9cf2430e9df092935b513b1be6b28ff72020a839
Author: amin <dev@aminmesbah.com>
Date: Wed, 23 Mar 2016 19:55:42 +0000
add function to return possible char grouping arrangements
FossilOrigin-Name: b48ac857506028725e989a7affc496f1cace505c232d10b57eae4c21498b5566
Diffstat:
3 files changed, 57 insertions(+), 17 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+__pycache__/*
+*.swp
diff --git a/speller.py b/speller.py
@@ -1,18 +1,25 @@
+# TODO:
+# test that all letters in word are present in some element
+# generate group_maps only for the exact number of chars in word
+#
+
from collections import namedtuple
+from itertools import chain, product
import csv
import sys
-def main():
- symbols = get_csv_data('elements.csv', 1)
-
- test_word = "Because"
+def get_csv_data(file_name, column):
+ """Return in a list all data from a given column of a .csv file"""
+ data = []
- tokens = tokenize_sequence(test_word)
- single_matches = find_matches(tokens.single, symbols)
- pair_matches = find_matches(tokens.pair, symbols)
+ with open(file_name) as infile:
+ csv_reader = csv.reader(infile, skipinitialspace=True, delimiter=',')
+ next(csv_reader, None)
+ for row in csv_reader:
+ data.append(row[column])
- print(single_matches, pair_matches)
+ return data
def tokenize_sequence(sequence):
@@ -42,17 +49,38 @@ def find_matches(sequence, symbols):
return matches
-def get_csv_data(file_name, column):
- """Return in a list all data from a given column of a .csv file"""
- data = []
+def groupings(word, group_sizes = [1,2]):
+ """Return a list of all permutations of possible character grouping
+ arrangements of a word. group_sizes defines the possible sizes of
+ character groups, and by default allows only singles and pairs.
+ """
+ group_maps = []
+ length = len(word)
+ cartesian_product = (product(group_sizes, repeat=r)
+ for r in range(1, length + 1))
+ products = chain.from_iterable(cartesian_product)
- with open(file_name) as infile:
- csv_reader = csv.reader(infile, skipinitialspace=True, delimiter=',')
- next(csv_reader, None)
- for row in csv_reader:
- data.append(row[column])
+ # include only products that represent the correct number of chars
+ for p in products:
+ if sum(p) == length:
+ p = [tuple(x for x in p)]
+ for x in p:
+ if x not in group_maps:
+ group_maps.append(x)
- return data
+ return group_maps
+
+
+def main():
+ symbols = get_csv_data('elements.csv', 1)
+
+ test_word = "Because"
+
+ tokens = tokenize_sequence(test_word)
+ single_matches = find_matches(tokens.single, symbols)
+ pair_matches = find_matches(tokens.pair, symbols)
+
+ print(single_matches, pair_matches)
if __name__ == '__main__':
diff --git a/tests.py b/tests.py
@@ -5,6 +5,7 @@ import unittest
class MatchingTest(unittest.TestCase):
test_singles = ['B', 'e', 'c', 'a', 'u', 's', 'e']
test_pairs = ['Be', 'ec', 'ca', 'au', 'se']
+    # TODO: make this test independent of the external elements.csv file so it cannot fail spuriously
symbols = speller.get_csv_data('elements.csv', 1)
def test_match_singles(self):
@@ -28,6 +29,15 @@ class TokensTest(unittest.TestCase):
self.assertEqual(tokens.pair, ["Os", "si", "ir", "ri", "is"])
+class GroupingTest(unittest.TestCase):
+ word = "that"
+
+ def test_singles_and_pairs(self):
+ expected_maps = [(2, 2), (1, 1, 2), (1, 2, 1), (2, 1, 1), (1,1,1,1)]
+ group_maps = speller.groupings(self.word)
+ self.assertEqual(group_maps, expected_maps)
+
+
class FileTest(unittest.TestCase):
file_name = "elements.csv"
proper_data = ['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne',