commit a0f9ebae95c7485f207ff67d3603d90133f0fd33
parent 9cf2430e9df092935b513b1be6b28ff72020a839
Author: amin <dev@aminmesbah.com>
Date: Wed, 23 Mar 2016 19:55:42 +0000
add function to return possible char grouping arrangements
FossilOrigin-Name: b48ac857506028725e989a7affc496f1cace505c232d10b57eae4c21498b5566
Diffstat:
3 files changed, 57 insertions(+), 17 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+__pycache__/*
+*.swp
diff --git a/speller.py b/speller.py
@@ -1,18 +1,25 @@
+# TODO:
+# test that all letters in word are present in some element
+# generate group_maps only for the exact number of chars in word
+#
+
from collections import namedtuple
+from itertools import chain, product
import csv
import sys
-def main():
- symbols = get_csv_data('elements.csv', 1)
-
- test_word = "Because"
+def get_csv_data(file_name, column):
+ """Return in a list all data from a given column of a .csv file"""
+ data = []
- tokens = tokenize_sequence(test_word)
- single_matches = find_matches(tokens.single, symbols)
- pair_matches = find_matches(tokens.pair, symbols)
+ with open(file_name) as infile:
+ csv_reader = csv.reader(infile, skipinitialspace=True, delimiter=',')
+ next(csv_reader, None)
+ for row in csv_reader:
+ data.append(row[column])
- print(single_matches, pair_matches)
+ return data
def tokenize_sequence(sequence):
@@ -42,17 +49,38 @@ def find_matches(sequence, symbols):
return matches
-def get_csv_data(file_name, column):
- """Return in a list all data from a given column of a .csv file"""
- data = []
+def groupings(word, group_sizes = [1,2]):
+ """Return a list of all permutations of possible character grouping
+ arrangements of a word. group_sizes defines the possible sizes of
+ character groups, and by default allows only singles and pairs.
+ """
+ group_maps = []
+ length = len(word)
+ cartesian_product = (product(group_sizes, repeat=r)
+ for r in range(1, length + 1))
+ products = chain.from_iterable(cartesian_product)
- with open(file_name) as infile:
- csv_reader = csv.reader(infile, skipinitialspace=True, delimiter=',')
- next(csv_reader, None)
- for row in csv_reader:
- data.append(row[column])
+ # include only products that represent the correct number of chars
+ for p in products:
+ if sum(p) == length:
+ p = [tuple(x for x in p)]
+ for x in p:
+ if x not in group_maps:
+ group_maps.append(x)
- return data
+ return group_maps
+
+
+def main():
+ symbols = get_csv_data('elements.csv', 1)
+
+ test_word = "Because"
+
+ tokens = tokenize_sequence(test_word)
+ single_matches = find_matches(tokens.single, symbols)
+ pair_matches = find_matches(tokens.pair, symbols)
+
+ print(single_matches, pair_matches)
if __name__ == '__main__':
diff --git a/tests.py b/tests.py
@@ -5,6 +5,7 @@ import unittest
class MatchingTest(unittest.TestCase):
test_singles = ['B', 'e', 'c', 'a', 'u', 's', 'e']
test_pairs = ['Be', 'ec', 'ca', 'au', 'se']
+    # TODO: make this test independent of the external elements.csv file so it cannot fail spuriously
symbols = speller.get_csv_data('elements.csv', 1)
def test_match_singles(self):
@@ -28,6 +29,15 @@ class TokensTest(unittest.TestCase):
self.assertEqual(tokens.pair, ["Os", "si", "ir", "ri", "is"])
+class GroupingTest(unittest.TestCase):
+ word = "that"
+
+ def test_singles_and_pairs(self):
+ expected_maps = [(2, 2), (1, 1, 2), (1, 2, 1), (2, 1, 1), (1,1,1,1)]
+ group_maps = speller.groupings(self.word)
+ self.assertEqual(group_maps, expected_maps)
+
+
class FileTest(unittest.TestCase):
file_name = "elements.csv"
proper_data = ['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne',