commit 594dcc343ea4f9e58d42befc0f5505625c5107c6
parent 82052449d2e24228733b52427cb8924f6f456e4d
Author: amin <dev@aminmesbah.com>
Date: Wed, 27 Jan 2016 18:59:42 +0000
add unit tests
FossilOrigin-Name: cd70d699d142f330f5178d9c5dfd8b958694ca3ab51743ceb918c37efa1904d9
Diffstat:
M | speller.py | | | 37 | ++++++++++++++++++++----------------- |
A | tests.py | | | 41 | +++++++++++++++++++++++++++++++++++++++++ |
2 files changed, 61 insertions(+), 17 deletions(-)
diff --git a/speller.py b/speller.py
@@ -1,49 +1,52 @@
from collections import namedtuple
import csv
+import sys
def main():
    """Print the element symbols that match the tokens of a sample word.

    Reads column 1 of ``elements.csv``, tokenizes the hard-coded sample
    word, and prints the matches for the single-element tokens followed by
    the matches for the two-element tokens.
    """
    element_symbols = get_csv_data('elements.csv', 1)
    word = "Because"
    parts = tokenize_sequence(word)

    # Same lookup for both token groups, singles first and pairs second.
    matched = [
        find_matches(group, element_symbols)
        for group in (parts.single, parts.pair)
    ]
    print(*matched)
# Built once at import time so every call returns the same ``Tokens`` type
# instead of manufacturing a brand-new namedtuple class per invocation.
_Tokens = namedtuple('Tokens', ['single', 'pair'])


def tokenize_sequence(sequence):
    """Split a sequence into one list of individual elements and one of pairs.

    Args:
        sequence: Any sliceable object with a length, typically a word.

    Returns:
        A ``Tokens`` namedtuple with two list fields:
        ``single`` holds every one-element slice of ``sequence`` in order;
        ``pair`` holds every slice of two adjacent elements in order, and
        is empty when ``sequence`` has fewer than two elements.
    """
    length = len(sequence)
    single = [sequence[i:i + 1] for i in range(length)]
    # range(length - 1) is empty for lengths 0 and 1, so no short pair leaks in.
    pair = [sequence[i:i + 2] for i in range(length - 1)]
    return _Tokens(single, pair)
def find_matches(sequence, symbols):
    """Return the lower-cased tokens of ``sequence`` that occur in ``symbols``.

    The comparison is case-insensitive. Matches are returned in the order
    of ``sequence``; a token that equals several entries of ``symbols``
    (after lower-casing) is returned once per matching entry.

    Args:
        sequence: Iterable of string tokens to look up.
        symbols: Iterable of string symbols to match against.

    Returns:
        A list of lower-cased strings, one per (token, symbol) match.
    """
    # Count each lower-cased symbol once up front so every token is a single
    # dict lookup instead of a rescan of the whole symbol list.
    symbol_counts = {}
    for symbol in symbols:
        key = symbol.lower()
        symbol_counts[key] = symbol_counts.get(key, 0) + 1

    matches = []
    for token in sequence:
        lowered = token.lower()
        matches.extend([lowered] * symbol_counts.get(lowered, 0))
    # TODO: Make this return an array of indices
    return matches
def get_csv_data(file_name, column):
    """Read one column of a comma-separated file, skipping its header row.

    Args:
        file_name: Path of the CSV file to read.
        column: Zero-based index of the field to collect from each row.

    Returns:
        A list with the ``column`` field of every non-blank data row, in
        file order. An empty or header-only file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank row has no field at index ``column``.
    """
    # newline='' hands newline handling to the csv module, as its
    # documentation requires for files passed to csv.reader.
    with open(file_name, newline='') as infile:
        csv_reader = csv.reader(infile, skipinitialspace=True, delimiter=',')
        # Discard the header; the default keeps an empty file from raising.
        next(csv_reader, None)
        # csv.reader yields [] for blank lines; skip those instead of
        # crashing on row[column].
        return [row[column] for row in csv_reader if row]
if __name__ == '__main__':
diff --git a/tests.py b/tests.py
@@ -0,0 +1,41 @@
+import speller
+import unittest
+
+
class MatchingTest(unittest.TestCase):
    """Check speller.find_matches against the tokens of the word "Because"."""

    # One- and two-character tokens of "Because", as tokenize_sequence
    # produces them.
    test_singles = ['B', 'e', 'c', 'a', 'u', 's', 'e']
    test_pairs = ['Be', 'ec', 'ca', 'au', 'us', 'se']

    @classmethod
    def setUpClass(cls):
        # Load the symbols when the tests run rather than in the class body,
        # so a missing or broken elements.csv is reported as a test error
        # instead of breaking import of the whole test module.
        cls.symbols = speller.get_csv_data('elements.csv', 1)

    def test_match_singles(self):
        # Matches come back lower-cased, in token order.
        matches = speller.find_matches(self.test_singles, self.symbols)
        self.assertEqual(matches, ['b', 'c', 'u', 's'])

    def test_match_pairs(self):
        # 'ec' and 'us' are not element symbols, so they are dropped.
        matches = speller.find_matches(self.test_pairs, self.symbols)
        self.assertEqual(matches, ['be', 'ca', 'au', 'se'])
+
+
class TokensTest(unittest.TestCase):
    """Check both token lists that speller.tokenize_sequence builds."""

    test_word = "Osiris"

    def test_single_chars(self):
        # Every character of the word, one token each, in order.
        singles = speller.tokenize_sequence(self.test_word).single
        self.assertEqual(singles, ["O", "s", "i", "r", "i", "s"])

    def test_pair_chars(self):
        # Every run of two adjacent characters, in order.
        pairs = speller.tokenize_sequence(self.test_word).pair
        self.assertEqual(pairs, ["Os", "si", "ir", "ri", "is"])
+
+
class FileTest(unittest.TestCase):
    """Check that column 1 of elements.csv holds the expected symbols."""

    file_name = "elements.csv"
    # Expected symbols in file order, laid out one periodic-table period
    # per group of lines for readability.
    proper_data = [
        'H', 'He',
        'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne',
        'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar',
        'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',
        'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr',
        'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd',
        'In', 'Sn', 'Sb', 'Te', 'I', 'Xe',
        'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy',
        'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt',
        'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn',
        'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf',
        'Es', 'Fm', 'Md', 'No', 'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds',
        'Rg', 'Uub', 'Uut', 'Uuq', 'Uup', 'Uuh', 'Uus', 'Uuo',
    ]

    def test_file_contains_proper_data(self):
        # Column 1 is the one the speller reads its symbols from.
        loaded = speller.get_csv_data(self.file_name, 1)
        self.assertEqual(loaded, self.proper_data)
+
+
if __name__ == '__main__':
    # Run every test case in this module when executed as a script;
    # warnings raised while the tests run are suppressed.
    unittest.main(warnings='ignore')