commit 82052449d2e24228733b52427cb8924f6f456e4d
parent 2706cbee644f413e519770fe6e345d66fe5dbc5d
Author: amin <dev@aminmesbah.com>
Date: Wed, 27 Jan 2016 05:06:52 +0000
Split a word into a list of single characters and a list of adjacent character pairs
FossilOrigin-Name: 5a89ec149a63f20c7651db664ab74cd794d6da44cc683445bcf120564abfcc67
Diffstat:
1 file changed, 26 insertions(+), 5 deletions(-)
diff --git a/speller.py b/speller.py
@@ -1,19 +1,39 @@
+from collections import namedtuple
import csv
+
def main():
symbols = get_csv_data('elements.csv', 1)
test_word = "Osiris"
- print(find_matches(test_word, symbols))
-def find_matches(word, symbols):
+ tokens = tokenize_sequence(test_word)
+
+ print(tokens.singles)
+ print(tokens.pairs)
+ print(find_matches(tokens.singles, symbols))
+ print(find_matches(tokens.pairs, symbols))
+
+def tokenize_sequence(sequence):
+ """Splits a sequence into one list of individual elements, and one of pairs."""
+
+ t = namedtuple('Tokens', (['singles', 'pairs']))
+
+ singles = [sequence[i:i+1] for i in range(0, len(sequence))]
+ pairs = [sequence[i:i+2] for i in range(0, len(sequence) - 1)]
+ tokens = t(singles, pairs)
+
+ return tokens
+
+
+def find_matches(sequence, symbols):
matches = []
- for char in word:
- single = char
- matches += (x for x in symbols if x == char)
+ for i in sequence:
+ matches += (x for x in symbols if x == i)
return matches
+
def get_csv_data(file_name, column):
symbols = []
@@ -25,5 +45,6 @@ def get_csv_data(file_name, column):
return symbols
+
if __name__ == '__main__':
main()