commit 005472ad032e0c85afc62467fa7209e993b5317e
parent aa2221ff172f2512a8a64f11527e4cea9263c2a3
Author: Amin Mesbah <mesbah.amin@gmail.com>
Date: Tue, 26 Jan 2016 21:06:53 -0800
Split a word into a list of single characters and a list of adjacent character pairs
Diffstat:
1 file changed, 26 insertions(+), 5 deletions(-)
diff --git a/speller.py b/speller.py
@@ -1,19 +1,39 @@
+from collections import namedtuple
import csv
+
def main():
symbols = get_csv_data('elements.csv', 1)
test_word = "Osiris"
- print(find_matches(test_word, symbols))
-def find_matches(word, symbols):
+ tokens = tokenize_sequence(test_word)
+
+ print(tokens.singles)
+ print(tokens.pairs)
+ print(find_matches(tokens.singles, symbols))
+ print(find_matches(tokens.pairs, symbols))
+
+def tokenize_sequence(sequence):
+ """Split a sequence into a list of single elements and a list of overlapping adjacent pairs."""
+
+ t = namedtuple('Tokens', (['singles', 'pairs']))
+
+ singles = [sequence[i:i+1] for i in range(0, len(sequence))]
+ pairs = [sequence[i:i+2] for i in range(0, len(sequence) - 1)]
+ tokens = t(singles, pairs)
+
+ return tokens
+
+
+def find_matches(sequence, symbols):
matches = []
- for char in word:
- single = char
- matches += (x for x in symbols if x == char)
+ for i in sequence:
+ matches += (x for x in symbols if x == i)
return matches
+
def get_csv_data(file_name, column):
symbols = []
@@ -25,5 +45,6 @@ def get_csv_data(file_name, column):
return symbols
+
if __name__ == '__main__':
main()