split a word into lists of single chars and pairs - stoichiograph - Spell words with elemental symbols from the periodic table.

commit 82052449d2e24228733b52427cb8924f6f456e4d
parent 2706cbee644f413e519770fe6e345d66fe5dbc5d
Author: amin <dev@aminmesbah.com>
Date:   Wed, 27 Jan 2016 05:06:52 +0000

split a word into lists of single chars and pairs

FossilOrigin-Name: 5a89ec149a63f20c7651db664ab74cd794d6da44cc683445bcf120564abfcc67
Diffstat:
M speller.py  | 31 ++++++++++++++++++++++++++-----

1 file changed, 26 insertions(+), 5 deletions(-)
diff --git a/speller.py b/speller.py
@@ -1,19 +1,39 @@
+from collections import namedtuple
 import csv
 
+
 def main():
     symbols = get_csv_data('elements.csv', 1)
     test_word = "Osiris"
-    print(find_matches(test_word, symbols))
 
-def find_matches(word, symbols):
+    tokens = tokenize_sequence(test_word)
+
+    print(tokens.singles)
+    print(tokens.pairs)
+    print(find_matches(tokens.singles, symbols))
+    print(find_matches(tokens.pairs, symbols))
+
+def tokenize_sequence(sequence):
+    """Splits a sequence into one list of individual elements, and one of pairs."""
+
+    t = namedtuple('Tokens', (['singles', 'pairs']))
+
+    singles = [sequence[i:i+1] for i in range(0, len(sequence))]
+    pairs = [sequence[i:i+2] for i in range(0, len(sequence) - 1)]
+    tokens = t(singles, pairs)
+
+    return tokens
+
+
+def find_matches(sequence, symbols):
     matches = []
 
-    for char in word:
-        single = char
-        matches += (x for x in symbols if x == char)
+    for i in sequence:
+        matches += (x for x in symbols if x == i)
 
     return matches
 
+
 def get_csv_data(file_name, column):
     symbols = []
 
@@ -25,5 +45,6 @@ def get_csv_data(file_name, column):
 
     return symbols
 
+
 if __name__ == '__main__':
     main()

	stoichiograph Spell words with elemental symbols from the periodic table.
	git clone git://git.amin.space/stoichiograph.git
	Log | Files | Refs | LICENSE