diff --git a/README.md b/README.md index 5c8346e..badfc75 100644 --- a/README.md +++ b/README.md @@ -42,4 +42,5 @@ optional arguments: False) --strict, -s How strictly should the words be related to real English? (default: None) + --all-words, -a whether to use letters from all words (default: False) ``` diff --git a/acronym/acronym.py b/acronym/acronym.py index 6bad35e..bdb8cbc 100755 --- a/acronym/acronym.py +++ b/acronym/acronym.py @@ -3,6 +3,7 @@ import numpy as np import re # import enchant +from itertools import product import nltk try: nltk.corpus.words.ensure_loaded() @@ -47,6 +48,43 @@ def _get_acronym(s, idx): result += s[i].upper() return result +def _check_all_words_caps(s): + """ + Returns True ie each word in the string s has at least one capital + letter in it. Returns False if not. + """ + capword_count = 0 + words = s.split() + for word in words: + for letter in word: + if letter.isupper(): + capword_count += 1 + break + return len(words) == capword_count + +def _check_allwords(s, word): + idlists = list() + for lett in word: + idlists.append([i for i, ltr in enumerate(s) if ltr == lett]) + + incrementals = list() + for i in product(*idlists): + if len(list(i)) != len(set(i)): + continue + if list(i) == sorted(i): + incrementals.append(list(i)) + + spaces = [ctr for ctr, lett in enumerate(s) if lett == " "] + for incremental in incrementals: + letperword = [len([i for i in incremental if i