# -*- coding: utf-8 -*-
#
# Copyright © Spyder Project Contributors
# Licensed under the terms of the MIT License
# (see spyder/__init__.py for details)

"""
String search and match utilities useful when filtering a list of texts.
"""

import re

from spyder.py3compat import to_text_string


NOT_FOUND_SCORE = -1
NO_SCORE = 0


def get_search_regex(query, ignore_case=True):
    """Return a compiled regex that finds the query letters in order.

    Parameters
    ----------
    query : str
        String to search in another string (in order of character
        occurrence). Spaces in the query are ignored.
    ignore_case : bool, optional
        Perform a case insensitive search (True by default).

    Returns
    -------
    pattern : compiled regular expression

    Notes
    -----
    Every non-space character of the query is escaped with `re.escape`
    and the characters are joined with '.*', so the query is always
    matched literally: characters such as '(', '[' or '*' can neither
    break the compilation nor act as regex operators.
    """
    letters = [re.escape(char) for char in query if char != ' ']
    regex = u'({0})'.format('.*'.join(letters))

    flags = re.IGNORECASE if ignore_case else 0
    return re.compile(regex, flags)


def get_search_score(query, choice, ignore_case=True, apply_regex=True,
                     template='{}'):
    """Return the text, the enriched text and a score for a single match.

    Parameters
    ----------
    query : str
        String with letters to search in choice (in order of appearance).
        Spaces in the query are ignored.
    choice : str
        Sentence/words in which to search for the 'query' letters.
    ignore_case : bool, optional
        Perform a case insensitive search (True by default).
    apply_regex : bool, optional
        Check first with a regex that the query letters appear in order in
        choice (True by default). Callers that already performed this
        check can pass False to skip it.
    template : str, optional
        Template string used to surround the letters found in choice.
        This is useful when using a rich text editor ('{}' by default).
        Example: '<b>{}</b>'

    Returns
    -------
    results : tuple
        Tuple `(text, enriched_text, score)`. When the query is empty or
        is not found in choice, `enriched_text` is the unmodified text and
        `score` is `NOT_FOUND_SCORE`. Otherwise lower scores mean a better
        match.

    Notes
    -----
    The score is given according the following precedence (high to low):
        - Letters in one word and no spaces with exact match.
          Example: 'up' in 'up stroke'
        - Letters in one word and no spaces with partial match.
          Example: 'up' in 'upstream stroke'
        - Letters in one word but with skip letters.
          Example: 'cls' in 'close up'
        - Letters in two or more words
          Example: 'cls' in 'car lost'
    """
    original_choice = to_text_string(choice, encoding='utf-8')
    # Same shape as a successful result, so callers can always index or
    # unpack three items.
    not_found = (original_choice, original_choice, NOT_FOUND_SCORE)

    # Handle empty string case
    if not query:
        return not_found

    # Spaces never take part in the match (the search regex drops them
    # too), so remove them before comparing against words and letters.
    query = to_text_string(query, encoding='utf-8').replace(u' ', u'')
    if not query:
        return not_found

    choice = original_choice
    if ignore_case:
        query = query.lower()
        choice = choice.lower()

    if apply_regex:
        pattern = get_search_regex(query, ignore_case=ignore_case)
        if pattern.search(choice) is None:
            return not_found

    sep = u'-'  # Matched letters are represented by this character
    let = u'x'  # Nonmatches (except spaces) are represented by this
    score = 0

    words = choice.split(u' ')
    if any(query in word for word in words):
        # The whole query appears contiguously inside one word
        pos_start = choice.find(query)
        pos_end = pos_start + len(query)
        matched = set(range(pos_start, pos_end))
        base_text = choice
        enriched_text = (original_choice[:pos_start]
                         + template.format(original_choice[pos_start:pos_end])
                         + original_choice[pos_end:])

        # Give points to start of string
        score += pos_start
        if query in words:
            # The query is a word of choice (exact match)
            score += 1
        else:
            # The query is part of a word of choice (partial match)
            score += 100
    else:
        # Check letter by letter, always taking the first occurrence that
        # comes after the previously matched letter.
        letters = list(original_choice)
        if ignore_case:
            lookup = [letter.lower() for letter in letters]
        else:
            lookup = letters

        enriched_letters = list(letters)
        matched = set()
        search_from = 0
        for position, char in enumerate(query):
            try:
                index = lookup.index(char, search_from)
            except ValueError:
                # Only reachable when the regex check was skipped or
                # disagrees with `lower()`: the letters are not in choice.
                return not_found
            if position == 0:
                # Give points to start of string
                score += index
            enriched_letters[index] = template.format(letters[index])
            matched.add(index)
            search_from = index + 1

        base_text = letters
        enriched_text = u''.join(enriched_letters)

    # Reduce the text to a pattern made of `sep` (matched letter), `let`
    # (other letter) and spaces. Matches are tracked by index so a literal
    # '-' in the text is not mistaken for a matched letter.
    patterns_text = u''.join(
        sep if index in matched else (char if char == u' ' else let)
        for index, char in enumerate(base_text))

    # Penalize every run length (1..len(query)) of consecutive matched
    # letters that is scarce in the pattern: contiguous matches score lower.
    for size in range(1, len(query) + 1):
        score += (len(query) - patterns_text.count(sep*size))*100000

    # Only what lies between the first and the last matched letter counts:
    # spaces (word jumps) weigh more than skipped letters.
    inner = patterns_text.strip(let + u' ')
    score += inner.count(u' ')*10000
    score += inner.count(let)*100

    return original_choice, enriched_text, score


def get_search_scores(query, choices, ignore_case=True, template='{}',
                      valid_only=False, sort=False):
    """Search for query inside choices and return a list of tuples.

    Returns a list of tuples with the text, the enriched text (if a
    template is provided) and a score for the match. Lower scores imply a
    better match.

    Parameters
    ----------
    query : str
        String with letters to search in each choice (in order of
        appearance). Spaces in the query are ignored.
    choices : list of str
        List of sentences/words in which to search for the 'query' letters.
    ignore_case : bool, optional
        Perform a case insensitive search (True by default).
    template : str, optional
        Template string used to surround the letters found in choices.
        This is useful when using a rich text editor ('{}' by default).
        Example: '<b>{}</b>'
    valid_only : bool, optional
        If True, choices whose score is `NOT_FOUND_SCORE` are left out of
        the results (False by default).
    sort : bool, optional
        If True, results are sorted by ascending score (False by default).
        Entries scored `NOT_FOUND_SCORE` (-1) therefore come first unless
        `valid_only` is also True.

    Returns
    -------
    results : list of tuples
        List of tuples `(text, enriched_text, score)`. Lower scores means
        better match. With an empty query every choice is returned
        unmodified with `NO_SCORE`.
    """
    # First remove spaces from query
    query = query.replace(' ', '')
    pattern = get_search_regex(query, ignore_case)
    results = []

    for choice in choices:
        if not query:
            result = (choice, choice, NO_SCORE)
        elif pattern.search(choice):
            # The regex already matched, no need to apply it again
            result = get_search_score(query, choice, ignore_case=ignore_case,
                                      apply_regex=False, template=template)
        else:
            result = (choice, choice, NOT_FOUND_SCORE)

        if not valid_only or result[-1] != NOT_FOUND_SCORE:
            results.append(result)

    if sort:
        results = sorted(results, key=lambda row: row[-1])

    return results


def test():
    """Print the scores of a sample query against a list of action names."""
    template = '{0}'
    names = ['close pane', 'debug continue', 'debug exit', 'debug step into',
             'debug step over', 'debug step return', 'fullscreen mode',
             'layout preferences', 'lock unlock panes', 'maximize pane',
             'preferences', 'quit', 'restart', 'save current layout',
             'switch to breakpoints', 'switch to console', 'switch to editor',
             'switch to explorer', 'switch to find_in_files',
             'switch to historylog', 'switch to help',
             'switch to ipython_console', 'switch to onlinehelp',
             'switch to outline_explorer', 'switch to project_explorer',
             'switch to variable_explorer',
             'use next layout', 'use previous layout', 'clear line',
             'clear shell', 'inspect current object', 'blockcomment',
             'breakpoint', 'close all', 'code completion',
             'conditional breakpoint', 'configure', 'copy', 'copy line',
             'cut', 'debug', 'debug with winpdb', 'delete', 'delete line',
             'duplicate line', 'end of document', 'end of line',
             'file list management', 'find next', 'find previous', 'find text',
             'go to definition', 'go to line', 'go to next file',
             'go to previous file', 'inspect current object', 'kill next word',
             'kill previous word', 'kill to line end', 'kill to line start',
             'last edit location', 'move line down', 'move line up',
             'new file', 'next char', 'next cursor position', 'next line',
             'next word', 'open file', 'paste', 'previous char',
             'previous cursor position', 'previous line', 'previous word',
             'print', 're-run last script', 'redo', 'replace text',
             'rotate kill ring', 'run', 'run selection', 'save all', 'save as',
             'save file', 'select all', 'show/hide outline',
             'show/hide project explorer', 'start of document',
             'start of line', 'toggle comment', 'unblockcomment', 'undo',
             'yank', 'run profiler', 'run analysis']

    a = get_search_scores('lay', names, template=template, )
    b = get_search_scores('lay', names, template=template, valid_only=True,
                          sort=True)
    # Full results
    for r in a:
        print(r)  # spyder: test-skip

    # Ordered and filtered results
    print('\n')  # spyder: test-skip

    for r in b:
        print(r)  # spyder: test-skip


if __name__ == '__main__':
    test()