Source code for clean_text_rhoni.clean_text_rhoni

"""Main module.

Import the BaseCleanText class with base functions to clean and normalize text,
and build derived functions that combine cleaning processes.
"""
from clean_text_rhoni.base_clean_text import BaseCleanText


def clean_text(text):
    """Run the full cleaning pipeline over the input text.

    The steps below are applied in order, each one receiving the output
    of the previous one:

    1. Strip leading and trailing spaces.
    2. Collapse multiple spaces into a single space.
    3. Convert the text to lowercase.
    4. Remove accents from vowels.
    5. Remove special characters.
    6. Remove the tilde from ñ.

    Parameters
    ----------
    text: str
        The input text to be cleaned.

    Returns
    -------
    text: str
        The text after every cleaning step has been applied.

    Examples
    --------
    >>> clean_text(" Hola Sofía!, cómo estás? ")
    'hola sofia como estas'
    """
    cleaner = BaseCleanText()
    # Order matters: each step consumes the result of the one before it.
    pipeline = (
        cleaner.remove_leading_trailing_spaces,
        cleaner.replace_multiple_spaces,
        cleaner.transform_to_lowercase,
        cleaner.remove_accents,
        cleaner.remove_special_characters,
        cleaner.remove_n_tilde,
    )
    for step in pipeline:
        text = step(text)
    return text
def clean_text_snake_case(text):
    """Clean the input text and convert the result to snake case.

    The text is first passed through ``clean_text`` and the spaces that
    remain are then replaced by underscores. The overall sequence is:

    1. Removing leading and trailing spaces.
    2. Removing multiple spaces and replacing them with a single space.
    3. Converting the text to lowercase.
    4. Removing accents from vowels.
    5. Removing special characters.
    6. Removing tilde from ñ.
    7. Replacing spaces by underscores.

    Parameters
    ----------
    text: str
        The input text to be cleaned.

    Returns
    -------
    text: str
        The cleaned text, with spaces replaced by underscores.

    Examples
    --------
    >>> clean_text_snake_case(" Hola Sofía!, cómo estás? ")
    'hola_sofia_como_estas'
    """
    # Reuse the shared cleaning pipeline so both entry points stay in sync;
    # only the underscore substitution is specific to this function.
    cleaned = clean_text(text=text)
    return BaseCleanText().replace_spaces_by_underscores(cleaned)