fix issue with special spaces
This commit is contained in:
parent
9ca9ac9de1
commit
70926afce6
1 changed file with 1 addition and 0 deletions
|
|
@@ -87,6 +87,7 @@ def remove_non_printable(text):
|
|||
text = "".join(
|
||||
char for char in text if unicodedata.category(char)[0] != "C" or char in "\n\t"
|
||||
)
|
||||
text = text.replace("\xa0", " ").strip()
|
||||
# Keep letters (including accented ones), numbers, spaces, newlines, tabs, and basic punctuation
|
||||
return re.sub(r"[^\w\s.,!?\-\n\t]", "", text, flags=re.UNICODE)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue