Support a wide range of encodings for text files. (#458)
### What problem does this PR solve?

#384

### Type of change

- [x] Performance Improvement
This commit is contained in:
@@ -8,6 +8,7 @@ import re
|
||||
import string
|
||||
import sys
|
||||
from hanziconv import HanziConv
|
||||
from huggingface_hub import snapshot_download
|
||||
from nltk import word_tokenize
|
||||
from nltk.stem import PorterStemmer, WordNetLemmatizer
|
||||
from api.utils.file_utils import get_project_base_directory
|
||||
|
||||
Reference in New Issue
Block a user