Answers for "spacy clean text and split by sentences"


remove punctuation from string python

# With re: delete every character that is neither a word character (\w)
# nor whitespace (\s). Note that \w includes the underscore, so "_" is kept.
import re
import string

s = "string. With. Punctuation?"
s = re.sub(r'[^\w\s]', '', s)

# Without re: delete every character listed in string.punctuation
# (ASCII punctuation only, underscore included).
s = "string. With. Punctuation?"
# str is immutable, so translate() returns a new string that must be rebound.
s = s.translate(str.maketrans('', '', string.punctuation))
Posted by: Guest on May-26-2020

spacy tokenize

# Construction 1
from spacy.tokenizer import Tokenizer
from spacy.lang.en import English
nlp = English()
# Create a blank Tokenizer with just the English vocab
tokenizer = Tokenizer(nlp.vocab)

# Construction 2
from spacy.lang.en import English
nlp = English()
# Use the Tokenizer with the default settings for English,
# including punctuation rules and exceptions.
# NOTE: nlp.Defaults.create_tokenizer(nlp) was removed in spaCy v3;
# the pipeline's own `tokenizer` attribute is already built from the
# language defaults and is available in both v2 and v3.
tokenizer = nlp.tokenizer
Posted by: Guest on September-11-2020

Python Answers by Framework

Browse Popular Code Answers by Language