python sentence splitter
# Interactive session: split a paragraph into sentences with NLTK's sent_tokenize.
# In the output shown below, the abbreviation "Dr." does not end a sentence.
>>> from nltk import tokenize
>>> p = "Good morning Dr. Adams. The patient is waiting for you in room number 3."
>>> tokenize.sent_tokenize(p)
['Good morning Dr. Adams.', 'The patient is waiting for you in room number 3.']
python sentence splitter
# Interactive session: tokenize.sent_tokenize(p) returns a list with one string per sentence.
# The result shown keeps "Good morning Dr. Adams." together as a single sentence.
>>> from nltk import tokenize
>>> p = "Good morning Dr. Adams. The patient is waiting for you in room number 3."
>>> tokenize.sent_tokenize(p)
['Good morning Dr. Adams.', 'The patient is waiting for you in room number 3.']
python sentence splitter
# -*- coding: utf-8 -*-
import re
# Regex fragments used by split_into_sentences(). All are raw strings so that
# "\s" and friends reach the regex engine instead of being eaten (or silently
# dropped) as Python string escapes.
alphabets = r"([A-Za-z])"
prefixes = r"(Mr|St|Mrs|Ms|Dr)[.]"
suffixes = r"(Inc|Ltd|Jr|Sr|Co)"
# Words that typically begin a new sentence. The trailing "\s" on the short
# words requires a following whitespace character so "He" does not match "Hello".
starters = (
    r"(Mr|Mrs|Ms|Dr|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s"
    r"|However\s|That\s|This\s|Wherever)"
)
acronyms = r"([A-Z][.][A-Z][.](?:[A-Z][.])?)"
websites = r"[.](com|net|org|io|gov)"


def split_into_sentences(text):
    """Split *text* into a list of sentence strings using regex heuristics.

    The function works by marking every period that does NOT end a sentence
    with the placeholder ``<prd>`` (titles such as "Dr.", single initials,
    acronyms, company suffixes, web domains, "Ph.D."), then appending
    ``<stop>`` after every remaining ".", "?" and "!", restoring the
    placeholders and finally splitting on ``<stop>``.

    Args:
        text: The input string. Newlines are treated as spaces.

    Returns:
        A list of sentences with surrounding whitespace stripped. Empty
        fragments are omitted; trailing text that has no terminal punctuation
        is returned as the last element rather than discarded.

    Note:
        Sentence-final punctuation that sits directly before a closing quote
        (``."``, ``!"``, ``?"``, ``.”``) is moved after the quote so the quote
        stays attached to its sentence.
    """
    # Pad with spaces so patterns anchored on a leading/trailing space also
    # match at the very start and end of the input.
    text = " " + text + " "
    text = text.replace("\n", " ")

    # Protect periods that are part of a token rather than a sentence end.
    text = re.sub(prefixes, r"\1<prd>", text)
    text = re.sub(websites, r"<prd>\1", text)
    text = text.replace("Ph.D.", "Ph<prd>D<prd>")
    text = re.sub(r"\s" + alphabets + "[.] ", r" \1<prd> ", text)

    # An acronym followed by a sentence starter does end the sentence; this
    # must run before the acronym periods themselves are protected below.
    text = re.sub(acronyms + " " + starters, r"\1<stop> \2", text)
    text = re.sub(
        alphabets + "[.]" + alphabets + "[.]" + alphabets + "[.]",
        r"\1<prd>\2<prd>\3<prd>",
        text,
    )
    text = re.sub(alphabets + "[.]" + alphabets + "[.]", r"\1<prd>\2<prd>", text)

    # A suffix such as "Inc." followed by a starter ends the sentence. The
    # period is kept as <prd> so it survives in the output without producing
    # a second <stop> in the blanket replacement further down.
    text = re.sub(" " + suffixes + "[.] " + starters, r" \1<prd><stop> \2", text)
    text = re.sub(" " + suffixes + "[.]", r" \1<prd>", text)
    text = re.sub(" " + alphabets + "[.]", r" \1<prd>", text)

    # Move terminal punctuation outside a closing quote so the quote is not
    # split off into the next sentence.
    text = text.replace(".”", "”.")
    text = text.replace('."', '".')
    text = text.replace('!"', '"!')
    text = text.replace('?"', '"?')

    # Every remaining terminator is a real sentence boundary.
    text = text.replace(".", ".<stop>")
    text = text.replace("?", "?<stop>")
    text = text.replace("!", "!<stop>")
    text = text.replace("<prd>", ".")

    pieces = (piece.strip() for piece in text.split("<stop>"))
    return [piece for piece in pieces if piece]
Splitting strings in Python without split()
# Split a sentence into words without str.split(): accumulate consecutive
# alphabetic characters and print the word whenever any other character
# (space, digit, punctuation) is reached.
sentence = 'This is a sentence'
word = ""
for w in sentence:
    if w.isalpha():
        word += w
    else:
        # Skip empty words so runs of separators do not print blank lines.
        if word:
            print(word)
        word = ""
# Emit the last word, which has no separator after it.
if word:
    print(word)
Copyright © 2021 Codeinu
Forgot your account's password or having trouble logging in to your account? Don't worry, we'll help you get back into your account. Enter your email address and we'll send you a recovery link to reset your password. If you are experiencing problems resetting your password, contact us.