data processing

# English stopwords, stored with apostrophes removed so they can be compared
# against tokens *after* punctuation has been stripped from the input
# (e.g. "don't" arrives as "dont"). Built once; set membership is O(1).
_STOPWORDS = frozenset(
    word.replace("'", "")
    for word in (
        'their', 'during', 'doesn', "she's", 'ourselves', 'weren', 'll', 'has', 'had', 'we',
        'not', 'if', 'will', 'such', 'needn', 'own', "isn't", "you'll", 'or', 'o',
        'hadn', 'but', 'his', 'my', 'y', 'she', "doesn't", 'haven', 'herself', 'and',
        'other', 'hasn', 'having', 'ain', 'doing', 'mustn', 'he', "that'll", 'being', 'most',
        'did', 'is', 'same', "weren't", 'itself', 'yourselves', 'too', 'ma', 's', 'do',
        'just', 'all', 'on', 'than', 'didn', 'couldn', 'some', 'until', 'under', 'your',
        'its', 'a', 'further', 'isn', "needn't", 'over', 'below', 'where', 'theirs', 'to',
        'more', "you'd", 'her', "shan't", 'with', 'while', 'nor', 'wouldn', "aren't", 'wasn',
        "won't", 'for', 't', 'aren', "mightn't", 'them', "couldn't", 'was', 'be', 'should',
        'i', 'mightn', 'themselves', 'out', 'no', 'have', 'am', 'off', 'any', "don't",
        'ours', 'it', 'are', 'by', "wasn't", 'each', 'whom', "haven't", 'yourself', 'won',
        "hasn't", "should've", 'me', 'can', 'when', 'only', 'now', 'm', 'above', 'up',
        'in', 'does', 'because', 'myself', 'hers', 'of', 'into', 'very', 'between', 'here',
        'the', 'why', 'after', 'yours', 'there', 'these', "you're", 'few', 'don', 'both',
        'which', 'who', 're', 'that', 'through', 'before', 'd', "you've", 'as', 'shouldn',
        "wouldn't", 'an', 'about', 'those', 'how', "it's", 'been', 'what', "shouldn't", 'down',
        'at', 'you', 'so', 'him', 'they', 'himself', 've', 'from', "hadn't", "mustn't",
        'then', 'were', 'our', "didn't", 'again', 'against', 'once', 'shan', 'this',
    )
)


def filter(string):
    """Normalize text into a single whitespace-free key string.

    Steps, in order:
      1. lowercase the input and drop every character that is neither a
         word character nor whitespace (punctuation);
      2. drop digit characters;
      3. split on whitespace and discard English stopwords;
      4. strip one trailing "s" from each remaining token (naive plural
         removal);
      5. concatenate the tokens with no separator.

    NOTE: the name shadows the ``filter`` builtin; it is kept so existing
    callers continue to work.

    Args:
        string: The text to normalize.

    Returns:
        The normalized tokens joined together without any whitespace.
        An empty string if nothing survives filtering.
    """
    import re  # local import: the surrounding file does not import it

    text = re.sub(r'[^\w\s]', '', string.lower())  # remove punctuation
    text = ''.join(ch for ch in text if not ch.isdigit())  # remove digits

    kept = [token for token in text.split() if token not in _STOPWORDS]

    # Remove only the final "s"; str.replace would delete every "s" in the
    # word (turning "classes" into "clae").
    stemmed = [token[:-1] if token.endswith("s") else token for token in kept]

    # Tokens produced by split() contain no whitespace, so joining on the
    # empty string yields the whitespace-free result directly.
    return ''.join(stemmed)

Posted by: Guest on February-06-2021

Code answers related to "data processing"

Code answers related to "Whatever"

Browse Popular Code Answers by Language

Answers for "data processing"

Code answers related to "data processing"

Code answers related to "Whatever"

Browse Popular Code Answers by Language

Popular Programming Languages

Advertisements

Company

Compilers

Help

Connect with us