Remove URLs from text in Python
import re
# Strip every http:// or https:// URL from `text`.
# The `+` quantifier must live inside the pattern so the character group
# repeats; a raw string keeps `\(` / `\)` from being treated as (invalid)
# string escapes.
text = re.sub(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
    '',
    text,
    flags=re.MULTILINE,
)
Remove URLs from text in Python
import re
# Strip every http:// or https:// URL from `text`.
# The `+` quantifier must live inside the pattern so the character group
# repeats; a raw string keeps `\(` / `\)` from being treated as (invalid)
# string escapes.
text = re.sub(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
    '',
    text,
    flags=re.MULTILINE,
)
Separate the link from a tweet in R
library(stringr)
# Load the raw data, one record per line, into a one-column data frame (V1).
# quote = "" and comment.char = "" matter for tweet text: with the
# read.table() defaults an apostrophe or double quote opens a quoted field
# that can swallow the following lines, and a "#" (hashtag) silently
# truncates the rest of the line.
RawData <- read.table(
  "DB_YS.txt",
  sep = "\n",
  header = FALSE,
  quote = "",
  comment.char = "",
  stringsAsFactors = FALSE
)

# Extract the date at the end of each line ("<Month> <day>, <year>",
# optionally followed by spaces) into a new vector called dates.
dates <- str_extract(RawData$V1, "[A-Za-z]+ \\d+, \\d{4} *$")

# Extract the first URL of each line, assuming every URL starts with "http".
# Matching non-whitespace only ("\\S+") stops at the end of the URL, so the
# trailing date is not absorbed into the url vector.
url <- str_extract(RawData$V1, "http\\S+")

# Remove the URL and everything after it from each line; what is left is
# stored in a vector called text.
text <- gsub("http.+", "", RawData$V1)

# Remove the " indyref" marker that precedes the URL in a tweet and
# overwrite the result in the text vector.
text <- gsub(" indyref", "", text)

# Create a data.frame containing the tidy data (columns: dates, text, url).
Data <- data.frame(dates, text, url, stringsAsFactors = FALSE)
Copyright © 2021 Codeinu
Forgot your account's password or having trouble logging into your account? Don't worry, we'll help you get your account back. Enter your email address and we'll send you a recovery link to reset your password. If you are experiencing problems resetting your password, contact us.