# Separate the link from the tweet text in R
library(stringr)
# Load the raw export, one record per line, into a single column (V1).
# Quote and comment handling are switched off: with read.table()'s defaults
# an apostrophe or double quote inside a line is treated as a string
# delimiter (merging or truncating records) and everything after a "#" is
# discarded, both of which corrupt free text such as tweets.
RawData <- read.table(
  "DB_YS.txt",
  sep = "\n",
  header = FALSE,
  quote = "",
  comment.char = "",
  stringsAsFactors = FALSE
)

# Extract the date at the end of each line (letters, a day number, a comma
# and a four-digit year, optionally followed by spaces). Lines without a
# match yield NA.
dates <- str_extract(RawData$V1, "[A-Za-z]+ \\d+, \\d{4} *$")

# Extract the URL, assuming every URL starts with "http".
# NOTE(review): the pattern is greedy and runs to the end of the line, so
# anything that follows the link on the same line is captured as part of
# the URL -- confirm this matches the layout of the input file.
url <- str_extract(RawData$V1, "http.+")

# Remove that same span (from "http" to the end of the line) to leave the
# tweet text.
text <- gsub("http.+", "", RawData$V1)

# Remove the " indyref" marker from the text (per the original author, it
# indicates where the URL is placed in the tweet). The match is literal,
# so fixed = TRUE avoids regex interpretation.
text <- gsub(" indyref", "", text, fixed = TRUE)

# Assemble the tidy result: one row per input line, character columns.
Data <- data.frame(dates, text, url, stringsAsFactors = FALSE)