# How to get the text from a website in Python
import requests
from bs4 import BeautifulSoup  # pip install bs4

# Page whose text content is extracted and printed.
url = 'https://www.troyhunt.com/the-773-million-record-collection-1-data-reach/'

# A timeout keeps the script from blocking forever on an unresponsive server,
# and raise_for_status() aborts on 4xx/5xx responses instead of scraping an
# HTTP error page as though it were the requested article.
res = requests.get(url, timeout=30)
res.raise_for_status()
html_page = res.content

soup = BeautifulSoup(html_page, 'html.parser')

# Collect every text node in the parsed document. `string=True` is the
# current name of the deprecated `text=True` argument.
text = soup.find_all(string=True)

# Text nodes whose direct parent has one of these names are left out of the
# output.
blacklist = [
    '[document]',
    'noscript',
    'header',
    'html',
    'meta',
    'head',
    'input',
    'script',
    # there may be more elements you don't want, such as "style", etc.
]

# Build the result in one pass. Each kept fragment is followed by a single
# space, so the printed text matches what repeated `output += ...` produced.
output = ''.join(
    '{} '.format(t) for t in text if t.parent.name not in blacklist
)

print(output)