# How to scrape email addresses from a website using Python
from extract_emails import EmailExtractor
from extract_emails.browsers import BrowserInterface
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
class FirefoxBrowser(BrowserInterface):
    """Browser backend that fetches pages with Firefox through Selenium.

    Provides the ``close`` and ``get_page_source`` operations on top of a
    Selenium Firefox WebDriver so the instance can be handed to
    ``EmailExtractor``. Context-manager support (``with FirefoxBrowser() as
    browser``) is presumably inherited from ``BrowserInterface`` -- confirm
    against the installed ``extract_emails`` version.
    """

    def __init__(self, executable_path: str = "/home/di/geckodriver"):
        """Start a Firefox WebDriver session.

        Args:
            executable_path: Filesystem path to the geckodriver binary. The
                default is the path the original example hard-coded; pass
                your own location instead of editing the class.
        """
        ff_options = Options()
        # NOTE(review): the ``executable_path`` keyword is deprecated in
        # Selenium 4 and no longer accepted by recent 4.x releases; on those
        # versions pass ``service=Service(executable_path=...)`` instead.
        # Kept here so the example keeps working on the Selenium version it
        # was written for.
        self._driver = webdriver.Firefox(
            options=ff_options,
            executable_path=executable_path,
        )

    def close(self) -> None:
        """Quit the WebDriver session started in ``__init__``."""
        self._driver.quit()

    def get_page_source(self, url: str) -> str:
        """Navigate the driver to *url* and return the loaded page's source.

        Args:
            url: Address of the page to open.

        Returns:
            The driver's ``page_source`` after the navigation call returns.
        """
        self._driver.get(url)
        return self._driver.page_source
# Run the extraction while the browser is open; the ``with`` block hands
# cleanup of the browser to its context manager once ``get_emails`` returns.
with FirefoxBrowser() as browser:
    email_extractor = EmailExtractor("http://www.tomatinos.com/", browser, depth=2)
    emails = email_extractor.get_emails()

# The results are already collected, so printing does not need the browser.
for email in emails:
    print(email)
    print(email.as_dict())
# Example output (the addresses read "[email protected]" because they were
# apparently obfuscated in the page this snippet was copied from):
# Email(email="[email protected]", source_page="http://www.tomatinos.com/")
# {'email': '[email protected]', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="[email protected]", source_page="http://www.tomatinos.com/")
# {'email': '[email protected]', 'source_page': 'http://www.tomatinos.com/'}