web scraper python
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
web scraper python
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
web scraper python
>>> from bs4 import BeautifulSoup
>>> raw_html = open('contrived.html').read()
>>> html = BeautifulSoup(raw_html, 'html.parser')
>>> for p in html.select('p'):
... if p['id'] == 'walrus':
... print(p.text)
'I am the walrus'
web scraper python
>>> raw_html = simple_get('http://www.fabpedigree.com/james/mathmen.htm')
>>> html = BeautifulSoup(raw_html, 'html.parser')
>>> for i, li in enumerate(html.select('li')):
print(i, li.text)
0 Isaac Newton
Archimedes
Carl F. Gauss
Leonhard Euler
Bernhard Riemann
1 Archimedes
Carl F. Gauss
Leonhard Euler
Bernhard Riemann
2 Carl F. Gauss
Leonhard Euler
Bernhard Riemann
3 Leonhard Euler
Bernhard Riemann
4 Bernhard Riemann
# 5 ... and many more...
web scraper python
def get_hits_on_name(name):
"""
Accepts a `name` of a mathematician and returns the number
of hits that mathematician's Wikipedia page received in the
last 60 days, as an `int`
"""
# url_root is a template string that is used to build a URL.
url_root = 'URL_REMOVED_SEE_NOTICE_AT_START_OF_ARTICLE'
response = simple_get(url_root.format(name))
if response is not None:
html = BeautifulSoup(response, 'html.parser')
hit_link = [a for a in html.select('a')
if a['href'].find('latest-60') > -1]
if len(hit_link) > 0:
# Strip commas
link_text = hit_link[0].text.replace(',', '')
try:
# Convert to integer
return int(link_text)
except:
log_error("couldn't parse {} as an `int`".format(link_text))
log_error('No pageviews found for {}'.format(name))
return None
Copyright © 2021 Codeinu
Forgot your account's password or having trouble logging into your Account? Don't worry, we'll help you to get back your account. Enter your email address and we'll send you a recovery link to reset your password. If you are experiencing problems resetting your password contact us