beautifulsoup4
from bs4 import BeautifulSoup  # works very well with the Requests or urllib modules

# Quick Start
#
#     from bs4 import BeautifulSoup
#     soup = BeautifulSoup("<p>Some<b>bad<i>HTML")
#     print(soup.prettify())
#
# OUTPUT:
#
#     <html>
#      <body>
#       <p>
#        Some
#        <b>
#         bad
#         <i>
#          HTML
#         </i>
#        </b>
#       </p>
#      </body>
#     </html>
#
#     >>> soup.find(text="bad")
#     'bad'
#     >>> soup.i
#     <i>HTML</i>
#
# Another Example
#
#     >>> soup = BeautifulSoup("<tag1>Some<tag2/>bad<tag3>XML", "xml")
#     >>> print(soup.prettify())
#     <?xml version="1.0" encoding="utf-8"?>
#     <tag1>
#      Some
#      <tag2/>
#      bad
#      <tag3>
#       XML
#      </tag3>
#     </tag1>