Scrapy project example
class SuperSpider(CrawlSpider):
name = 'spider'
allowed_domains = ['quotes.toscrape.com']
start_urls = ['http://quotes.toscrape.com/']
base_url = 'http://quotes.toscrape.com'
rules = [Rule(LinkExtractor(allow = 'page/', deny='tag/'),
callback='parse_filter_book', follow=True)]
def parse_filter_book(self, response):
for quote in response.css('div.quote'):
yield {
'Author': quote.xpath('.//span/a/@href').get(),
'Quote': quote.xpath('.//span[@class= "text"]/text()').get(),