# Scrapy: extract links
from scrapy.spiders import CrawlSpider
class SuperSpider(CrawlSpider):
    """Emit every link found inside the paragraphs of the start page.

    The spider fetches the URLs in ``start_urls`` and yields one item of the
    form ``{"link": <absolute URL>}`` for each ``<a href=...>`` matched by
    the XPath ``//div/p/a``. No crawl ``rules`` are defined, so no further
    pages are followed.
    """

    name = 'extractor'
    allowed_domains = ['en.wikipedia.org']
    start_urls = ['https://en.wikipedia.org/wiki/Python_(programming_language)']
    # Not used for building links any more (``parse`` resolves hrefs against
    # the response URL); kept so external code reading this attribute still
    # works.
    base_url = 'https://en.wikipedia.org'

    def parse_start_url(self, response, **kwargs):
        """Handle a response for one of ``start_urls``.

        ``CrawlSpider`` documents this method as the hook for start-URL
        responses and reserves ``parse`` for its own logic, so the extraction
        is exposed here as well as through ``parse``.

        Args:
            response: The downloaded start page.
            **kwargs: Extra callback keyword arguments, forwarded unchanged.

        Returns:
            The iterable of link items produced by ``parse``.
        """
        return self.parse(response, **kwargs)

    def parse(self, response, **kwargs):
        """Yield one ``{"link": url}`` item per paragraph link in *response*.

        Args:
            response: The page to extract links from.
            **kwargs: Accepted for callback compatibility; unused.

        Yields:
            dict: ``{"link": <absolute URL>}`` for every matched anchor that
            has an ``href`` attribute.
        """
        # Selecting the attribute itself skips anchors without an href, which
        # previously produced ``None`` and broke the string concatenation.
        for href in response.xpath('//div/p/a/@href').getall():
            # urljoin resolves relative, absolute, protocol-relative and
            # fragment-only hrefs against the page URL; plain concatenation
            # with the site root was only right for root-relative paths.
            yield {"link": response.urljoin(href)}