genspider scrapy
scrapy genspider mydomain mydomain.com
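This command generates a spider skeleton inside the project's spiders/ directory. The exact template varies between Scrapy versions, but the generated file looks roughly like this:

import scrapy


class MydomainSpider(scrapy.Spider):
    name = 'mydomain'
    allowed_domains = ['mydomain.com']
    start_urls = ['http://mydomain.com/']

    def parse(self, response):
        # callback invoked for each downloaded response
        pass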
scrapy project example
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor


class SuperSpider(CrawlSpider):
    name = 'spider'
    allowed_domains = ['quotes.toscrape.com']
    start_urls = ['http://quotes.toscrape.com/']
    base_url = 'http://quotes.toscrape.com'
    # Follow pagination links, skip tag pages, and send each page to the callback
    rules = [Rule(LinkExtractor(allow='page/', deny='tag/'),
                  callback='parse_filter_book', follow=True)]

    def parse_filter_book(self, response):
        for quote in response.css('div.quote'):
            yield {
                'Author': quote.xpath('.//span/a/@href').get(),
                'Quote': quote.xpath('.//span[@class="text"]/text()').get(),
            }
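The rules list is what makes this a CrawlSpider: allow='page/' matches the pagination links while deny='tag/' skips the tag pages, and every fetched page is handed to parse_filter_book. Assuming the spider lives inside a Scrapy project, it can be run and its items written to JSON with:

scrapy crawl spider -o quotes.json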
scrapy tutorial
# -*- coding: utf-8 -*-
import scrapy


class AliexpressTabletsSpider(scrapy.Spider):
    name = 'aliexpress_tablets'
    allowed_domains = ['aliexpress.com']
    start_urls = ['https://www.aliexpress.com/category/200216607/tablets.html',
                  'https://www.aliexpress.com/category/200216607/tablets/2.html?site=glo&g=y&tag=']

    def parse(self, response):
        print("processing: " + response.url)
        # Extract data using CSS selectors
        product_name = response.css('.product::text').extract()
        price_range = response.css('.value::text').extract()
        # Extract data using XPath
        orders = response.xpath("//em[@title='Total Orders']/text()").extract()
        company_name = response.xpath("//a[@class='store $p4pLog']/text()").extract()
        # Pair the extracted fields row-wise
        row_data = zip(product_name, price_range, orders, company_name)
        for item in row_data:
            # Create a dictionary to store the scraped info
            scraped_info = {
                'page': response.url,
                'product_name': item[0],  # each index selects one field from the row
                'price_range': item[1],
                'orders': item[2],
                'company_name': item[3],
            }
            # Yield the scraped info to Scrapy
            yield scraped_info
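As with the previous example, Scrapy's feed exports can write the yielded dictionaries straight to a file; the output filename below is just an example:

scrapy crawl aliexpress_tablets -o tablets.csv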