Answers for "scrapy tutorial"

1

scrapy project example

class SuperSpider(CrawlSpider):
    name = 'spider'
    allowed_domains = ['quotes.toscrape.com']
    start_urls = ['http://quotes.toscrape.com/']
    base_url = 'http://quotes.toscrape.com'
    rules = [Rule(LinkExtractor(allow = 'page/', deny='tag/'),
                  callback='parse_filter_book', follow=True)]
 
    def parse_filter_book(self, response):
        for quote in response.css('div.quote'):
            yield {
                'Author': quote.xpath('.//span/a/@href').get(),
                'Quote': quote.xpath('.//span[@class= "text"]/text()').get(),
Posted by: Guest on August-04-2021
2

scrapy create project

scrapy startproject projectname
Posted by: Guest on June-03-2020
1

scrapy tutorial

# -*- coding: utf-8 -*-
import scrapy


class AliexpressTabletsSpider(scrapy.Spider):
    name = 'aliexpress_tablets'
    allowed_domains = ['aliexpress.com']
    start_urls = ['https://www.aliexpress.com/category/200216607/tablets.html',
                 'https://www.aliexpress.com/category/200216607/tablets/2.html?site=glo&g=y&tag=']


    def parse(self, response):

        print("procesing:"+response.url)
        #Extract data using css selectors
        product_name=response.css('.product::text').extract()
        price_range=response.css('.value::text').extract()
        #Extract data using xpath
        orders=response.xpath("//em[@title='Total Orders']/text()").extract()
        company_name=response.xpath("//a[@class='store $p4pLog']/text()").extract()

        row_data=zip(product_name,price_range,orders,company_name)

        #Making extracted data row wise
        for item in row_data:
            #create a dictionary to store the scraped info
            scraped_info = {
                #key:value
                'page':response.url,
                'product_name' : item[0], #item[0] means product in the list and so on, index tells what value to assign
                'price_range' : item[1],
                'orders' : item[2],
                'company_name' : item[3],
            }

            #yield or give the scraped info to scrapy
            yield scraped_info
Posted by: Guest on April-24-2021

Python Answers by Framework

Browse Popular Code Answers by Language