Answered step by step

Verified Expert Solution

Link Copied!

Question

1 Approved Answer

Posted on Sep 06, 2024

I have written the code for extracting the data but the data is not scrapping into the csv file or excel sheet import scrapy import

I have written the code for extracting the data but the data is not scrapping into the csv file or excel sheet

import scrapy

import pandas as pd

import time

import csv

starttime=time.time()

class ArtisanDataSpider(scrapy.Spider):

name = "artisan_data"

start_urls = ['http://www.handicrafts.nic.in/ArtisanData.aspx?MID=SZmOd%2fCrxTo9CHD2XKF+pA%3d%3d']

def parse(self, response):

# Select the form and fill in the form data

form = response.xpath('//form[@id="form1"]')

form.xpath('.//select[@name="ddlState"]/option[text()="Uttar Pradesh"]/@value').extract_first()

form.xpath('.//select[@name="ddlDistrict"]/option[text()="Sant Ravidas Nagar"]/@value').extract_first()

form.xpath('.//select[@name="ddlDistrict"]/option[text()="Agra"]/@value').extract_first()

form.xpath('.//select[@name="ddlDistrict"]/option[text()="Varanasi"]/@value').extract_first()

yield scrapy.FormRequest.from_response(response, formdata={'ddlState': 'Uttar Pradesh', 'ddlDistrict': ['Sant Ravidas Nagar', 'Agra', 'Varanasi'],'btnSubmit': 'Submit'},

callback=self.parse_result)

def parse_result(self, response):

rows = response.xpath('//table[@id="gvArtisanData"]/tr')

for row in rows:

PEHCHAN_CARD_NO = row.xpath('./td[1]/text()').extract_first()

ARTISIAN_NAME = row.xpath('./td[2]/text()').extract_first()

Father_spouse = row.xpath('./td[3]/text()').extract_first()

Category = row.xpath('./td[4]/text()').extract_first()

AADHARNO = row.xpath('./td[5]/text()').extract_first()

NAME_OF_CRAFT = row.xpath('./td[6]/text()').extract_first()

MOBILENO= row.xpath('./td[7]/text()').extract_first()

VILLAGE= row.xpath('./td[8]/text()').extract_first()

TOWN= row.xpath('./td[9]/text()').extract_first()

CITY = row.xpath('./td[10]/text()').extract_first()

DISTRICT = row.xpath('./td[11]/text()').extract_first()

STATE = row.xpath('./td[12]/text()').extract_first()

yield {'PEHCHAN_CARD_NO':PEHCHAN_CARD_NO, 'ARTISIAN_NAME':ARTISIAN_NAME, 'Father_spouse':Father_spouse, 'Category': Category, 'AADHAR_NO': AADHAR_NO, 'NAME_OF_CRAFT':NAME_OF_CRAFT,'MOBILENO':MOBILENO,'VILLAGE':VILLAGE,'TOWN':TOWN,'CITY':CITY, 'DISTRICT':DISTRICT, 'STATE':STATE}

next_page = response.xpath('//a[text()="Next"]/@href').extract_first()

if next_page:

stripped = (line.strip() for line in scrapy.Request(response.urljoin(next_page), callback=self.parse_result))

lines = (line.split(",") for line in stripped if line)

with open('log.csv', 'w') as out_file:

writer = csv.writer(out_file)

writer.writerow(('title', 'intro'))

writer.writerows(lines)

endtime=time.time()

result=endtime-starttime

print("the time taken is:", result)

I am getting 0 items were scrapped

STATE = row.xpath('./td[12]/text()').extract_first()

next_page = response.xpath('//a[text()="Next"]/@href').extract_first()

stripped = (line.strip() for line in scrapy.Request(response.urljoin(next_page), callback=self.parse_result))

lines = (line.split(",") for line in stripped if line)

with open('log.csv', 'w') as out_file:

writer = csv.writer(out_file)

writer.writerow(('title', 'intro'))

writer.writerows(lines)

endtime=time.time()

result=endtime-starttime

print("the time taken is:", result)

Step by Step Solution

There are 3 Steps involved in it

Step: 1

Get Instant Access to Expert-Tailored Solutions

See step-by-step solutions with expert insights and AI powered tools for academic success

Step: 2

Step: 3

Ace Your Homework with AI

Get the answers you need in no time with our AI-driven, step-by-step assistance

Get Started

Recommended Textbook for

Data And Databases

Authors: Jeff Mapua

1st Edition

★★★★★

1. Describe the goals of informative speaking

Answered: 1 week ago

Previous Question Next Question