Question
I have written the code for extracting the data but the data is not scrapping into the csv file or excel sheet import scrapy import
I have written the code for extracting the data but the data is not scrapping into the csv file or excel sheet
import scrapy
import pandas as pd
import time
import csv
starttime=time.time()
class ArtisanDataSpider(scrapy.Spider):
name = "artisan_data"
start_urls = ['http://www.handicrafts.nic.in/ArtisanData.aspx?MID=SZmOd%2fCrxTo9CHD2XKF+pA%3d%3d']
def parse(self, response):
# Select the form and fill in the form data
form = response.xpath('//form[@id="form1"]')
form.xpath('.//select[@name="ddlState"]/option[text()="Uttar Pradesh"]/@value').extract_first()
form.xpath('.//select[@name="ddlDistrict"]/option[text()="Sant Ravidas Nagar"]/@value').extract_first()
form.xpath('.//select[@name="ddlDistrict"]/option[text()="Agra"]/@value').extract_first()
form.xpath('.//select[@name="ddlDistrict"]/option[text()="Varanasi"]/@value').extract_first()
yield scrapy.FormRequest.from_response(response, formdata={'ddlState': 'Uttar Pradesh', 'ddlDistrict': ['Sant Ravidas Nagar', 'Agra', 'Varanasi'],'btnSubmit': 'Submit'},
callback=self.parse_result)
def parse_result(self, response):
rows = response.xpath('//table[@id="gvArtisanData"]/tr')
for row in rows:
PEHCHAN_CARD_NO = row.xpath('./td[1]/text()').extract_first()
ARTISIAN_NAME = row.xpath('./td[2]/text()').extract_first()
Father_spouse = row.xpath('./td[3]/text()').extract_first()
Category = row.xpath('./td[4]/text()').extract_first()
AADHARNO = row.xpath('./td[5]/text()').extract_first()
NAME_OF_CRAFT = row.xpath('./td[6]/text()').extract_first()
MOBILENO= row.xpath('./td[7]/text()').extract_first()
VILLAGE= row.xpath('./td[8]/text()').extract_first()
TOWN= row.xpath('./td[9]/text()').extract_first()
CITY = row.xpath('./td[10]/text()').extract_first()
DISTRICT = row.xpath('./td[11]/text()').extract_first()
STATE = row.xpath('./td[12]/text()').extract_first()
yield {'PEHCHAN_CARD_NO':PEHCHAN_CARD_NO, 'ARTISIAN_NAME':ARTISIAN_NAME, 'Father_spouse':Father_spouse, 'Category': Category, 'AADHAR_NO': AADHAR_NO, 'NAME_OF_CRAFT':NAME_OF_CRAFT,'MOBILENO':MOBILENO,'VILLAGE':VILLAGE,'TOWN':TOWN,'CITY':CITY, 'DISTRICT':DISTRICT, 'STATE':STATE}
next_page = response.xpath('//a[text()="Next"]/@href').extract_first()
if next_page:
stripped = (line.strip() for line in scrapy.Request(response.urljoin(next_page), callback=self.parse_result))
lines = (line.split(",") for line in stripped if line)
with open('log.csv', 'w') as out_file:
writer = csv.writer(out_file)
writer.writerow(('title', 'intro'))
writer.writerows(lines)
endtime=time.time()
result=endtime-starttime
print("the time taken is:", result)
I am getting 0 items were scrapped
STATE = row.xpath('./td[12]/text()').extract_first()
yield {'PEHCHAN_CARD_NO':PEHCHAN_CARD_NO, 'ARTISIAN_NAME':ARTISIAN_NAME, 'Father_spouse':Father_spouse, 'Category': Category, 'AADHAR_NO': AADHAR_NO, 'NAME_OF_CRAFT':NAME_OF_CRAFT,'MOBILENO':MOBILENO,'VILLAGE':VILLAGE,'TOWN':TOWN,'CITY':CITY, 'DISTRICT':DISTRICT, 'STATE':STATE}
next_page = response.xpath('//a[text()="Next"]/@href').extract_first()
stripped = (line.strip() for line in scrapy.Request(response.urljoin(next_page), callback=self.parse_result))
lines = (line.split(",") for line in stripped if line)
with open('log.csv', 'w') as out_file:
writer = csv.writer(out_file)
writer.writerow(('title', 'intro'))
writer.writerows(lines)
endtime=time.time()
result=endtime-starttime
print("the time taken is:", result)
Step by Step Solution
There are 3 Steps involved in it
Step: 1
Get Instant Access to Expert-Tailored Solutions
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started