Question
below I have written the code for extracting the data from url but the data is not getting in excel sheet and also the time
below I have written the code for extracting the data from url but the data is not getting in excel sheet and also the time calculation is getting 0 secs
import scrapy
import pandas as pd
import time
starttime=time.time()
class ArtisanDataSpider(scrapy.Spider):
name = "artisan_data"
start_urls = ['http://www.handicrafts.nic.in/ArtisanData.aspx?MID=SZmOd%2fCrxTo9CHD2XKF+pA%3d%3d']
def parse(self, response):
# Select the form and fill in the form data
form = response.xpath('//form[@id="form1"]')
form.xpath('.//select[@name="ddlState"]/option[text()="Uttar Pradesh"]/@value').extract_first()
form.xpath('.//select[@name="ddlDistrict"]/option[text()="Sant Ravidas Nagar"]/@value').extract_first()
form.xpath('.//select[@name="ddlDistrict"]/option[text()="Agra"]/@value').extract_first()
form.xpath('.//select[@name="ddlDistrict"]/option[text()="Varanasi"]/@value').extract_first()
yield scrapy.FormRequest.from_response(response, formdata={'ddlState': 'Uttar Pradesh', 'ddlDistrict': ['Sant Ravidas Nagar', 'Agra', 'Varanasi'],'btnSubmit': 'Submit'},
callback=self.parse_result)
def parse_result(self, response):
rows = response.xpath('//table[@id="gvArtisanData"]/tr')
data = []
for row in rows:
PEHCHAN_CARD_NO = row.xpath('./td[1]/text()').extract_first()
ARTISIAN_NAME = row.xpath('./td[2]/text()').extract_first()
Father_spouse = row.xpath('./td[3]/text()').extract_first()
Category = row.xpath('./td[4]/text()').extract_first()
AADHARNO = row.xpath('./td[5]/text()').extract_first()
NAME_OF_CRAFT = row.xpath('./td[6]/text()').extract_first()
MOBILENO= row.xpath('./td[7]/text()').extract_first()
VILLAGE= row.xpath('./td[8]/text()').extract_first()
TOWN= row.xpath('./td[9]/text()').extract_first()
CITY = row.xpath('./td[10]/text()').extract_first()
DISTRICT = row.xpath('./td[11]/text()').extract_first()
STATE = row.xpath('./td[12]/text()').extract_first()
data.append({'PEHCHAN_CARD_NO':PEHCHAN_CARD_NO, 'ARTISIAN_NAME':ARTISIAN_NAME, 'Father_spouse':Father_spouse, 'Category': Category, 'AADHAR_NO': AADHAR_NO, 'NAME_OF_CRAFT':NAME_OF_CRAFT,'MOBILENO':MOBILENO,'VILLAGE':VILLAGE,'TOWN':TOWN,'CITY':CITY, 'DISTRICT':DISTRICT, 'STATE':STATE})
CHAN_CARD_NO, 'ARTISIAN_NAME':ARTISIAN_NAME, 'Father_spouse':Father_spouse, 'Category': Category, 'AADHARNO': AADHARNO, 'NAME_OF_CRAFT':NAME_OF_CRAFT,'MOBILENO':MOBILENO,'VILLAGE':VILLAGE,'TOWN':TOWN,'CITY':CITY, 'DISTRICT':DISTRICT, 'STATE':STATE})
df = pd.DataFrame(data)
df.to_excel('artisan_data.xlsx')
next_page = response.xpath('//a[text()="Next"]/@href').extract_first()
if next_page:
yield scrapy.Request(response.urljoin(next_page), callback=self.parse_result)
endtime=time.time()
result=endtime-starttime
print("the time taken is:", result)
output is getting the time as 0.0
C:\Users\Admin\Desktop\python scrapping> & C:/Users/Admin/AppData/Local/Programs/Python/Python311/python.exe "c:/Users/Admin/Desktop/python scrapping/firstproject/firstproject/spiders/pythonspider.py" the time taken is: 0.0
can u please help me out the code where I am going to miss the code I need all the data in excel sheet and time should be calculated
Step by Step Solution
There are 3 Steps involved in it
Step: 1
Get Instant Access to Expert-Tailored Solutions
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started