Question

1 Approved Answer

Posted on Sep 05, 2024

below I have written the code for extracting the data from url but the data is not getting in excel sheet and also the time

below I have written the code for extracting the data from url but the data is not getting in excel sheet and also the time calculation is getting 0 secs

import scrapy

import pandas as pd

import time

starttime=time.time()

class ArtisanDataSpider(scrapy.Spider):

name = "artisan_data"

start_urls = ['http://www.handicrafts.nic.in/ArtisanData.aspx?MID=SZmOd%2fCrxTo9CHD2XKF+pA%3d%3d']

def parse(self, response):

# Select the form and fill in the form data

form = response.xpath('//form[@id="form1"]')

form.xpath('.//select[@name="ddlState"]/option[text()="Uttar Pradesh"]/@value').extract_first()

form.xpath('.//select[@name="ddlDistrict"]/option[text()="Sant Ravidas Nagar"]/@value').extract_first()

form.xpath('.//select[@name="ddlDistrict"]/option[text()="Agra"]/@value').extract_first()

form.xpath('.//select[@name="ddlDistrict"]/option[text()="Varanasi"]/@value').extract_first()

yield scrapy.FormRequest.from_response(response, formdata={'ddlState': 'Uttar Pradesh', 'ddlDistrict': ['Sant Ravidas Nagar', 'Agra', 'Varanasi'],'btnSubmit': 'Submit'},

callback=self.parse_result)

def parse_result(self, response):

rows = response.xpath('//table[@id="gvArtisanData"]/tr')

data = []

for row in rows:

PEHCHAN_CARD_NO = row.xpath('./td[1]/text()').extract_first()

ARTISIAN_NAME = row.xpath('./td[2]/text()').extract_first()

Father_spouse = row.xpath('./td[3]/text()').extract_first()

Category = row.xpath('./td[4]/text()').extract_first()

AADHARNO = row.xpath('./td[5]/text()').extract_first()

NAME_OF_CRAFT = row.xpath('./td[6]/text()').extract_first()

MOBILENO= row.xpath('./td[7]/text()').extract_first()

VILLAGE= row.xpath('./td[8]/text()').extract_first()

TOWN= row.xpath('./td[9]/text()').extract_first()

CITY = row.xpath('./td[10]/text()').extract_first()

DISTRICT = row.xpath('./td[11]/text()').extract_first()

STATE = row.xpath('./td[12]/text()').extract_first()

data.append({'PEHCHAN_CARD_NO':PEHCHAN_CARD_NO, 'ARTISIAN_NAME':ARTISIAN_NAME, 'Father_spouse':Father_spouse, 'Category': Category, 'AADHAR_NO': AADHAR_NO, 'NAME_OF_CRAFT':NAME_OF_CRAFT,'MOBILENO':MOBILENO,'VILLAGE':VILLAGE,'TOWN':TOWN,'CITY':CITY, 'DISTRICT':DISTRICT, 'STATE':STATE})

CHAN_CARD_NO, 'ARTISIAN_NAME':ARTISIAN_NAME, 'Father_spouse':Father_spouse, 'Category': Category, 'AADHARNO': AADHARNO, 'NAME_OF_CRAFT':NAME_OF_CRAFT,'MOBILENO':MOBILENO,'VILLAGE':VILLAGE,'TOWN':TOWN,'CITY':CITY, 'DISTRICT':DISTRICT, 'STATE':STATE})

df = pd.DataFrame(data)

df.to_excel('artisan_data.xlsx')

next_page = response.xpath('//a[text()="Next"]/@href').extract_first()

if next_page:

yield scrapy.Request(response.urljoin(next_page), callback=self.parse_result)

endtime=time.time()

result=endtime-starttime

print("the time taken is:", result)

output is getting the time as 0.0

C:\Users\Admin\Desktop\python scrapping> & C:/Users/Admin/AppData/Local/Programs/Python/Python311/python.exe "c:/Users/Admin/Desktop/python scrapping/firstproject/firstproject/spiders/pythonspider.py" the time taken is: 0.0

can u please help me out the code where I am going to miss the code I need all the data in excel sheet and time should be calculated