Data not being scraped in sequence

aadi · December 10, 2019, 10:18am

Website: https://www.indcareer.com/find/all-colleges-in-maharashtra

I am trying to export to Excel the links found inside each page linked from the website above, but when I run this program the data is being scraped in a random order. What should I change so that it traverses the links sequentially?

import scrapy

class CollegedetailsSpider(scrapy.Spider):
    """Follow every entry of the college listing and emit its external link.

    Starting from the listing page in ``start_urls``, the spider follows the
    ``h4 > a`` link of every ``div.media`` entry as well as the "next page"
    pager link. Every response goes through :meth:`parse`, which yields a
    ``{'College_Link': ...}`` item when the page has a ``rel="nofollow"``
    link inside a table cell.

    NOTE(review): request priorities only control the order in which requests
    are scheduled. Scrapy downloads several pages concurrently, so for a
    strictly sequential output the crawl presumably also has to run with
    ``CONCURRENT_REQUESTS = 1`` -- confirm against the project settings.
    """

    name = "link_list"

    start_urls = ["https://www.indcareer.com/find/all-colleges-in-maharashtra"]

    def parse(self, response):
        """Yield follow-up requests and, when present, the page's link item.

        Yields, in this order:
          * one request per ``div.media`` entry that has an ``h4 > a`` link,
          * one ``{'College_Link': href}`` item if the page has a
            ``rel="nofollow"`` link inside a table cell,
          * one request for the pager's "next" link, if there is one.
        """
        # Higher-priority requests are scheduled first, so hand out strictly
        # decreasing priorities in document order; the next listing page gets
        # the lowest one and is therefore queued after this page's entries.
        priority = response.request.priority

        for coll in response.xpath("//div[@class='media']"):
            next_coll = coll.xpath(".//h4/a/@href").extract_first()
            if next_coll is None:
                # Joining a missing href would only re-request this same page.
                continue
            priority -= 1
            yield scrapy.Request(
                url=response.urljoin(next_coll),
                callback=self.parse,
                priority=priority,
            )

        # xpath() returns a (possibly empty) selector list, never None, so the
        # check has to be made on the extracted value instead.
        college_link = response.xpath(
            ".//tr/td/a[@rel='nofollow']/@href"
        ).extract_first()
        if college_link is not None:
            yield {
                'College_Link': college_link,
            }

        next_page = response.xpath(
            "//li[@class='pager-next']/a/@href"
        ).extract_first()
        if next_page is not None:
            yield scrapy.Request(
                url=response.urljoin(next_page),
                callback=self.parse,
                priority=priority - 1,
            )