I'm trying to scrape Stack Overflow job ads. Every job ad has a unique URL, and yet not all ads are scraped.
- The sitemap first goes through the search results data (list).
- Then hops to the job ad and collects its content.
- Then it hops to the company profile and collects the rest of the needed data.
Some company profiles repeat, and maybe this is the problem. Are the duplicate company profile URLs blocking Web Scraper from scraping the content of all the job ads? If not, please advise how to tackle the problem.
Url: https://stackoverflow.com/jobs?q=keyword+keyword&l=United+States&sort=i
Web Scraper version: 0.3.8
Sitemap:
{"_id":"stack_overflow_jobs-01-02-ws-forum-topic","startUrl":["https://stackoverflow.com/jobs?q=keyword+keyword&l=United+States"],"selectors":[{"id":"List","type":"SelectorElement","parentSelectors":["_root"],"selector":"div.listResults div.p24","multiple":true,"delay":0},{"id":"Company_Name","type":"SelectorText","parentSelectors":["List"],"selector":"div.fc-black-700 span:nth-of-type(1)","multiple":false,"regex":"","delay":0},{"id":"Company_Headquaters","type":"SelectorText","parentSelectors":["List"],"selector":"div.fc-black-700 span.fc-black-500","multiple":false,"regex":"","delay":0},{"id":"Job_Ad_Title","type":"SelectorLink","parentSelectors":["List"],"selector":"h2 a","multiple":false,"delay":0},{"id":"Job_Ad_Snippet","type":"SelectorText","parentSelectors":["List"],"selector":"div.mt12","multiple":false,"regex":"","delay":0},{"id":"Job_Ad_Time","type":"SelectorText","parentSelectors":["List"],"selector":"span.ps-absolute.pt2","multiple":false,"regex":"","delay":0},{"id":"Job_Ad_Item_Title","type":"SelectorText","parentSelectors":["Job_Ad_Title"],"selector":"h1","multiple":false,"regex":"","delay":0},{"id":"Job_Ad_Item_Description","type":"SelectorText","parentSelectors":["Job_Ad_Title"],"selector":"div#overview-items.nav-content","multiple":false,"regex":"","delay":0},{"id":"Job_Ad_Item_Time","type":"SelectorText","parentSelectors":["Job_Ad_Title"],"selector":"a.fc-black-700","multiple":false,"regex":"","delay":0},{"id":"Job_Ad_Item_Source","type":"SelectorElementAttribute","parentSelectors":["Job_Ad_Title"],"selector":"h1 a.fc-black-900","multiple":false,"extractAttribute":"href","delay":0},{"id":"Company_Name_2","type":"SelectorLink","parentSelectors":["Job_Ad_Title"],"selector":"div.grid--cell.fl1 div a","multiple":false,"delay":0},{"id":"Company_Headquaters_2","type":"SelectorText","parentSelectors":["Job_Ad_Title"],"selector":"div.grid--cell 
span.fc-black-500","multiple":false,"regex":"","delay":0},{"id":"Company_Profile_Page","type":"SelectorLink","parentSelectors":["Job_Ad_Title"],"selector":"header div.s-avatar a","multiple":false,"delay":0},{"id":"Company_Overview","type":"SelectorText","parentSelectors":["Company_Profile_Page"],"selector":"#about-items","multiple":false,"regex":"","delay":0},{"id":"Company_Website","type":"SelectorElementAttribute","parentSelectors":["Company_Profile_Page"],"selector":"#right-column a.fw-bold","multiple":false,"extractAttribute":"href","delay":0},{"id":"Company_Industry","type":"SelectorText","parentSelectors":["Company_Profile_Page"],"selector":"span.d-block:nth-of-type(3)","multiple":false,"regex":"","delay":0},{"id":"Company_Size","type":"SelectorText","parentSelectors":["Company_Profile_Page"],"selector":"span.d-inline-block:nth-of-type(4)","multiple":false,"regex":"","delay":0},{"id":"Company_Headquaters_3","type":"SelectorText","parentSelectors":["Company_Profile_Page"],"selector":"div.grid--cell:nth-of-type(1) p.fs-body2","multiple":false,"regex":"","delay":0},{"id":"Company_Type","type":"SelectorText","parentSelectors":["Company_Profile_Page"],"selector":"span.d-inline-block:nth-of-type(6)","multiple":false,"regex":"","delay":0},{"id":"Company_Founded","type":"SelectorText","parentSelectors":["Company_Profile_Page"],"selector":"span.d-inline-block:nth-of-type(5)","multiple":false,"regex":"","delay":0},{"id":"Company_Specialities","type":"SelectorText","parentSelectors":["Company_Profile_Page"],"selector":"div.fs-body2 
div.mb16","multiple":false,"regex":"","delay":0},{"id":"Company_Employees_Registered","type":"SelectorText","parentSelectors":["Company_Profile_Page"],"selector":"span.d-inline-block:nth-of-type(4)","multiple":false,"regex":"","delay":0},{"id":"Company_Revenue","type":"SelectorText","parentSelectors":["Company_Profile_Page"],"selector":"span.d-inline-block.pl8","multiple":false,"regex":"","delay":0},{"id":"Company_Competition","type":"SelectorText","parentSelectors":["Company_Profile_Page"],"selector":"div.mt32.js-locations","multiple":false,"regex":"","delay":0},{"id":"Company_Tech_Stack","type":"SelectorText","parentSelectors":["Company_Profile_Page"],"selector":"div#tech-stack-items","multiple":false,"regex":"","delay":0},{"id":"Company_Leads","type":"SelectorText","parentSelectors":["Company_Profile_Page"],"selector":"div.js-people","multiple":false,"regex":"","delay":0},{"id":"Company_Facebook","type":"SelectorElementAttribute","parentSelectors":["Company_Profile_Page"],"selector":"span.ps-absolute.t16.r16 a:nth-child(2)","multiple":false,"extractAttribute":"href","delay":0},{"id":"Company_Linkedin","type":"SelectorElementAttribute","parentSelectors":["Company_Profile_Page"],"selector":"span.ps-absolute.t16.r16 a:nth-child(1)","multiple":false,"extractAttribute":"href","delay":0},{"id":"Company_Youtube","type":"SelectorElementAttribute","parentSelectors":["Company_Profile_Page"],"selector":"span.ps-absolute.t16.r16 a:nth-child(3)","multiple":false,"extractAttribute":"href","delay":0},{"id":"Company_Twitter","type":"SelectorElementAttribute","parentSelectors":["Company_Profile_Page"],"selector":"span.ps-absolute.t16.r16 a:nth-child(1)","multiple":false,"extractAttribute":"href","delay":0},{"id":"Company_Instagram","type":"SelectorElementAttribute","parentSelectors":["Company_Profile_Page"],"selector":"span.ps-absolute.t16.r16 
a:nth-child(4)","multiple":false,"extractAttribute":"href","delay":0},{"id":"Company_Github","type":"SelectorElementAttribute","parentSelectors":["Company_Profile_Page"],"selector":"span.ps-absolute.t16.r16 a:nth-child(3)","multiple":false,"extractAttribute":"href","delay":0}]}