Pagination misses pages 2 through 6?

Hello all
I'm having trouble getting all the pages of a Yellow Pages search to scrape. I can get the pagination to visit page 1 and then pages 7 through to the end, but it misses pages 2, 3, 4, 5 and 6.
Any idea what I'm missing here?

Sitemap:

{"_id":"enginnering","startUrl":["https://www.yellowpages.com.au/search/listings?clue=engineering&eventType=pagination&locationClue=Greater+Adelaide%2C+SA&pageNumber=1&referredBy=UNKNOWN"],"selectors":[{"id":"link","type":"SelectorLink","parentSelectors":["_root","pagination"],"selector":".body a.listing-name","multiple":true,"delay":0},{"id":"element","type":"SelectorElement","parentSelectors":["link"],"selector":"div.business-details","multiple":false,"delay":0},{"id":"name","type":"SelectorText","parentSelectors":["element"],"selector":"h1","multiple":false,"regex":"","delay":0},{"id":"address","type":"SelectorText","parentSelectors":["element"],"selector":"p","multiple":false,"regex":"","delay":0},{"id":"phone","type":"SelectorText","parentSelectors":["element"],"selector":".click-to-call .text div","multiple":false,"regex":"","delay":0},{"id":"website","type":"SelectorLink","parentSelectors":["element"],"selector":"a.contact-url","multiple":false,"delay":0},{"id":"email","type":"SelectorLink","parentSelectors":["element"],"selector":"a.contact-main.contact-email","multiple":false,"delay":0},{"id":"pagination","type":"SelectorLink","parentSelectors":["_root","pagination"],"selector":"a.pagination","multiple":true,"delay":0}]}

Thanks in advance.

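This should work (the only change is the pagination selector, which now follows just the "Next" link instead of every page-number link):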

{"_id":"enginnering","startUrl":["https://www.yellowpages.com.au/search/listings?clue=engineering&eventType=pagination&locationClue=Greater+Adelaide%2C+SA&pageNumber=1&referredBy=UNKNOWN"],"selectors":[{"id":"link","type":"SelectorLink","parentSelectors":["_root","pagination"],"selector":".body a.listing-name","multiple":true,"delay":0},{"id":"element","type":"SelectorElement","parentSelectors":["link"],"selector":"div.business-details","multiple":false,"delay":0},{"id":"name","type":"SelectorText","parentSelectors":["element"],"selector":"h1","multiple":false,"regex":"","delay":0},{"id":"address","type":"SelectorText","parentSelectors":["element"],"selector":"p","multiple":false,"regex":"","delay":0},{"id":"phone","type":"SelectorText","parentSelectors":["element"],"selector":".click-to-call .text div","multiple":false,"regex":"","delay":0},{"id":"website","type":"SelectorLink","parentSelectors":["element"],"selector":"a.contact-url","multiple":false,"delay":0},{"id":"email","type":"SelectorLink","parentSelectors":["element"],"selector":"a.contact-main.contact-email","multiple":false,"delay":0},{"id":"pagination","type":"SelectorLink","parentSelectors":["_root","pagination"],"selector":"a.navigation:contains('Next')","multiple":true,"delay":0}]}

1 Like

Thanks, I'll check it tomorrow when I have time.
So I should just select the "next" page link instead of the whole page number series, then?

Try this

{"_id":"enginnering","startUrl":["https://www.yellowpages.com.au/search/listings?clue=engineering&eventType=pagination&locationClue=Greater+Adelaide%2C+SA&pageNumber=1&referredBy=UNKNOWN"],"selectors":[{"id":"Pagination","type":"SelectorLink","parentSelectors":["_root","Pagination"],"selector":"a.pagination.navigation:last","multiple":false,"delay":0},{"id":"Element Select","type":"SelectorElement","parentSelectors":["_root","Pagination"],"selector":".listing","multiple":true,"delay":0},{"id":"Company name","type":"SelectorText","parentSelectors":["Element Select"],"selector":".listing-name","multiple":false,"regex":"","delay":0},{"id":"URL","type":"SelectorElementAttribute","parentSelectors":["Element Select"],"selector":".listing-name","multiple":false,"extractAttribute":"href","delay":0},{"id":"Email","type":"SelectorElementAttribute","parentSelectors":["Element Select"],"selector":".contact-email","multiple":false,"extractAttribute":"data-email","delay":0},{"id":"address","type":"SelectorText","parentSelectors":["Element Select"],"selector":".mappable-address","multiple":false,"regex":"","delay":0},{"id":"Phone","type":"SelectorText","parentSelectors":["Element Select"],"selector":".contact-phone","multiple":false,"regex":"","delay":0}]}

I like to link Pagination to the "next" button and then use an element selector to group the data. This way you get what you want from the search page without going into each profile.

If you do need data from the individual profile pages, we can add that by putting a link selector inside the element selector.

Thanks Webber, worked a treat :slight_smile: