Hello,
I am trying to scrape data about video game mods from this website: Nexus Mods (https://www.nexusmods.com/mods).
The problem is that the pages load dynamically, which means the URL does not change when I click on page 1, 2, 3, etc. I had to set up pagination with an Element Click selector and gave it a 5000 ms delay to make sure each page has time to load.
Now, I only want to scrape the first 25 pages when the mods are sorted by Endorsements, not by Date Published, Downloads, Unique Downloads, etc.
How do I set it up so only the first 25 pages of the most endorsed mods are scraped?
Here is the sitemap:
{"_id":"nexusmods8","startUrl":["https://www.nexusmods.com/mods"],"selectors":[{"id":"modname","multiple":false,"parentSelectors":["modpage"],"regex":"","selector":"h1","type":"SelectorText"},{"id":"moddername","multiple":false,"parentSelectors":["modpage"],"selector":".sideitems.clearfix a","type":"SelectorLink"},{"id":"modpage","multiple":true,"parentSelectors":["mod"],"selector":"body","type":"SelectorElement"},{"id":"posts","multiple":false,"parentSelectors":["modpage"],"regex":"","selector":".modtabs #mod-page-tab-posts span.alert","type":"SelectorText"},{"id":"endorsements","multiple":false,"parentSelectors":["modpage"],"regex":"","selector":".stat a","type":"SelectorText"},{"id":"unique-downloads","multiple":false,"parentSelectors":["modpage"],"regex":"","selector":".stat-uniquedls div.stat","type":"SelectorText"},{"id":"total-downloads","multiple":false,"parentSelectors":["modpage"],"regex":"","selector":".stat-totaldls div.stat","type":"SelectorText"},{"id":"total-views","multiple":false,"parentSelectors":["modpage"],"regex":"","selector":".stat-totalviews div.stat","type":"SelectorText"},{"id":"modder-endorsements","multiple":false,"parentSelectors":["modder-profile"],"regex":"","selector":".stat-endorsements div.stat","type":"SelectorText"},{"id":"modder-profile","multiple":true,"parentSelectors":["moddername"],"selector":"body","type":"SelectorElement"},{"id":"kudos","multiple":false,"parentSelectors":["modder-profile"],"regex":"","selector":".stat-kudos div.stat","type":"SelectorText"},{"id":"all tiles","multiple":true,"parentSelectors":["click-pagination"],"selector":"ul.tiles","type":"SelectorElement"},{"id":"mod","multiple":true,"parentSelectors":["all tiles"],"selector":"div.tile-desc:nth-of-type(n+2) .tile-name a","type":"SelectorLink"},{"id":"tiles-wrapper","multiple":true,"parentSelectors":["_root"],"selector":"div.wrapper","type":"SelectorElement"},{"clickElementSelector":".head-nav li:nth-of-type(n+2) a","clickElementUniquenessType":"uniqueText","clickType":"clickOnce","delay":5000,"discardInitialElements":"discard-when-click-element-exists","id":"click-pagination","multiple":true,"parentSelectors":["tiles-wrapper"],"selector":"div.container","type":"SelectorElementClick"}]}