Hi,
I am trying to scrape ClinicalTrials.gov but my sitemaps seems to extract only the trials on the first page (the scrape goes through to the last page and then restarts to extract the name and info of the trials only on the first page). I tried 2 different sitemaps - one that has :not([disabled]) for the pagination and one that does not
Url: ClinicalTrials.gov
Sitemap: #1 try
{"_id":"Clinical_trials_melanoma","startUrl":["https://clinicaltrials.gov/search?cond=Malignant%20Melanoma&intr=neoadjuvant"],"selectors":[{"elementLimit":0,"id":"trials_list_el","multiple":false,"parentSelectors":["_root"],"scroll":false,"selector":"div.margin-bottom-5","type":"SelectorElement"},{"elementLimit":0,"id":"trial_el","multiple":true,"parentSelectors":["trials_list_el"],"scroll":false,"selector":"div.usa-card__container","type":"SelectorElement"},{"id":"trial_link","linkType":"linkFromHref","multiple":false,"parentSelectors":["trial_el"],"selector":"a.hit-card-title","type":"SelectorLink"},{"id":"trial_name","multiple":false,"multipleType":"singleColumn","parentSelectors":["trial_link"],"regex":"","selector":"h2.brief-title","type":"SelectorText"},{"id":"publication_date","multiple":false,"multipleType":"singleColumn","parentSelectors":["trial_link"],"regex":"","selector":"div:nth-of-type(4) span.padding-left-05","type":"SelectorText"},{"id":"pagination","paginationType":"auto","parentSelectors":["_root","pagination"],"selector":"div.usa-pagination__button, button.usa-pagination__button, li.usa-pagination__item:nth-of-type(10):not([disabled])","type":"SelectorPagination"}]}
#2 try
{"_id":"Clinical_trials_melanoma","startUrl":["https://clinicaltrials.gov/search?cond=Malignant%20Melanoma&intr=neoadjuvant"],"selectors":[{"elementLimit":0,"id":"trials_list_el","multiple":false,"parentSelectors":["_root"],"scroll":false,"selector":"div.margin-bottom-5","type":"SelectorElement"},{"elementLimit":0,"id":"trial_el","multiple":true,"parentSelectors":["trials_list_el"],"scroll":false,"selector":"div.usa-card__container","type":"SelectorElement"},{"id":"trial_link","linkType":"linkFromHref","multiple":false,"parentSelectors":["trial_el"],"selector":"a.hit-card-title","type":"SelectorLink"},{"id":"trial_name","multiple":false,"multipleType":"singleColumn","parentSelectors":["trial_link"],"regex":"","selector":"h2.brief-title","type":"SelectorText"},{"id":"publication_date","multiple":false,"multipleType":"singleColumn","parentSelectors":["trial_link"],"regex":"","selector":"div:nth-of-type(4) span.padding-left-05","type":"SelectorText"},{"id":"pagination","paginationType":"auto","parentSelectors":["_root","pagination"],"selector":"div.usa-pagination__button, button.usa-pagination__button, li.usa-pagination__item:nth-of-type(10)","type":"SelectorPagination"}]}
Any help would be much appreciated!