Stuck after multiple "show more" type on elementclick

I have a site that scrapes fine. Some pages have a "load more" button. The scraping works fine if I use an element click selector with the click type set to click once. If I change the click type to "click more", I see the page pressing "more" multiple times as expected, but then the scraper simply gets stuck and doesn't progress any further. I've tried many combinations, but this seems to be a bug.

Any help resolving this appreciated.

Web Scraper version: 0.3.7
Chrome version: Version 63.0.3239.132 (Official Build) (64-bit)
OS: Windows 10

{"_id":"test4","startUrl":["https://www."],"selectors":[{"id":"show_more","type":"SelectorElementClick","selector":"div.trigger-more-comments","parentSelectors":["_root"],"multiple":false,"delay":"2000","clickElementSelector":"div.trigger-more-comments","clickType":"clickMore","discardInitialElements":true,"clickElementUniquenessType":"uniqueText"},{"id":"RATING","type":"SelectorText","selector":"div.font-rating","parentSelectors":["_root"],"multiple":false,"regex":"","delay":0},{"id":"boxes","type":"SelectorElement","selector":"div.set-relative div.uk-width-large-2-3 div.uk-block-small","parentSelectors":["_root"],"multiple":true,"delay":0},{"id":"name","type":"SelectorText","selector":"div.comment-title a","parentSelectors":["boxes"],"multiple":false,"regex":"","delay":0},{"id":"review_age","type":"SelectorText","selector":"span.font-12","parentSelectors":["boxes"],"multiple":false,"regex":"","delay":0}]}

Error Message:

background_script.js:1 {"url":"","timestamp":1517429868,"level_name":"INFO","message":"Job execution started"}
background_script.js:1 {"url":"","tabUrl":"chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/empty-page.html","status":"loading","timestamp":1517429868,"level_name":"ERROR","message":"chrome tab didn't start loading"}
log @ background_script.js:1
error @ background_script.js:1
chrome.tabs.get.n @ background_script.js:24
safeCallbackApply @ extensions::uncaught_exception_handler:27
handleResponse @ extensions::sendRequest:67
background_script.js:1 {"timestamp":1517429873,"level_name":"INFO","message":"will not be sleeping for additional time"}
background_script.js:1 {"url":" ","parentSelector":"clickcar","sitemapName":"socialcarbcn","driver":"chrometab","error":"Error: timeout","timestamp":1517429893,"level_name":"ERROR","message":"Job execution failed"}
log @ background_script.js:1
error @ background_script.js:1
(anonymous) @ background_script.js:24
s @ background_script.js:24
Promise rejected (async)
u @ background_script.js:24
(anonymous) @ background_script.js:24
r @ background_script.js:24
handle @ background_script.js:24
(anonymous) @ background_script.js:24
(anonymous) @ background_script.js:24
r @ background_script.js:24
t @ background_script.js:24
(anonymous) @ background_script.js:24
(anonymous) @ background_script.js:24
r @ background_script.js:24
handle @ background_script.js:24
(anonymous) @ background_script.js:24
(anonymous) @ background_script.js:24
r @ background_script.js:24
t @ background_script.js:24
(anonymous) @ background_script.js:24
(anonymous) @ background_script.js:24
r @ background_script.js:24
handle @ background_script.js:24
(anonymous) @ background_script.js:24
(anonymous) @ background_script.js:24
r @ background_script.js:24
t @ background_script.js:24
(anonymous) @ background_script.js:24
(anonymous) @ background_script.js:24
r @ background_script.js:24
handle @ background_script.js:24
(anonymous) @ background_script.js:24
(anonymous) @ background_script.js:24
r @ background_script.js:24
t @ background_script.js:24
(anonymous) @ background_script.js:24
(anonymous) @ background_script.js:24
r @ background_script.js:24
handle @ background_script.js:24
(anonymous) @ background_script.js:24
a @ background_script.js:24
Promise resolved (async)
u @ background_script.js:24
a @ background_script.js:24
Promise resolved (async)
u @ background_script.js:24
a @ background_script.js:24
Promise resolved (async)
u @ background_script.js:24
a @ background_script.js:24
Promise resolved (async)
u @ background_script.js:24
a @ background_script.js:24
Promise resolved (async)
u @ background_script.js:24
a @ background_script.js:24
Promise resolved (async)
u @ background_script.js:24
a @ background_script.js:24
Promise resolved (async)
u @ background_script.js:24
a @ background_script.js:24
background_script.js:1 {"timestamp":1517429893,"level_name":"PROFILE","message":"60018 ms job execution"}
background_script.js:1 {"url":"https://www.socialcar.com/alquilar/coche-mediano/barcelona/audi-a3-1-6-tdi-105cv-dpf-11/15468","timestamp":1517429893,"level_name":"INFO","message":"Syning storage because a job failed"}
background_script.js:1 {"url":" ","timestamp":1517429893,"level_name":"INFO","message":"Job execution started"}
_generated_background_page.html:1 Error in response to tabs.update: TypeError: Cannot read property 'url' of undefined
    at Object.chrome.tabs.update.n [as callback] (chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/background_script.js:24:348605)
    at Promise (chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/background_script.js:24:348523)
    at s.<anonymous> (chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/background_script.js:24:348491)
    at chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/background_script.js:24:346798
    at r (chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/background_script.js:24:346575)
_generated_background_page.html:1 Unchecked runtime.lastError while running tabs.update: No tab with id: 408.
    at Promise (chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/background_script.js:24:348523)
    at s.<anonymous> (chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/background_script.js:24:348491)
    at chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/background_script.js:24:346798
    at r (chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/background_script.js:24:346575)
background_script.js:1 {"url":" 
 ","parentSelector":"clickcar","sitemapName":"socialcarbcn","driver":"chrometab","error":"Error: timeout","timestamp":1517429913,"level_name":"ERROR","message":"Job execution failed"}
log @ background_script.js:1
error @ background_script.js:1
(anonymous) @ background_script.js:24
s @ background_script.js:24
Promise rejected (async)
u @ background_script.js:24
(anonymous) @ background_script.js:24
r @ background_script.js:24
handle @ background_script.js:24
(anonymous) @ background_script.js:24
(anonymous) @ background_script.js:24
r @ background_script.js:24
t @ background_script.js:24
(anonymous) @ background_script.js:24
(anonymous) @ background_script.js:24
r @ background_script.js:24
handle @ background_script.js:24
(anonymous) @ background_script.js:24
(anonymous) @ background_script.js:24
r @ background_script.js:24
t @ background_script.js:24
(anonymous) @ background_script.js:24
(anonymous) @ background_script.js:24
r @ background_script.js:24
handle @ background_script.js:24
(anonymous) @ background_script.js:24
(anonymous) @ background_script.js:24
r @ background_script.js:24
t @ background_script.js:24
(anonymous) @ background_script.js:24
(anonymous) @ background_script.js:24
r @ background_script.js:24
handle @ background_script.js:24
(anonymous) @ background_script.js:24
(anonymous) @ background_script.js:24
r @ background_script.js:24
t @ background_script.js:24
(anonymous) @ background_script.js:24
(anonymous) @ background_script.js:24
r @ background_script.js:24
handle @ background_script.js:24
(anonymous) @ background_script.js:24
a @ background_script.js:24
Promise resolved (async)
u @ background_script.js:24
a @ background_script.js:24
Promise resolved (async)
u @ background_script.js:24
a @ background_script.js:24
Promise resolved (async)
u @ background_script.js:24
a @ background_script.js:24
Promise resolved (async)
u @ background_script.js:24
a @ background_script.js:24
Promise resolved (async)
u @ background_script.js:24
a @ background_script.js:24
Promise resolved (async)
u @ background_script.js:24
a @ background_script.js:24
Promise resolved (async)
u @ background_script.js:24
a @ background_script.js:24
Promise resolved (async)
u @ background_script.js:24
a @ background_script.js:24
Promise resolved (async)
u @ background_script.js:24
a @ background_script.js:24
Promise resolved (async)
u @ background_script.js:24
a @ background_script.js:24
Promise resolved (async)
u @ background_script.js:24
a @ background_script.js:24
background_script.js:1 {"timestamp":1517429913,"level_name":"PROFILE","message":"60021 ms job execution"}
background_script.js:1 {"url":"https://mini-one-34/12793","timestamp":1517429913,"level_name":"INFO","message":"Syning storage because a job failed"}
background_script.js:1 {"url":"https://www.socialcar.com/alquilar/coche-mediano/gerona/ford-focus-319/13367","timestamp":1517429913,"level_name":"INFO","message":"Job execution started"}
_generated_background_page.html:1 Error in response to tabs.update: TypeError: Cannot read property 'url' of undefined
    at Object.chrome.tabs.update.n [as callback] (chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/background_script.js:24:348605)
    at Promise (chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/background_script.js:24:348523)
    at s.<anonymous> (chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/background_script.js:24:348491)
    at chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/background_script.js:24:346798
    at r (chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/background_script.js:24:346575)
_generated_background_page.html:1 Unchecked runtime.lastError while running tabs.update: No tab with id: 427.
    at Promise (chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/background_script.js:24:348523)
    at s.<anonymous> (chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/background_script.js:24:348491)
    at chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/background_script.js:24:346798
    at r (chrome-extension://jnhgnonknehpejjnehehllkliplmbmhn/background_script.js:24:346575)

Any thoughts? I really need this functionality to work, and I don't think I'm doing anything wrong.

Check "multiple" for your element click selector and select the comments with its "selector" field. Set the element click selector as the parent selector for "name" and "review_age", and get rid of the "boxes" selector. OR you can change your "boxes" selector to an element click selector and delete the current element click selector.

1 Like

I will try these changes and report back. Out of curiosity, what was wrong with my code structure? Thanks for looking into this!

I've tried your suggestions, and it doesn't work for me. Perhaps I'm not understanding your instructions correctly.

What I was trying was:

  • load the page

  • click selector on the load more button, make the click multiple type, until all comments are visible

  • create an element per review, which is multiple

** inside each review, extract reviewer, age of review, comment in the review

In other words

  • page (root)
    ** click more button (pressed multiple times)
    ** review box
    *** review value1
    *** review value2
    *** etc

What I understand you are suggesting is:

  • page (root)
    ** multiple element click selector on the load more button
    *** selector on the reviews
    *** name selector
    *** review age selector

This didn't work for me. You also suggested a secondary option which I understand as:

  • page (root)
    ** click selector on the reviews (not the load more button)
    *** review values

This doesn't select the load more button, so not sure how this worked for you.

Thank you for your help, and sorry if I'm missing something obvious. I'm totally new to webscraper. Thanks

Element click selector has 2 selectors, one is "click selector" and the other one is "selector". With click selector you have to select "Load more" button and with "selector" you have to select reviews.

1 Like

Now I understood. Just tried it, and it works. Thanks a million!

I now realize that this solution works only in cases where the "load more" button is present. If there are, say, 5 reviews, and therefore no "load more" button, then the element_click selector has nothing to click on, and scrapes no data from the elements at all.

In my original attempt, where there was an element selector on one hand, and a click selector on the other, this wasn't a problem. Personally, I think the click selector has a bug, since the multiple click option should not get stuck in the end. There are workarounds like using element click, but those have problems if the clickable item is there sometimes, and sometimes not.

Appreciate the help.

Did you check the "Discard initial elements that are available before click" option?