Data Preview works but not the actual scrape

Describe the problem.
Hi there,
I have run into a problem with one particular website that I have built a scraper for. It used to work perfectly fine, but now nothing is extracted from the website.

I have noticed that when I click on the data preview for the element selector, I get the correct results, but when I actually run the whole scrape, nothing is extracted.

I have tried this in both Chrome and Firefox, and neither extracts any data.

Is anyone else having similar issues? Is there any fix or workaround for this?

Thanks

Url: https://www.dewalt.com

Sitemap is below

Sitemap:

{"_id":"dewalt-itempage","startUrl":["https://www.dewalt.com/products/power-tools/nailers-and-staplers/finish-and-brad-nailers/20v-max-xr-18-ga-cordless-brad-nailer-tool-only/dcn680b","https://www.dewalt.com/products/accessories/tool-accessories/router-planer-and-joiner-accessories/13-vacuum-heat-treated-double-sided-two-pack-replacment-planer-knives/dw7352-2","https://www.dewalt.com/products/hand-tools/hammers-and-pry-bars/212-lb-fiberglass-blacksmith-hammer/dwht56146","https://www.dewalt.com/products/power-tools/saws/track-saws/flexvolt-60v-max-612-in-cordless-tracksaw-kit/dcs520st1","https://www.dewalt.com/products/gear-and-equipment/jobsite-lighting-and-flashlights/1000-lumen-rechargeable-area-light/dwht81422","https://www.dewalt.com/products/power-tools/saws/chop-saws-and-multi-cutter-saws/flexvolt-60v-max-cordless-brushless-9-in-cutoff-saw-kit/dcs690x2"],"selectors":[{"id":"ITEMPAGE ELEMENT","type":"SelectorElement","parentSelectors":["_root"],"selector":"body","multiple":true,"delay":0},{"id":"mfpn","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"span.pdp-details__sku","multiple":false,"regex":"","delay":0},{"id":"itemname","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"span.pdp-details__name","multiple":false,"regex":"","delay":0},{"id":"description","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"p.pdp-details__description","multiple":false,"regex":"","delay":0},{"id":"feature 1","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__use:nth-of-type(1)","multiple":false,"regex":"","delay":0},{"id":"feat 2","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__use:nth-of-type(2)","multiple":false,"regex":"","delay":0},{"id":"feat 3","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__use:nth-of-type(3)","multiple":false,"regex":"","delay":0},{"id":"feat 
4","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__use:nth-of-type(4)","multiple":false,"regex":"","delay":0},{"id":"feat5","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__use:nth-of-type(5)","multiple":false,"regex":"","delay":0},{"id":"feat6","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__use:nth-of-type(6)","multiple":false,"regex":"","delay":0},{"id":"feat7","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__use:nth-of-type(7)","multiple":false,"regex":"","delay":0},{"id":"feat8","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__use:nth-of-type(8)","multiple":false,"regex":"","delay":0},{"id":"feat9","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__use:nth-of-type(9)","multiple":false,"regex":"","delay":0},{"id":"feat10","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__use:nth-of-type(10)","multiple":false,"regex":"","delay":0},{"id":"image1","type":"SelectorImage","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"img.pdp-imagery__image--zoom","multiple":false,"delay":0},{"id":"image2","type":"SelectorImage","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-imagery-thumbnail:nth-of-type(2) img.pdp-imagery-thumbnail__image","multiple":false,"delay":0},{"id":"image3","type":"SelectorImage","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-imagery-thumbnail:nth-of-type(3) img.pdp-imagery-thumbnail__image","multiple":false,"delay":0},{"id":"includes","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"h3#includesTitle.pdp-specs__title","multiple":false,"regex":"","delay":0},{"id":"includesitems","type":"SelectorHTML","parentSelectors":["ITEMPAGE 
ELEMENT"],"selector":"ul.pdp-specs__includes","multiple":false,"regex":"","delay":0},{"id":"name1","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(1) strong.pdp-specs__specification-name","multiple":false,"regex":"","delay":0},{"id":"val1","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(1) span.pdp-specs__specification-value","multiple":false,"regex":"","delay":0},{"id":"name2","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(2) strong.pdp-specs__specification-name","multiple":false,"regex":"","delay":0},{"id":"val2","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(2) span.pdp-specs__specification-value","multiple":false,"regex":"","delay":0},{"id":"name3","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(3) strong.pdp-specs__specification-name","multiple":false,"regex":"","delay":0},{"id":"val3","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(3) span.pdp-specs__specification-value","multiple":false,"regex":"","delay":0},{"id":"name4","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(4) strong.pdp-specs__specification-name","multiple":false,"regex":"","delay":0},{"id":"val4","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(4) span.pdp-specs__specification-value","multiple":false,"regex":"","delay":0},{"id":"name5","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(5) 
strong.pdp-specs__specification-name","multiple":false,"regex":"","delay":0},{"id":"val5","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(5) span.pdp-specs__specification-value","multiple":false,"regex":"","delay":0},{"id":"name6","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(6) strong.pdp-specs__specification-name","multiple":false,"regex":"","delay":0},{"id":"val6","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(6) span.pdp-specs__specification-value","multiple":false,"regex":"","delay":0},{"id":"name7","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(7) strong.pdp-specs__specification-name","multiple":false,"regex":"","delay":0},{"id":"val7","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(7) span.pdp-specs__specification-value","multiple":false,"regex":"","delay":0},{"id":"name8","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(8) strong.pdp-specs__specification-name","multiple":false,"regex":"","delay":0},{"id":"val8","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(8) span.pdp-specs__specification-value","multiple":false,"regex":"","delay":0},{"id":"name9","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(9) strong.pdp-specs__specification-name","multiple":false,"regex":"","delay":0},{"id":"val9","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(9) span.pdp-specs__specification-value","multiple":false,"regex":"","delay":0},{"id":"name10","type":"SelectorText","parentSelectors":["ITEMPAGE 
ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(10) strong.pdp-specs__specification-name","multiple":false,"regex":"","delay":0},{"id":"val10","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(10) span.pdp-specs__specification-value","multiple":false,"regex":"","delay":0},{"id":"name11","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(11) strong.pdp-specs__specification-name","multiple":false,"regex":"","delay":0},{"id":"val11","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(11) span.pdp-specs__specification-value","multiple":false,"regex":"","delay":0},{"id":"name12","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(12) strong.pdp-specs__specification-name","multiple":false,"regex":"","delay":0},{"id":"val12","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(12) span.pdp-specs__specification-value","multiple":false,"regex":"","delay":0},{"id":"name13","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(13) strong.pdp-specs__specification-name","multiple":false,"regex":"","delay":0},{"id":"val13","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(13) span.pdp-specs__specification-value","multiple":false,"regex":"","delay":0},{"id":"name14","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(14) strong.pdp-specs__specification-name","multiple":false,"regex":"","delay":0},{"id":"val14","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(14) 
span.pdp-specs__specification-value","multiple":false,"regex":"","delay":0},{"id":"name15","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(15) strong.pdp-specs__specification-name","multiple":false,"regex":"","delay":0},{"id":"val15","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(15) span.pdp-specs__specification-value","multiple":false,"regex":"","delay":0},{"id":"name16","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(16) strong.pdp-specs__specification-name","multiple":false,"regex":"","delay":0},{"id":"val16","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(16) span.pdp-specs__specification-value","multiple":false,"regex":"","delay":0},{"id":"name17","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(17) strong.pdp-specs__specification-name","multiple":false,"regex":"","delay":0},{"id":"val17","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.pdp-specs__specification:nth-of-type(17) span.pdp-specs__specification-value","multiple":false,"regex":"","delay":0},{"id":"level 1","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.breadcrumb-trail__breadcrumb:nth-of-type(2)","multiple":false,"regex":"","delay":0},{"id":"level 2","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.breadcrumb-trail__breadcrumb:nth-of-type(3)","multiple":false,"regex":"","delay":0},{"id":"level 3","type":"SelectorText","parentSelectors":["ITEMPAGE ELEMENT"],"selector":"li.breadcrumb-trail__breadcrumb:nth-of-type(4)","multiple":false,"regex":"","delay":0},{"id":"level 4","type":"SelectorText","parentSelectors":["ITEMPAGE 
ELEMENT"],"selector":"li.breadcrumb-trail__breadcrumb:nth-of-type(5)","multiple":false,"regex":"","delay":0}]}

Hi,

I have the same problem.

Any Solution?

No, I still don't have a solution. Which website are you trying to scrape? Maybe it only affects certain types of websites. I'm not really sure.

I generated a new sitemap and did a test scrape on a category. Seems to work OK and I got all the info. You can copy/modify the map below; you probably need to expand the "Info Selector". I used Page load delay (ms): 6000

{"_id":"dewalt_new","startUrl":["https://www.dewalt.com/products/power-tools/drills"],"selectors":[{"id":"Click Item Links","type":"SelectorLink","parentSelectors":["_root"],"selector":".product-card__product-name a","multiple":true,"delay":0},{"id":"Info Selector","type":"SelectorElement","parentSelectors":["Click Item Links"],"selector":"article","multiple":false,"delay":0},{"id":"model","type":"SelectorText","parentSelectors":["Info Selector"],"selector":"span.pdp-details__sku","multiple":false,"regex":"","delay":0},{"id":"name","type":"SelectorText","parentSelectors":["Info Selector"],"selector":"span[itemprop='name']","multiple":false,"regex":"","delay":0},{"id":"Desc","type":"SelectorText","parentSelectors":["Info Selector"],"selector":"p","multiple":false,"regex":"","delay":0}]}

1 Like

Hi,

What does "Info Selector" mean? Can you please explain? I'm using LinkedIn Sales Navigator for scraping and I have the same problem.

I have the same problem on LinkedIn. Data Preview shows the correct data, but scraping only returns the first 2 of 25 rows. Also, Data Preview is flaky: sometimes it works, and other times it does not.

I have worked with WebScraper and LinkedIn a lot in the past, so I am at a loss to understand what the problem is.

I use the Chrome console and jQuery selectors and commands to verify that it should be finding 25 rows (e.g. >> $('artdeco-entity-lockup-title a').length correctly returns 25).

Hi, "Info Selector" is merely the name of the wrapper element (container) for all the data. I meant, you would need to add more data scrapers (in addition to model, name, desc) to get all the data you want. Anyway I tested the dewalt scraper again today (Oct 29) and it still works.

I figured out what the problem is with LinkedIn's new web page. They initially load only those candidates that would be visible on the page, and then dynamically load additional candidates as you scroll down. This is why a Data Preview is inconsistent: the data it returns depends on whether you have scrolled down the entire page before clicking the button. It is also why scraping only returns those candidates visible in the initial page load of the smaller WebScraper window used during scraping.

Now the question is, how to get WebScraper to scroll to the bottom of the page before scraping on each new page load? Anyone?

Here's a scraper for the LinkedIn jobs page, with scroller. It works for me. I don't have Sales Navigator, so modify this for your needs:

{"_id":"linkedin-jobs","startUrl":["https://www.linkedin.com/jobs/"],"selectors":[{"id":"Scroller","type":"SelectorElementScroll","parentSelectors":["_root"],"selector":"li.card-list__item.job-card","multiple":true,"delay":"3000"},{"id":"Position","type":"SelectorText","parentSelectors":["Scroller"],"selector":"h3","multiple":false,"regex":"","delay":0},{"id":"Company","type":"SelectorText","parentSelectors":["Scroller"],"selector":"h4","multiple":false,"regex":"","delay":0},{"id":"Location","type":"SelectorText","parentSelectors":["Scroller"],"selector":"h5","multiple":false,"regex":"","delay":0}]}

Thank you LeeMeng! I now have a SelectorElementScroll working for my needs. And it is per how you are using it above. However, how do I get pagination (SelectorElementClick I assume) to work in order to get me to the next pages? I am trying several things but I haven't figured out the correct way to do this. I assumed _root >> SelectorElementClick >> SelectorElementScroll >> ... hierarchy, but that isn't working.

This code works for one page.

{
  "_id": "one_page_works",
  "startUrl": [
    "https://www.linkedin.com/talent/..."
  ],
  "selectors": [
    {
      "id": "FullName",
      "type": "SelectorText",
      "parentSelectors": [
        "scroll_element"
      ],
      "selector": "artdeco-entity-lockup-title a",
      "multiple": true,
      "regex": "",
      "delay": 0
    },
    {
      "id": "scroll_element",
      "type": "SelectorElementScroll",
      "parentSelectors": [
        "_root"
      ],
      "selector": "article.profile-list-item",
      "multiple": true,
      "delay": "500"
    }
  ]
}

This code correctly pages through all 40 pages...

{
  "_id": "onepage3",
  "startUrl": [
    "https://www.linkedin.com/talent/hire/..."
  ],
  "selectors": [
    {
      "id": "pager",
      "type": "SelectorElementClick",
      "parentSelectors": [
        "_root"
      ],
      "selector": "ol.profile-list",
      "multiple": true,
      "delay": "2000",
      "clickElementSelector": ".mini-pagination__quick-link [type='chevron-right-icon']",
      "clickType": "clickMore",
      "discardInitialElements": "do-not-discard",
      "clickElementUniquenessType": "uniqueText"
    }
  ]
}

So, I made the Pager element from the second one a child of root, and made the Scroll Element from the first one a child of the Pager element.
It works down to about the 20th page, and then closes. No data, no error msg, nothing!

Any ideas?

It could be because webscraper seems to scrape using a small window size, which may change the size of the elements displayed as compared to when you configure the selectors.

Try changing the window size of your browser on the site you want to scrape. Does it change the layout? Does it remove some elements that you want to select?

If yes, try configuring the selectors using a smaller browser window size.

In my case, some elements completely disappear on the smaller window, and I'm looking for settings to scrape using a larger window.

In reply to my own post: found it, just change the scraper window.