Omitted items when scraping

When scraping the page below, some elements are omitted. Instead of 469 elements, the results contain 440 or 430, and the count differs from run to run.

Page:
{"id":"rossmann_eveline_","startUrl":["Wszystkie produkty | Drogeria Rossmann.plroot","strony"],"paginationType":"auto","type":"SelectorPagination","selector":"[aria-label='Następna strona'] svg"},{"id":"produkty","parentSelectors":["strony"],"type":"SelectorElement","selector":"div.ANFOR\+nCtiiBTnj4qshlPw\=\=","multiple":true},{"id":"nazwa","parentSelectors":["produkty"],"type":"SelectorText","selector":"strong","multiple":false,"regex":""},{"id":"nazwa","parentSelectors":["produkty"],"type":"SelectorText","selector":"div.T3Qr7QxUwvjK6d97mrsI9g\=\=","multiple":false,"regex":""},{"id":"nazwa__","parentSelectors":["produkty"],"type":"SelectorText","selector":"div.ArTk1hM-rID8PjxP9rM8oQ\=\=","multiple":false,"regex":""},{"id":"status","parentSelectors":["produkty"],"type":"SelectorText","selector":"div.wW64wddacB38LOmL00vmCg\=\=","multiple":false,"regex":""},{"id":"chwilowo_niedostepny","parentSelectors":["produkty"],"type":"SelectorText","selector":"div.cE531VIUekY1n0Nf73YpjQ\=\=:nth-of-type(1) div.LTlifAgXXZs\+f4VkCyY2kg\=\=","multiple":false,"regex":""},{"id":"cena_aktualna","parentSelectors":["produkty"],"type":"SelectorText","selector":"div.-WGMGMKmN\+ka261jsQn\+UA\=\=","multiple":false,"regex":""},{"id":"cena_bez_apki","parentSelectors":["produkty"],"type":"SelectorText","selector":"span.PriceWithoutApp-module_text--cxLQa","multiple":false,"regex":""},{"id":"cena_30dni","parentSelectors":["produkty"],"type":"SelectorText","selector":"div.xR0YpYXbdXnkSrCd7NGVfg\=\=","multiple":false,"regex":""},{"id":"cena_regularna","parentSelectors":["produkty"],"type":"SelectorText","selector":"div.THuUndqKVbMFbPAMJTvqqA\=\=","multiple":false,"regex":""},{"id":"link","parentSelectors":["produkty"],"type":"SelectorLink","selector":".Uk1HSbaEW1DW7GlqOLiAhA\=\= a","multiple":false,"linkType":"linkFromHref"}]}

What could be the reason for this?

Thank you for your answer.

Hi,

Please post the sitemap as preformatted text; otherwise the code is broken.

code

{"_id":"rossmann_eveline__","startUrl":["https://www.rossmann.pl/produkty?BrandIds=10202&BrandIds=7483"],"selectors":[{"id":"strony","parentSelectors":["_root","strony"],"paginationType":"auto","type":"SelectorPagination","selector":"[aria-label='Następna strona'] svg"},{"id":"produkty","parentSelectors":["strony"],"type":"SelectorElement","selector":"div.ANFOR\\+nCtiiBTnj4qshlPw\\=\\=","multiple":true},{"id":"nazwa","parentSelectors":["produkty"],"type":"SelectorText","selector":"strong","multiple":false,"regex":""},{"id":"nazwa_","parentSelectors":["produkty"],"type":"SelectorText","selector":"div.T3Qr7QxUwvjK6d97mrsI9g\\=\\=","multiple":false,"regex":""},{"id":"nazwa__","parentSelectors":["produkty"],"type":"SelectorText","selector":"div.ArTk1hM-rID8PjxP9rM8oQ\\=\\=","multiple":false,"regex":""},{"id":"status","parentSelectors":["produkty"],"type":"SelectorText","selector":"div.wW64wddacB38LOmL00vmCg\\=\\=","multiple":false,"regex":""},{"id":"chwilowo_niedostepny","parentSelectors":["produkty"],"type":"SelectorText","selector":"div.cE531VIUekY1n0Nf73YpjQ\\=\\=:nth-of-type(1) div.LTlifAgXXZs\\+f4VkCyY2kg\\=\\=","multiple":false,"regex":""},{"id":"cena_aktualna","parentSelectors":["produkty"],"type":"SelectorText","selector":"div.-WGMGMKmN\\+ka261jsQn\\+UA\\=\\=","multiple":false,"regex":""},{"id":"cena_bez_apki","parentSelectors":["produkty"],"type":"SelectorText","selector":"span.PriceWithoutApp-module_text--cxLQa","multiple":false,"regex":""},{"id":"cena_30dni","parentSelectors":["produkty"],"type":"SelectorText","selector":"div.xR0YpYXbdXnkSrCd7NGVfg\\=\\=","multiple":false,"regex":""},{"id":"cena_regularna","parentSelectors":["produkty"],"type":"SelectorText","selector":"div.THuUndqKVbMFbPAMJTvqqA\\=\\=","multiple":false,"regex":""},{"id":"link","parentSelectors":["produkty"],"type":"SelectorLink","selector":".Uk1HSbaEW1DW7GlqOLiAhA\\=\\= a","multiple":false,"linkType":"linkFromHref"}]}

I have made some adjustments to the sitemap:

  1. Pagination should target the element that has the next page link.
  2. Selected a more universal selector value for 'produkty'; it is better to avoid selectors with strings of random characters.
{"_id":"rossmann_eveline__","startUrl":["https://www.rossmann.pl/produkty?BrandIds=10202&BrandIds=7483"],"selectors":[{"id":"strony","paginationType":"auto","parentSelectors":["_root","strony"],"selector":"[data-testid=\"pagination-next-page\"]:not([aria-disabled=\"true\"])","type":"SelectorPagination"},{"id":"produkty","multiple":true,"parentSelectors":["strony"],"selector":"[style=\"grid-template-columns: repeat(4, 1fr);\"] > div","type":"SelectorElement"},{"id":"nazwa","multiple":false,"parentSelectors":["produkty"],"regex":"","selector":"strong","type":"SelectorText"},{"id":"nazwa_","multiple":false,"parentSelectors":["produkty"],"regex":"","selector":"div.T3Qr7QxUwvjK6d97mrsI9g\\=\\=","type":"SelectorText"},{"id":"nazwa__","multiple":false,"parentSelectors":["produkty"],"regex":"","selector":"div.ArTk1hM-rID8PjxP9rM8oQ\\=\\=","type":"SelectorText"},{"id":"status","multiple":false,"parentSelectors":["produkty"],"regex":"","selector":"div.wW64wddacB38LOmL00vmCg\\=\\=","type":"SelectorText"},{"id":"chwilowo_niedostepny","multiple":false,"parentSelectors":["produkty"],"regex":"","selector":"div.cE531VIUekY1n0Nf73YpjQ\\=\\=:nth-of-type(1) div.LTlifAgXXZs\\+f4VkCyY2kg\\=\\=","type":"SelectorText"},{"id":"cena_aktualna","multiple":false,"parentSelectors":["produkty"],"regex":"","selector":"div.-WGMGMKmN\\+ka261jsQn\\+UA\\=\\=","type":"SelectorText"},{"id":"cena_bez_apki","multiple":false,"parentSelectors":["produkty"],"regex":"","selector":"span.PriceWithoutApp-module_text--cxLQa","type":"SelectorText"},{"id":"cena_30dni","multiple":false,"parentSelectors":["produkty"],"regex":"","selector":"div.xR0YpYXbdXnkSrCd7NGVfg\\=\\=","type":"SelectorText"},{"id":"cena_regularna","multiple":false,"parentSelectors":["produkty"],"regex":"","selector":"div.THuUndqKVbMFbPAMJTvqqA\\=\\=","type":"SelectorText"},{"id":"link","linkType":"linkFromHref","multiple":false,"parentSelectors":["produkty"],"selector":".Uk1HSbaEW1DW7GlqOLiAhA\\=\\= a","type":"SelectorLink"}]}

Unfortunately, the result exported to MS Excel is empty after the changes.

Ok, I noticed that the element selector was not working if the scraping window was not maximized.

Here is an updated version, which should work in any case:

{"_id":"rossmann_eveline__","startUrl":["https://www.rossmann.pl/produkty?BrandIds=10202&BrandIds=7483"],"selectors":[{"id":"strony","paginationType":"auto","parentSelectors":["_root","strony"],"selector":"[data-testid=\"pagination-next-page\"]:not([aria-disabled=\"true\"])","type":"SelectorPagination"},{"id":"produkty","multiple":true,"parentSelectors":["strony"],"selector":"[style*=\"grid-template-columns\"] > div","type":"SelectorElement"},{"id":"nazwa","multiple":false,"parentSelectors":["produkty"],"regex":"","selector":"strong","type":"SelectorText"},{"id":"nazwa_","multiple":false,"parentSelectors":["produkty"],"regex":"","selector":"div.T3Qr7QxUwvjK6d97mrsI9g\\=\\=","type":"SelectorText"},{"id":"nazwa__","multiple":false,"parentSelectors":["produkty"],"regex":"","selector":"div.ArTk1hM-rID8PjxP9rM8oQ\\=\\=","type":"SelectorText"},{"id":"status","multiple":false,"parentSelectors":["produkty"],"regex":"","selector":"div.wW64wddacB38LOmL00vmCg\\=\\=","type":"SelectorText"},{"id":"chwilowo_niedostepny","multiple":false,"parentSelectors":["produkty"],"regex":"","selector":"div.cE531VIUekY1n0Nf73YpjQ\\=\\=:nth-of-type(1) div.LTlifAgXXZs\\+f4VkCyY2kg\\=\\=","type":"SelectorText"},{"id":"cena_aktualna","multiple":false,"parentSelectors":["produkty"],"regex":"","selector":"div.-WGMGMKmN\\+ka261jsQn\\+UA\\=\\=","type":"SelectorText"},{"id":"cena_bez_apki","multiple":false,"parentSelectors":["produkty"],"regex":"","selector":"span.PriceWithoutApp-module_text--cxLQa","type":"SelectorText"},{"id":"cena_30dni","multiple":false,"parentSelectors":["produkty"],"regex":"","selector":"div.xR0YpYXbdXnkSrCd7NGVfg\\=\\=","type":"SelectorText"},{"id":"cena_regularna","multiple":false,"parentSelectors":["produkty"],"regex":"","selector":"div.THuUndqKVbMFbPAMJTvqqA\\=\\=","type":"SelectorText"},{"id":"link","linkType":"linkFromHref","multiple":false,"parentSelectors":["produkty"],"selector":".Uk1HSbaEW1DW7GlqOLiAhA\\=\\= a","type":"SelectorLink"}]}

Works great. Thank you for your help.