Too many entries

Web Scraper version: 0.3.5
Chrome version: 63.0.3239.132
OS: Windows 10

Hi! Apparently there's a limit to the number of entries we can scrape with Web Scraper. If this is not an error that can be overcome, it would be very helpful to know what the limit is.

Thank you, you did a great job with this extension so far!
Florin

Sitemap:
{"_id":"yp_searches","startUrl":["https://www.yellowpages.com/"],"selectors":[{"id":"select_city","type":"SelectorLink","selector":"section#popular-cities a","parentSelectors":["_root"],"multiple":true,"delay":0},{"id":"searches","type":"SelectorLink","selector":"section#trends.section-box a.see-more:nth-of-type(1)","parentSelectors":["select_city"],"multiple":false,"delay":0},{"id":"pagination1","type":"SelectorLink","selector":"div.holder:nth-of-type(n+2) a","parentSelectors":["searches"],"multiple":true,"delay":0},{"id":"select_search1","type":"SelectorLink","selector":"section li a","parentSelectors":["searches"],"multiple":true,"delay":0},{"id":"select_search2","type":"SelectorLink","selector":"section li a","parentSelectors":["pagination1"],"multiple":true,"delay":0},{"id":"business2","type":"SelectorElement","selector":"div.result div.v-card","parentSelectors":["select_search2"],"multiple":true,"delay":0},{"id":"business1","type":"SelectorElement","selector":"div.result div.v-card","parentSelectors":["select_search1"],"multiple":true,"delay":0},{"id":"pagination3","type":"SelectorLink","selector":"div.pagination a","parentSelectors":["select_search1"],"multiple":true,"delay":0},{"id":"business3","type":"SelectorElement","selector":"div.result div.v-card","parentSelectors":["pagination3"],"multiple":true,"delay":0},{"id":"pagination4","type":"SelectorLink","selector":"div.pagination a","parentSelectors":["select_search2"],"multiple":true,"delay":0},{"id":"business4","type":"SelectorElement","selector":"div.result div.v-card","parentSelectors":["pagination4"],"multiple":true,"delay":0},{"id":"Category4","type":"SelectorText","selector":"div.categories","parentSelectors":["business4"],"multiple":true,"regex":"","delay":0},{"id":"website4","type":"SelectorLink","selector":"div.links a:nth-of-type(1)","parentSelectors":["business4"],"multiple":true,"delay":0},{"id":"category2","type":"SelectorText","selector":"div.categories","parentSelectors":["business2"],"multiple":true,"regex":"","delay":0},{"id":"website2","type":"SelectorLink","selector":"div.links a:nth-of-type(1)","parentSelectors":["business2"],"multiple":true,"delay":0},{"id":"category1","type":"SelectorText","selector":"div.categories","parentSelectors":["business1"],"multiple":true,"regex":"","delay":0},{"id":"website1","type":"SelectorLink","selector":"div.links a:nth-of-type(1)","parentSelectors":["business1"],"multiple":true,"delay":0},{"id":"website3","type":"SelectorLink","selector":"div.links a:nth-of-type(1)","parentSelectors":["business3"],"multiple":true,"delay":0},{"id":"category3","type":"SelectorText","selector":"div.categories","parentSelectors":["business3"],"multiple":true,"regex":"","delay":0}]}

Error Message:
initial configuration i
background_script.js:24 initializing Background Script message listener
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 content script error Object {"status":500,"name":"indexed_db_went_bad","message":"unknown","reason":"UnknownError"}
2background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
background_script.js:24 Uncaught (in promise) n
background_script.js:24 content script error Object {"status":500,"name":"indexed_db_went_bad","message":"unknown","reason":"UnknownError"}
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24

The error is caused by Chrome's internal messaging limits, which apply to all Chrome extensions. Technically, this isn't a Web Scraper limitation.

How did you get this error? Was the error thrown during the scraping process or during the data export process?

Hi! Thank you for your reply!
The scraping goes on uninterrupted, I've even let the scraper run on a computer for more than a week without it crashing.
When I go to my sitemap and click "Export data as CSV", the "Download" button just doesn't show up (and I get that error instead).
Do you think there's any way to make my browser ignore the limitation, or bring the limit up?
Or maybe break the export into several CSVs from the extension? (maybe as a paid feature)

It seems that the problem was that Chrome's messaging system limits the amount of data that can be sent from the database to the devtools page within the extension. We added a pagination system so that the devtools panel can fetch records in batches of 100. This should fix the problem. We will release the fix in version 0.3.7. It should be released next week.

2 Likes

Wow!! Amazing, thank you so much!

We have released version 0.3.7. After your extension updates, can you try to export the data again?

Thank you! I will try to export the data again tonight.

I still get the error even after updating the extension to 0.3.7:

initial configuration i
background_script.js:24 initializing Background Script message listener
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
c.onabort @ background_script.js:24
background_script.js:24 Uncaught (in promise) n {status: 500, name: "indexed_db_went_bad", message: "unknown", error: true, reason: "UnknownError"}
s @ background_script.js:24
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
c.onabort @ background_script.js:24
background_script.js:24 Uncaught (in promise) n {status: 500, name: "indexed_db_went_bad", message: "unknown", error: true, reason: "UnknownError"}
s @ background_script.js:24
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
c.onabort @ background_script.js:24
background_script.js:24 Uncaught (in promise) n {status: 500, name: "indexed_db_went_bad", message: "unknown", error: true, reason: "UnknownError"}
s @ background_script.js:24
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
c.onabort @ background_script.js:24
background_script.js:24 Uncaught (in promise) n {status: 500, name: "indexed_db_went_bad", message: "unknown", error: true, reason: "UnknownError"}
background_script.js:24 Database has a global failure DOMException: Maximum IPC message size exceeded.
_ @ background_script.js:24
c.onabort @ background_script.js:24
background_script.js:24 Uncaught (in promise) n {status: 500, name: "indexed_db_went_bad", message: "unknown", error: true, reason: "UnknownError"}

We know for sure that we fixed a data export limitation bug. The only limitation now would be that a single batch of 100 records cannot exceed 30 MB of data. Do you think your data could exceed that?

Hi!

Yes, I think that's possible. I made an export of ~25 MB and it worked. Trying with other sitemaps brought up that error, so it's likely that the data is >30 MB.