I have a script that scrapes data from a public death certificate archive site. This works OK, but every once in a while entries are skipped. When using the script below, it always skips the 7th, 14th, 16th, 17th, 18th and 19th lines! I have no clue why this is happening. Is this related to the script, or is it something in the webpage?
script:
Sitemaps
Sitemap rat_overlijden_baarle_nassau_single
Create new sitemap
{"_id":"rat_overlijden_baarle_nassau_single","startUrl":["Zoek een persoon uit Tilburg - Regionaal Archief Tilburg a.person","multiple":false,"regex":""},{"id":"relatie","parentSelectors":["SelectorElementClick-Rows"],"type":"SelectorText","selector":".relation a.person","multiple":false,"regex":""},{"id":"leeftid","parentSelectors":["SelectorElementClick-Rows"],"type":"SelectorText","selector":".ng-binding span[data-ng-if]","multiple":false,"regex":""},{"id":"overlijdensplaats","parentSelectors":["SelectorElementClick-Rows"],"type":"SelectorText","selector":"span.ng-binding[ng-if='person.metadata.plaats_overlijden']","multiple":false,"regex":""},{"id":"overlijdensdatum","parentSelectors":["SelectorElementClick-Rows"],"type":"SelectorText","selector":"span[ng-if='person.metadata.datum_overlijden']","multiple":false,"regex":""},{"id":"vader","parentSelectors":["SelectorElementClick-Rows"],"type":"SelectorText","selector":".male.ng-scope a.person","multiple":false,"regex":""},{"id":"moeder","parentSelectors":["SelectorElementClick-Rows"],"type":"SelectorText","selector":".female.ng-scope a.person","multiple":false,"regex":""},{"id":"gemeente","parentSelectors":["SelectorElementClick-Rows"],"type":"SelectorText","selector":"div[data-ng-if='register.metadata.gemeente']","multiple":false,"regex":""},{"id":"periode","parentSelectors":["SelectorElementClick-Rows"],"type":"SelectorText","selector":"div[data-ng-if='register.metadata.periode']","multiple":false,"regex":""},{"id":"aktedatum","parentSelectors":["SelectorElementClick-Rows"],"type":"SelectorText","selector":"li[ng-if='deed.metadata.datum']","multiple":false,"regex":""},{"id":"aktenr","parentSelectors":["SelectorElementClick-Rows"],"type":"SelectorText","selector":"span[data-ng-if='deed.metadata.nummer']","multiple":false,"regex":""},{"id":"aktelink","parentSelectors":["SelectorElementClick-Rows"],"type":"SelectorLink","selector":"a.image[ng-href]","multiple":false},{"id":"overledenetot","parentSelectors":["SelectorElementClick-Rows"],"type":"SelectorText","selector":"[data-person='overledene'] div.ng-binding","multiple":false,"regex":""},{"id":"DiversenOverledene","parentSelectors":["SelectorElementClick-Rows"],"type":"SelectorText","selector":"span span.ng-binding","multiple":false,"regex":""},{"id":"DiversenVader","parentSelectors":["SelectorElementClick-Rows"],"type":"SelectorText","selector":".male p","multiple":false,"regex":""},{"id":"DiversenMoeder","parentSelectors":["SelectorElementClick-Rows"],"type":"SelectorText","selector":".female p","multiple":false,"regex":""}]}