Stuck with scraping Facebook "About" details

Hi,
I can't find a way to get all the detailed information from the "About" section of pages.
I am trying to get a list of restaurants in a city, together with all the contact information and details listed in the "About" section.

The problem is that some fields are present on some pages and not on others, and most pages do not fill in the same fields, so the available information ends up in the wrong column.

Also, every entry is duplicated.

https://www.facebook.com/search/110445852316970/places-in/273819889375819/places/intersect/

Sitemap:
{"_id":"test-resto","startUrl":["https://www.facebook.com/search/110445852316970/places-in/199377230079198/places/intersect/"],"selectors":[{"id":"wrapper","type":"SelectorElementScroll","parentSelectors":["_root"],"selector":"span.1wu a, div._26fh ul.uiList._6-i","multiple":true,"delay":"1000"},{"id":"name-link","type":"SelectorLink","parentSelectors":["wrapper"],"selector":"span.1wu a","multiple":true,"delay":"500"},{"id":"about-link","type":"SelectorLink","parentSelectors":["name-link"],"selector":"a._5u7u","multiple":false,"delay":"6000"},{"id":"street","type":"SelectorText","parentSelectors":["about-link"],"selector":"div._5aj7 div:nth-of-type(1) span._2iem","multiple":false,"regex":"","delay":0},{"id":"city","type":"SelectorText","parentSelectors":["about-link"],"selector":"div._4bl9 div:nth-of-type(2) span._2iem","multiple":false,"regex":"","delay":0},{"id":"messenger","type":"SelectorLink","parentSelectors":["about-link"],"selector":"a span._2iem","multiple":false,"delay":0},{"id":"phone","type":"SelectorText","parentSelectors":["about-link"],"selector":"div._3n4o div._50f4","multiple":false,"regex":"(?<=Call).*$","delay":0},{"id":"opening-hours","type":"SelectorText","parentSelectors":["about-link"],"selector":"div._6a div._4bl7","multiple":false,"regex":"","delay":0},{"id":"email","type":"SelectorText","parentSelectors":["about-link"],"selector":"div._4-u2:nth-of-type(3) div._5aj7:nth-of-type(2) div._4bl9","multiple":false,"regex":"","delay":0},{"id":"website","type":"SelectorText","parentSelectors":["about-link"],"selector":"a div div._50f4","multiple":false,"regex":"","delay":0},{"id":"general-manager","type":"SelectorText","parentSelectors":["about-link"],"selector":"div._5aj7:nth-of-type(5) div._3-8w","multiple":false,"regex":"","delay":0},{"id":"Followers","type":"SelectorText","parentSelectors":["name-link"],"selector":"div._4-u2._6590 div._2pi9:nth-of-type(4) div._4bl9 div","multiple":false,"regex":"","delay":0},{"id":"Price","type":"SelectorText","parentSelectors":["wrapper"],"selector":"span._1n0 span span","multiple":false,"regex":"","delay":0},{"id":"category","type":"SelectorText","parentSelectors":["wrapper"],"selector":"li:nth-of-type(n+3) a._1wuz span._c24","multiple":false,"regex":"","delay":0},{"id":"price2","type":"SelectorText","parentSelectors":["about-link"],"selector":"div._3-8j div.clearfix:nth-of-type(3) div._4bl9","multiple":false,"regex":"","delay":0}]}

1 Like

Try this. It is not perfect.

{"_id":"facebookpagestemplate","startUrl":["https://www.facebook.com/nsuase.sibstrin","https://www.facebook.com/nsuem.rus","https://www.facebook.com/NSUFlorida","https://www.facebook.com/nsuheadoffice","https://www.facebook.com/nsuniv","https://www.facebook.com/nsuniversity.official","https://www.facebook.com/NSURiverHawks","https://www.facebook.com/nta.isny","https://www.facebook.com/NTCManila","https://www.facebook.com/nthu.tw","https://www.facebook.com/ntnu.no","https://www.facebook.com/NTNU.Taiwan","https://www.facebook.com/NTPU1949","https://www.facebook.com/ntu.edu.iq","https://www.facebook.com/ntu.xpi","https://www.facebook.com/ntuanews1017","https://www.facebook.com/NTUB1917"],"selectors":[{"id":"pagetitle","multiple":false,"parentSelectors":["_root"],"regex":"","selector":"span.ircgss63,span.kcqno65y","type":"SelectorText"},{"id":"likes","multiple":false,"parentSelectors":["_root"],"regex":"","selector":"a.rtxb060y.innypi6y:nth-of-type(1),div.jcxyg2ei:nth-of-type(4) span.b6ax4al1","type":"SelectorText"},{"id":"followers","multiple":false,"parentSelectors":["_root"],"regex":"","selector":"a.rse6dlih:nth-of-type(2),div:nth-of-type(5) .t7p7dqev span.k1z55t6l.pbevjfx6","type":"SelectorText"},{"id":"address","multiple":false,"parentSelectors":["_root"],"regex":"","selector":"div.f36a8esv","type":"SelectorText"},{"id":"phone","multiple":false,"parentSelectors":["_root"],"regex":"\\(?\\+[0-9]{1,3}\\)? ?-?[0-9]{1,3} ?-?[0-9]{3,5} ?-?[0-9]{4}( ?-?[0-9]{3})? ?(\\w{1,10}\\s?\\d{1,6})?","selector":"div.svm27lag","type":"SelectorText"},{"id":"email","multiple":false,"parentSelectors":["_root"],"regex":"(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|\"(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21\\x23-\\x5b\\x5d-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])*\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21-\\x5a\\x53-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])+)\\])","selector":"div.svm27lag","type":"SelectorText"},{"id":"website","multiple":false,"parentSelectors":["_root"],"regex":"https?:\\/\\/(www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b([-a-zA-Z0-9()@:%_\\+.~#?&//=]*)","selector":"div.svm27lag","type":"SelectorText"},{"id":"pagelogourl","multiple":false,"parentSelectors":["_root"],"selector":".lcfup58g a.o9erhkwx","type":"SelectorLink"},{"id":"pagecoverphotourl","multiple":false,"parentSelectors":["_root"],"selector":".nuz1ool1.lq84ybu9 img.bdao358l","type":"SelectorImage"},{"id":"about","multiple":false,"parentSelectors":["_root"],"regex":"","selector":".km253p1d > div.p8bdhjjv:contains(\"About\"),div[data-pagelet='ProfileTilesFeed_0']:contains(\"Intro\")","type":"SelectorText"}]}