Hi JanAp,
Thank you for helping me!
I would like to scrape all 358 pages of dentist information from:
An example of the structure is:
(first mainpage) 7146 tandartsen in Nederland
(first subpage) Tandarts Elst, M.
On this first subpage there are 2 addresses (subsubpages).
In both subsubpages there are address details:
- Endogooi, praktijk voor endodontologie, Bussum
- Staas & Bergmans, locatie Schubertsingel, 's-Hertogenbosch
(Not all the subpages have 2 subsubpages; sometimes there is only 1, and sometimes more than 2.)
===============
I am very inexperienced with scripts and found the script below using AI:
"
import requests
from bs4 import BeautifulSoup
def _text_or_default(element, default="Niet beschikbaar"):
    """Return the stripped text of *element*, or *default* when it is missing."""
    return element.text.strip() if element else default


def find_modal_content_values(url, timeout=30):
    """Fetch *url* and print the details found in its ``modal-content`` div.

    Prints four lines (``Naam``, ``Adres``, ``Telefoon``, ``Website``) taken
    from the first ``<div class="modal-content">`` on the page. Any field
    whose element is not found is printed as ``"Niet beschikbaar"``.

    Args:
        url: Address of the page to download.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot block the script forever.

    Returns:
        None. All results and error messages are written with ``print``.
    """
    missing = "Niet beschikbaar"

    # Network failures and malformed URLs are reported through the same
    # message as a non-200 response instead of aborting with a traceback.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        print(f"Fout bij het ophalen van de pagina: {url}")
        return
    if response.status_code != 200:
        print(f"Fout bij het ophalen van de pagina: {url}")
        return

    soup = BeautifulSoup(response.content, 'html.parser')
    modal_content = soup.find('div', class_='modal-content')
    if not modal_content:
        print("Geen <div class='modal-content'> gevonden op de pagina.")
        return

    naam_element = modal_content.find('h2', class_='mb-2')
    adres_element = modal_content.find('address', class_='flex-fill non-italic m-0')
    telefoon_element = modal_content.find('a', class_='underline')

    print("Naam:", _text_or_default(naam_element, missing))
    print("Adres:", _text_or_default(adres_element, missing))
    print("Telefoon:", _text_or_default(telefoon_element, missing))

    website_element = modal_content.find('div', class_='flex-fill d-flex flex-column')
    website_link = website_element.find('a') if website_element else None
    # .get() avoids a KeyError when the anchor has no href attribute.
    website = website_link.get('href', missing) if website_link else missing
    print("Website:", website)
# Script entry: fetch one page and print the details found on it.
# NOTE(review): the value below looks like the link text of the overview
# page rather than an actual URL (no scheme or host) -- presumably the
# hyperlink was lost when the post was pasted; replace it with the real
# "https://..." address before running.
url = "7146 tandartsen in Nederland"
find_modal_content_values(url)
