How to use :contains('string') to find exact match?

How can I use li:contains('Shoe') to find the element containing that exact value only?
It also returns elements containing 'Shoes', and I need an exact match.

You need to add a couple of selectors before or after the li:contains selector so that the scraper can home in on the specific item you are after.

For example, you can use the :contains selector at any point in the chain of selectors that you are using - div.example > div.example2 > li:contains('Shoe') > div.example3 > span.result

I mean <li>Shoessss</li> also contains "Shoe". I don't want those results. Only <li>Shoe</li>

:contains matches substrings, so you would have to chain :contains and :not(:contains(...)) together to exclude the longer matches:

li:contains('Shoe'):not(:contains('Shoes'))

1 Like

Thanks a lot! I figured that out last night. The problem is that the scraper suddenly stops after about 10 results. The scraping window suddenly closes. I already tried to increase the scraping timeout to 5s but that doesn't help. How can I solve this?

{"_id":"bmc","startUrl":["https://bx-en.bmc-switzerland.com/models.html"],"selectors":[{"id":"bikes","type":"SelectorLink","parentSelectors":["_root"],"selector":"a.product-item-link","multiple":true,"delay":0},{"id":"frame","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Frame') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"tubing","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Tubing') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"fork","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Fork') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"gears","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Gears') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"chainwheel","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Chainwheel') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"cassette","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Cassette') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"chain","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Chain'):not(:contains('Chainwheel')) .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"front derailleur","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Front Derailleur') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"rear derailleur","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Rear Derailleur') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"shifters","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Shifters') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"brakes","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Brakes') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"handlebar","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Handlebar') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"stem","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Stem') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"seatpost","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Seatpost') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"saddle","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Saddle') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"hubs","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Hubs') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"rims","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Rims') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"tires","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Tires') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"frame material","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Frame Material') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"rear shock","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Rear Shock') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"drive unit","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Drive Unit') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"battery pack","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Battery Pack') .product-attributes__value","multiple":false,"regex":"","delay":0}]}

The scraper is behaving exactly as it should. You are getting only 9 results because the sitemap only follows the 9 product links on the first listing page. You need to add pagination to the sitemap so that the scraper iterates through all of the pages.

Here is a working version of the sitemap:

{"_id":"bmc","startUrl":["https://bx-en.bmc-switzerland.com/models.html"],"selectors":[{"id":"bikes","type":"SelectorLink","parentSelectors":["_root","pagination"],"selector":"a.product-item-link","multiple":true,"delay":0},{"id":"frame","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Frame') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"tubing","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Tubing') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"fork","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Fork') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"gears","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Gears') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"chainwheel","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Chainwheel') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"cassette","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Cassette') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"chain","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Chain'):not(:contains('Chainwheel')) .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"front derailleur","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Front Derailleur') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"rear derailleur","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Rear Derailleur') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"shifters","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Shifters') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"brakes","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Brakes') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"handlebar","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Handlebar') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"stem","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Stem') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"seatpost","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Seatpost') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"saddle","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Saddle') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"hubs","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Hubs') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"rims","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Rims') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"tires","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Tires') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"frame material","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Frame Material') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"rear shock","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Rear Shock') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"drive unit","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Drive Unit') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"battery pack","type":"SelectorText","parentSelectors":["bikes"],"selector":"li:contains('Battery Pack') .product-attributes__value","multiple":false,"regex":"","delay":0},{"id":"pagination","type":"SelectorLink","parentSelectors":["_root","pagination"],"selector":"a[title=\"Next\"]","multiple":true,"delay":0}]}

Great!!! I thought I had them all listed on a single page at first. My bad.