Python 3.x - What is the appropriate tag to scrape these particular pieces of information?
I'm trying to scrape a bunch of information from a website.
For each product I need a list that looks like this:
[name, packaging, price/kg]
So the first product would output 3 lists looking like this:
- [royal canin veterinary diet - hypoallergenic dr 21, 7kg,4,84]
- [royal canin veterinary diet - hypoallergenic dr 21, 14kg,3,71]
- [royal canin veterinary diet - hypoallergenic dr 21, 14kg,3,71]
I've managed to get the names into the `name` list:
from bs4 import BeautifulSoup
import csv
import urllib.request
from collections import *
import time

# Scrape the product names from the zooplus "Royal Canin Vet Diet" listing
# page into the list `name`.

# time.clock() was removed in Python 3.8; perf_counter() is the replacement
# for measuring elapsed time.
start_time = time.perf_counter()

page_0 = urllib.request.urlopen(
    "http://www.zooplus.co.uk/shop/dogs/dry_dog_food/royal_canin_vet_diet"
)
soup_0 = BeautifulSoup(page_0.read(), "html.parser")

# Narrow the document down to the <tbody> elements holding the variants.
restricted_webpage = soup_0.find_all(
    "tbody", {"class": "product-variants-list"}
)
readable_restricted_webpage = str(restricted_webpage)
soup_1 = BeautifulSoup(readable_restricted_webpage, "html.parser")

# Rows are matched on either of the two background colours listed here.
title = soup_1.find_all(
    "tr",
    {"style": ["background-color:#ffffff;", "background-color:#ededed;"]},
)
soup_title = BeautifulSoup(str(title), "html.parser")

# Each <b> element inside those rows is taken as a product name; the
# surrounding <b>...</b> markup is stripped from its string form.
name = []
for b in soup_title.find_all("b"):
    name.append(str(b).replace("<b>", "").replace("</b>", ""))
The two other pieces of information are hard for me. I can't see which tag, or pair of tags, to use to get the (packaging, price/kg) pair.
I've tried multiple tags, and the closest I've got is: ("td", {"style":"background-color:#ededed;"},{"valign":"top"} )
I think I should use <span>.
Is there a way, or not?
How should I proceed, guys?
from bs4 import BeautifulSoup
import urllib.request

# Print every product on the zooplus "Royal Canin Vet Diet" listing page,
# followed by one line per variant giving its packaging and its price per kg.

url = 'http://www.zooplus.co.uk/shop/dogs/dry_dog_food/royal_canin_vet_diet'
response = urllib.request.urlopen(url)
html = response.read()
soup = BeautifulSoup(html, "html.parser")

# One <tbody class="product-variants-list"> per product.
product_list = soup.find_all("tbody", {"class": "product-variants-list"})


def visible(text):
    """Return *text* with blank lines and padding whitespace removed.

    The text is split into lines, each line is split on double spaces, every
    piece is stripped, and the non-empty pieces are concatenated.
    """
    lines = (line.strip() for line in text.splitlines())
    # Break multi-headlines into a chunk each. The separator is a DOUBLE
    # space: splitting on a single space would also remove the space in
    # "(£4.84/ kg)", which the expected output keeps.
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    # Drop blank chunks.
    return ''.join(chunk for chunk in chunks if chunk)


for i, product in enumerate(product_list):
    product_name = product.find("a", {"class": "follow3"}).find("b").text
    print(i, product_name)

    product_variants = product.find_all("tr", {"class": "product-variant"})
    for variant in product_variants:
        cells = variant.find_all("td")
        product_weight = cells[0].string

        # A variant on special offer wraps its price in a "specialprices"
        # span; otherwise the price is in a plain "smalltext" span.
        special = cells[1].find("span", {"class": "specialprices"})
        if special is not None:
            price_tag = special.find("span", {"class": "smalltextprices"})
        else:
            price_tag = cells[1].find("span", {"class": "smalltext"})

        product_price = price_tag.get_text(" ", strip=True)
        product_price_per_kg = visible(product_price)

        print('-' + product_weight, ' | ', product_price_per_kg)
output:
0 royal canin veterinary diet - hypoallergenic dr 21 -- 7kg | (£4.84/ kg) -- 14kg | (£3.71/ kg) -- economy pack: 2 x 14kg | (£3.68/ kg) 1 royal canin veterinary diet - sensitivity control sc 21 -- 7kg | (£5.13/ kg) -- 14kg | (£3.78/ kg) -- economy pack: 2 x 14kg | (£3.71/ kg) 2 royal canin veterinary diet - gastro intestinal gi 25 -- 2kg | (£6.95/ kg) -- 7.5kg | (£5.05/ kg) -- 14kg | (£3.56/ kg) -- economy pack: 2 x 14kg | (£3.50/ kg) 3 royal canin veterinary diet - renal rf 14 -- 7kg | (£4.99/ kg) -- 14kg | (£3.49/ kg) -- economy pack: 2 x 14kg | (£3.43/ kg) 4 royal canin veterinary diet - obesity management dp 34 -- 6kg | (£4.82/ kg) -- 14kg | (£3.56/ kg) -- economy pack: 2 x 14kg | (£3.50/ kg) 5 royal canin veterinary diet - urinary s/o lp 18 -- 7.5kg | (£5.05/ kg) -- 14kg | (£3.56/ kg) -- economy pack: 2 x 14kg | (£3.50/ kg) 6 royal canin veterinary diet - mobility ms 25 -- 7kg | (£4.99/ kg) -- 14kg | (£4.06/ kg) -- economy pack: 2 x 14kg | (£3.93/ kg) 7 royal canin veterinary diet - satiety support sat 30 -- 12kg | (£3.99/ kg) -- economy pack: 2 x 12kg | (£3.91/ kg) 8 royal canin veterinary diet - hepatic hf 16 -- 6kg | (£5.32/ kg) -- 12kg | (£3.99/ kg) -- economy pack: 2 x 12kg | (£3.91/ kg) 9 royal canin veterinary diet - dental dlk 22 -- 14kg | (£4.14/ kg) -- economy pack: 2 x 14kg | (£4.07/ kg) 10 royal canin veterinary diet - diabetic ds 37 -- 7kg | (£4.99/ kg) -- 12kg | (£3.91/ kg) -- economy pack: 2 x 12kg | (£3.75/ kg) 11 royal canin veterinary diet - calm cd 25 -- 4kg | (£6.72/ kg) -- economy pack: 2 x 4kg | (£6.61/ kg)
Comments
Post a Comment