import scrapy
from scrapy.http import Request
from bs4 import BeautifulSoup
class TestSpider(scrapy.Spider):
    """Spider that follows the links on the listing page and inspects each detail page.

    The crawl starts from the single URL in ``start_urls``; every followed
    link is handled by :meth:`parse_book`.
    """

    name = 'test'
    start_urls = ['https://www.baroul-bucuresti.ro/index.php?urlpag=tablou-definitivi&p=1']

    def parse(self, response):
        """Yield one request per detail link found on the listing page.

        Args:
            response: The downloaded listing page.

        Yields:
            A ``Request`` for each followed link, with :meth:`parse_book`
            as its callback.
        """
        soup = BeautifulSoup(response.text, 'html.parser')
        for panel in soup.find_all('div', class_='panel-title'):
            # The first anchor of every panel is skipped on purpose ([1:]).
            for link in panel.find_all('a', href=True)[1:]:
                # urljoin resolves the href against the page URL, so both
                # root-relative and absolute hrefs give a valid target.
                yield Request(response.urljoin(link['href']), callback=self.parse_book)

    def parse_book(self, response):
        """Print the text found for the "Decizia de intrare:" entry of a detail page.

        Args:
            response: The downloaded detail page.
        """
        for paragraph in response.xpath("//div[@class='av_bot_left left']//p"):
            if 'Decizia de intrare:' in paragraph.get():
                # NOTE: this XPath is evaluated against the whole document,
                # not against ``paragraph``; ``.get()`` therefore returns the
                # first text node of the first ``em.ral_i`` on the page, which
                # is only the text inside that <em>, not the rest of the
                # matching paragraph.
                d1 = response.xpath("//em[@class='ral_i']//text()").get()
                print(d1)
上面的程式碼給我的輸出如下:
Decizia de intrare:
但我實際想要的輸出,是您在這個網頁下方看到的內容(https://www.baroul-bucuresti.ro/avocat/15655/aanegroae-ana-maria):
Decizia de intrare: 2469/1-06.12.16
uj5u.com熱心網友回復:
嘗試這個:
在 if 陳述式中,我不再從檔案根節點開始查詢 XPath,而是改為對您已經確認包含所需文字的那個節點做相對 XPath 查詢,取出它底下所有的文字節點。接著只需要做一些字串整理:去掉空白的片段,再用空格把其餘部分連接起來。
def parse_book(self, response):
    """Print the complete "Decizia de intrare:" line of a detail page.

    Every ``<p>`` inside ``div.av_bot_left.left`` whose markup contains the
    label is reduced to its visible text and printed on one line.

    Args:
        response: The downloaded detail page; only its ``xpath`` method is used.
    """
    for paragraph in response.xpath("//div[@class='av_bot_left left']//p"):
        if 'Decizia de intrare:' not in paragraph.get():
            continue
        # Relative XPath (leading "."): collect the text nodes of this
        # paragraph only, so the label and the value next to it are both kept.
        fragments = (text.strip() for text in paragraph.xpath('.//text()').getall())
        # Drop whitespace-only fragments and join the rest with single spaces.
        d1 = " ".join(fragment for fragment in fragments if fragment)
        print(d1)
轉載請註明出處,本文鏈接:https://www.uj5u.com/qiye/492888.html
下一篇:Golang嵌套地圖過濾器