How can I scrape an element's sibling in bs4 when I know only part of the element's title text?
from urllib.request import urlopen
from bs4 import BeautifulSoup as BS
from urllib import request
import pandas as pd
import os
import re
# Download the article and parse it; the `with` block closes the HTTP
# response as soon as the body has been read.
with request.urlopen('https://en.wikipedia.org/wiki/Charles_Ehresmann') as html:
    bs = BS(html.read(), 'html.parser')

# Empty frame with the single column the scraped value is meant for.
data = pd.DataFrame({'known for': []})

# `find` returns None when nothing matches, so a missing element surfaces
# as AttributeError on the attribute access that follows.
try:
    name = bs.find('h1').text
except AttributeError:
    name = ''

try:
    # A plain str passed as `string=` is compared for exact equality, so
    # 'Known.*' would never match; a compiled pattern is searched instead.
    # The value lives in the <td> that follows the header cell, not in the
    # header's own text node (which is what `.next_element` yields).
    known = (
        bs.find('th', string=re.compile(r'^Known'))
        .find_next_sibling('td')
        .get_text('\n')
    )
except AttributeError:
    known = ''
Thanks for any ideas.
You can use the `:contains` CSS pseudo-class together with `next_sibling`:
from bs4 import BeautifulSoup as bs
import requests
# Fetch the article and hand the decoded body to the lxml parser.
r = requests.get('https://en.wikipedia.org/wiki/Charles_Ehresmann')
soup = bs(r.text, 'lxml')
# Locate the header cell whose text contains "Known", then step to the node
# immediately after it and print that node's text split into lines.
# NOTE(review): newer soupsieve releases may prefer `:-soup-contains` over
# `:contains` - confirm against the installed version.
known_cell = soup.select_one('th:contains("Known")').next_sibling
known_lines = known_cell.get_text('\n').split('\n')
print(known_lines)
To get the result as a single string instead of a list:
# Same lookup as before, but print the cell text as one newline-separated
# string rather than splitting it into a list.
known_text = soup.select_one('th:contains("Known")').next_sibling.get_text('\n')
print(known_text)
Collected from the Internet
Please contact [email protected] to delete if infringement.
Comments