我是編程新手,需要解決一個實際問題:從某個網站獲取信息並將其寫入 Excel(寫入 Excel 的部分,我希望能照著教程自行完成)。主要問題是我無法登錄該網站(該網站是免費的)。你能看看我的代碼嗎?當我運行它時,得到的輸出是:
[]
進程完成,退出代碼 0
import requests
from bs4 import BeautifulSoup
import pytest
import time
import json
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Page that the plain `requests` code path downloads.
URL = 'http://way2drug.com/passonline/'

# Browser-like request headers; the long values are split with implicit
# string concatenation, so the resulting strings are unchanged.
HEADERS = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/96.0.4664.45 Safari/537.36'
    ),
    'accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/avif,image/webp,image/apng,*/*;q=0.8,'
        'application/signed-exchange;v=b3;q=0.9'
    ),
}
class Test1():
    """Recorded Selenium walk-through of http://way2drug.com/passonline/.

    The steps open the page, fill the ``user_login`` / ``user_password``
    fields, click ``register``, then enter a SMILES string into the ``smi``
    field and click the fourth child input of ``#myContent4``.

    Every element is fetched through an explicit wait instead of a bare
    ``find_element``: each click changes the page, and looking up the next
    element before the DOM has caught up makes the run timing-dependent.
    """

    # Upper bound, in seconds, that each step waits for its element.
    WAIT_TIMEOUT = 10

    def setup_method(self, method):
        """Start a fresh Chrome session before each test method."""
        self.driver = webdriver.Chrome()
        self.vars = {}

    def teardown_method(self, method):
        """Close the browser after each test method."""
        self.driver.quit()

    def _ready(self, by, value):
        """Wait until the element is visible and enabled, then return it.

        Raises selenium's ``TimeoutException`` if the element is not ready
        within ``WAIT_TIMEOUT`` seconds.
        """
        return WebDriverWait(self.driver, self.WAIT_TIMEOUT).until(
            expected_conditions.element_to_be_clickable((by, value))
        )

    def test_1(self):
        """Replay the recorded steps against the live site."""
        # Test name: 1
        # Step # | name | target | value | comment
        # 1 | open | /passonline/ |  |
        self.driver.get("http://way2drug.com/passonline/")
        # 2 | setWindowSize | 1920x1030 |  |
        self.driver.set_window_size(1920, 1030)
        # 3 | click | css=#registration img |  |
        self._ready(By.CSS_SELECTOR, "#registration img").click()
        # 4 | click | name=user_login |  |
        self._ready(By.NAME, "user_login").click()
        # 5 | type | name=user_login |  |
        self._ready(By.NAME, "user_login").send_keys("MY USER")
        # 6 | click | id=page1 |  |
        self._ready(By.ID, "page1").click()
        # 7 | type | name=user_password |  |
        self._ready(By.NAME, "user_password").send_keys("MY PASS")
        # 8 | click | id=register |  |
        self._ready(By.ID, "register").click()
        # 9 | click | id=myHeader1 |  |
        self._ready(By.ID, "myHeader1").click()
        # 10 | click | id=smiles |  |
        self._ready(By.ID, "smiles").click()
        # Elements are looked up again before each action, as in the
        # recording, so a re-rendered field never yields a stale handle.
        self._ready(By.ID, "smi").click()
        self._ready(By.ID, "smi").send_keys("CC1(C)C(O)CC[C@@]2(C)C1CC[C@]3(C)C2CCC4[C@@]3(C)CC[C@]5(C(O)=O)C4[C@H](C)C(C)=CC5")
        self._ready(By.CSS_SELECTOR, "#myContent4 input:nth-child(4)").click()
def get_html(url, params=None, timeout=10):
    """Send a GET request to *url* using the module-level ``HEADERS``.

    Args:
        url: Address to request.
        params: Optional query-string parameters passed to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block indefinitely on a server that
            never answers.

    Returns:
        The ``requests.Response`` object; the caller checks ``status_code``.

    Raises:
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    return requests.get(url, headers=HEADERS, params=params, timeout=timeout)
def get_content(html):
    """Print every ``<a>`` tag with class ``Antineoplastic`` found in *html*.

    Args:
        html: Markup text to parse with the ``html.parser`` backend.
    """
    matches = BeautifulSoup(html, 'html.parser').find_all(
        'a', class_='Antineoplastic'
    )
    print(matches)
def parse():
    """Download ``URL`` and print the matching links, or an error message.

    Anything other than HTTP 200 is reported with a fixed message and the
    page is not parsed.
    """
    response = get_html(URL)
    if response.status_code != 200:
        print('ALL YOUR BASE ARE BELONG TO US')
        return
    get_content(response.text)
# Runs the requests-based scrape as soon as this module is executed or imported.
parse()
請注意:永遠不要公開您的登錄憑據。
您提到必須先登錄,而 selenium 是一個不錯的選擇;但您的 parse() 是通過 requests 獲取頁面的,它並沒有使用 selenium 中已登錄的瀏覽器會話。因此,如果您查看 soup 的內容,就不會找到您要查找的元素。
先執行您的 selenium 操作,導航到您想要抓取的頁面。下一步,將 driver.page_source 傳入 BeautifulSoup 並查找您的元素:
# Parse the DOM that the Selenium-driven browser currently holds, rather
# than a separately downloaded copy of the page.
rendered_html = driver.page_source
soup = BeautifulSoup(rendered_html, 'html.parser')
items = soup.find_all('a', class_='Antineoplastic')
print(items)
如果您的選擇器是正確的,您就會得到想要的結果。
關於您的評論:下面是一個可以作為起點的完整示例;至於中間的調試步驟,您應該另外提出一個帶有針對性示例的新問題:
import requests
from bs4 import BeautifulSoup
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Drive a real browser through the form steps, then parse the page the
# browser ends up on.
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)


def ready(by, value):
    """Wait until the element is visible and enabled, then return it."""
    return wait.until(expected_conditions.element_to_be_clickable((by, value)))


try:
    driver.get("http://way2drug.com/passonline/")
    driver.set_window_size(1920, 1030)

    # Each click changes the page, so every element is awaited explicitly
    # instead of being looked up immediately with find_element.
    ready(By.CSS_SELECTOR, "#registration img").click()
    ready(By.NAME, "user_login").click()
    ready(By.NAME, "user_login").send_keys("MY USER")
    ready(By.ID, "page1").click()
    ready(By.NAME, "user_password").send_keys("MY PASS")
    ready(By.ID, "register").click()
    ready(By.ID, "myHeader1").click()
    ready(By.ID, "smiles").click()
    ready(By.ID, "smi").click()
    ready(By.ID, "smi").send_keys("CC1(C)C(O)CC[C@@]2(C)C1CC[C@]3(C)C2CCC4[C@@]3(C)CC[C@]5(C(O)=O)C4[C@H](C)C(C)=CC5")
    ready(By.CSS_SELECTOR, "#myContent4 input:nth-child(4)").click()

    # NOTE(review): page_source is read right after the final click. If the
    # site renders its results asynchronously, wait for the result element
    # here first - confirm against the live page.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    items = soup.find_all('a', class_='Antineoplastic')
    print(items)
finally:
    # Always close the browser, even when a step above raises.
    driver.quit()
本文收集自互联网,转载请注明来源。
如有侵权,请联系 [email protected] 删除。
我来说两句