"""Drive a Chrome browser through a configured search engine and collect results."""
import hashlib
import time

import xlwt
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

from SearchEngine import EngineConfManage


class Browser:
    """Wrap a Chrome WebDriver together with one search engine's configuration.

    ``conf`` is a dict that must contain ``'engine'``; the key ``'kw'`` (the
    search keyword) is reset to ``''`` on construction and the dict is stored
    by reference, so the caller's dict is modified.

    ``engine_conf`` is whatever ``EngineConfManage().get_Engine_conf(...)
    .get_conf()`` returns; this class reads the keys ``'website'``,
    ``'searchTextID'``, ``'searchBtnID'`` and ``'searchContentHref_class'``
    from it.
    """

    # Element id that must be present before a results page is read.
    _PAGER_ID = "page"
    _WAIT_TIMEOUT = 30
    _WAIT_POLL = 1
    # Give up waiting for a page turn after this many polls.
    _PAGE_TURN_MAX_POLLS = 100
    # NOTE(review): poll delay is not visible in the source — confirm.
    _PAGE_TURN_POLL_DELAY = 0.3

    def __init__(self, conf):
        self.browser = webdriver.Chrome()
        self.conf = conf
        self.conf['kw'] = ''
        self.engine_conf = (
            EngineConfManage().get_Engine_conf(conf['engine']).get_conf()
        )

    def set_kw(self, kw):
        """Set the keyword that will be searched for."""
        self.conf['kw'] = kw

    def send_keyword(self):
        """Type the configured keyword into the engine's search box."""
        # find_element(By.ID, ...) replaces find_element_by_id, which was
        # removed in Selenium 4.3; this form also works on Selenium 3.
        search_box = self.browser.find_element(
            By.ID, self.engine_conf['searchTextID']
        )
        search_box.send_keys(self.conf['kw'])

    def click_search_btn(self):
        """Click the engine's search button."""
        search_btn = self.browser.find_element(
            By.ID, self.engine_conf['searchBtnID']
        )
        search_btn.click()

    def get_search_res_url(self):
        """Collect search results as a dict mapping result URL to page source.

        NOTE(review): in the reviewed source everything between
        ``while len(res_link)`` and ``100:`` was lost. The loop bound
        (``conf['target_page']``), the skip list (``conf['white_list']``) and
        the per-result handling below are a reconstruction and must be
        confirmed against the original file before merging.
        """
        res_link = {}
        target = int(self.conf['target_page'])  # NOTE(review): assumed key
        skipped = self.conf.get('white_list', ())  # NOTE(review): assumed key

        while len(res_link) < target:
            self._wait_for_pager()
            # Parse the rendered page with BeautifulSoup.
            soup = BeautifulSoup(self.browser.page_source, "html.parser")
            search_res_list = soup.select(
                '.' + self.engine_conf['searchContentHref_class']
            )

            for el in search_res_list:
                anchor = el.a
                if anchor is None or not anchor.get('href'):
                    continue
                real_url, page = self._open_result(anchor['href'])
                if real_url is None or real_url in skipped:
                    continue
                res_link[real_url] = page

            # Stop instead of spinning forever when the page yields nothing
            # or the next page never loads.
            if not search_res_list:
                break
            if not self.click_next_page(self._page_md5()):
                break
        return res_link

    def click_next_page(self, md5):
        """Click "next page" and wait until the page content changes.

        ``md5`` is the digest of the page source taken before the click.
        Returns ``True`` once the digest differs, ``False`` after more than
        ``_PAGE_TURN_MAX_POLLS`` polls without a change.

        NOTE(review): only the ``> 100`` / ``return False`` / ``return True``
        tail of this method is visible in the reviewed source; the method
        name, its parameter and the ``nextPageBtnID_xpath_*`` keys are
        assumptions to confirm.
        """
        self._wait_for_pager()
        try:
            next_btn = self.browser.find_element(
                By.XPATH, self.engine_conf['nextPageBtnID_xpath_s']
            )
        except NoSuchElementException:
            next_btn = self.browser.find_element(
                By.XPATH, self.engine_conf['nextPageBtnID_xpath_f']
            )
        next_btn.click()

        polls = 0
        while md5 == self._page_md5():
            time.sleep(self._PAGE_TURN_POLL_DELAY)
            polls += 1
            if polls > self._PAGE_TURN_MAX_POLLS:
                return False
        return True

    def _wait_for_pager(self):
        """Block until the pager element is present in the DOM."""
        WebDriverWait(
            self.browser,
            timeout=self._WAIT_TIMEOUT,
            poll_frequency=self._WAIT_POLL,
        ).until(EC.presence_of_element_located((By.ID, self._PAGER_ID)))

    def _page_md5(self):
        """Return the MD5 hex digest of the current page source."""
        source = self.browser.page_source.encode(encoding='UTF-8')
        return hashlib.md5(source).hexdigest()

    def _open_result(self, href):
        """Open ``href`` in a new window and return ``(final_url, page_source)``.

        The new window is always closed and focus returned to the results
        window. Returns ``(None, None)`` if no new window appeared.
        """
        origin = self.browser.current_window_handle
        # Pass the URL as an argument rather than concatenating it into JS.
        self.browser.execute_script('window.open(arguments[0])', href)
        opened = [h for h in self.browser.window_handles if h != origin]
        if not opened:
            return None, None
        self.browser.switch_to.window(opened[-1])
        try:
            # NOTE(review): settle delay is an assumption — confirm.
            time.sleep(1)
            return self.browser.current_url, self.browser.page_source
        finally:
            self.browser.close()
            self.browser.switch_to.window(origin)


class BrowserManage(Browser):
    """Run a complete search on the configured engine."""

    def search(self):
        """Open the engine, submit the keyword and return the collected results."""
        self.browser.get(self.engine_conf['website'])  # open the engine's site
        self.send_keyword()                            # type the keyword
        self.click_search_btn()                        # submit the search
        return self.get_search_res_url()               # gather result pages