class MyStore(cpt.StoreBase):
    """Store whose ``data`` list collects the items gathered during one run."""

    def __init__(self):
        # Each instance gets its own fresh list, so separate runs never
        # share results.
        self.data = list()
@cpt.check(exclude=["driver", "store"])
def crawl_baidu_list(driver: WebDriver, store: MyStore, limit: int) -> None:
    """Append the analyzed Baidu result entries to ``store.data``.

    Builds a pager driven by the page-navigation element and an analyzer
    over the ``#content_left > div`` nodes, then stores everything the
    analyzer produces for ``limit``.

    Args:
        driver: Browser driver used by the pager. A falsy driver makes the
            function return immediately without touching ``store``.
        store: Destination store; results are appended to ``store.data``.
        limit: Forwarded unchanged to the analyzer call.
            NOTE(review): presumably the maximum number of entries to
            collect - confirm against the analyzer's documentation.
    """
    if not driver:
        return

    # XPath of the element the pager uses to move between result pages.
    next_page_button = cl.XpathWebElementSelector('//*[@id="page"]/div/a/span')
    # NOTE(review): ``interval`` looks like a delay between page turns - confirm
    # its meaning and units against MyPager.
    result_pager = MyPager(
        button_selector=next_page_button,
        webdriver=driver,
        interval=2,
    )
    result_selector = cl.CssSelector(pattern="#content_left > div")
    result_analyzer = cl.AnalyzerPrettify(result_pager, result_selector)

    store.data.extend(result_analyzer(limit))
# Register the custom action so that a step whose "method" is
# "crawl_baidu_list" can be used in the script below.
cpt.Script.add_action(crawl_baidu_list)

# Ordered steps of the scripted session: open Baidu, type the keyword,
# submit the search, crawl the result list, then run the "clear" step.
# NOTE(review): the "__v-<name>__" placeholders are presumably replaced with
# the matching entries of the cpt.Variable passed to process() - confirm.
step = [{
    "method": "redirect",
    "url": "https://www.baidu.com/",
}, {
    "method": "input",
    "xpath": "//*[@id=\"kw\"]",
    "text": "__v-keyword__",
}, {
    "method": "click",
    "xpath": "//*[@id=\"su\"]"
}, {
    "method": "crawl_baidu_list",
    "limit": "__v-limit__",
}, {
    "method": "clear"
}]

# First run: its own variables and its own result store.
v1 = cpt.Variable({
    "limit": 20,
    "keyword": "和泉雾纱"
})
store1 = MyStore()

# Second run: same script, different keyword, separate store.
v2 = cpt.Variable({
    "limit": 20,
    "keyword": "python"
})
store2 = MyStore()

loader = cpt.Script(step, interval=1)

# Acquire the browser last and guard it immediately: if any run raises, the
# driver must still be shut down, otherwise the headless browser is leaked.
webdriver = get_driver(is_headless=True)
try:
    for store, variable in ((store1, v1), (store2, v2)):
        loader.process(webdriver=webdriver, store=store, variable=variable)
        print(store.data)
finally:
    webdriver.quit()