43 lines
1.5 KiB
Python
43 lines
1.5 KiB
Python
import csv
|
|
import datetime
|
|
import re
|
|
|
|
from bs4 import BeautifulSoup
|
|
import time
|
|
from selenium import webdriver
|
|
# Shared Selenium Chrome session, started as soon as this module is loaded;
# opendriver() navigates this instance.
driver = webdriver.Chrome()

# Module-level accumulators. Nothing in the visible code appends to them.
series_name, series_address, series_telphone, series_gps = [], [], [], []

# Matches a single HTML/XML tag such as "<em>" or "</a>".
p = re.compile('<[^>]+>')
|
|
|
|
|
|
def opendriver(path, keyword='战时状态', site="www.dingxi.gov.cn"):
    """Open a search-results page and record every hit that mentions *keyword*.

    The page at *path* is loaded in the module-level ``driver``, its source is
    parsed with BeautifulSoup, and each result container whose HTML contains
    *keyword* is written to the module-level csv ``writer`` as the row
    ``[site, title, url]``. Results whose markup lacks the expected link
    markers are skipped.

    Args:
        path: URL of the results page to open.
        keyword: Text a result's HTML must contain for it to be recorded.
        site: Value placed in the first CSV column of every recorded row.

    Raises:
        NameError: If a matching result is found before the module-level
            ``writer`` has been created.
    """
    driver.get(path)
    # Fixed pause so the page can finish rendering before the source is read.
    time.sleep(5)
    # Configures the implicit wait for later element lookups on the shared
    # driver; it does not delay the page_source read below.
    driver.implicitly_wait(10)

    page = BeautifulSoup(driver.page_source, 'lxml')
    results = page.select("div#content_left > div.new-pmd")
    print(results)

    for result in results:
        html = str(result)
        if keyword not in html:
            continue
        link = _extract_link(html)
        if link is None:
            # Container without the expected <a ... target="_blank"> markup.
            continue
        title, url = link
        # Kept outside the parsing try/except so write failures are not hidden.
        writer.writerow([site, title, url])


def _extract_link(html):
    """Return ``(title, url)`` cut from one result's HTML, or None if absent."""
    try:
        url = html.split('href="')[1].split('" target="_blank"')[0]
        title = html.split('target="_blank">')[1].split('</a></h3>')[0]
    except IndexError:
        # One of the marker strings was not present in this fragment.
        return None
    return title, url
|
|
|
|
|
|
if __name__ == '__main__':
    # Baidu query "site:www.dingxi.gov.cn 战时状态" (see the wd= parameter).
    search_url = 'http://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=baidu&wd=site%3Awww.dingxi.gov.cn%20%E6%88%98%E6%97%B6%E7%8A%B6%E6%80%81&fenlei=256&oq=site%253Awww.minxian.gov.cn%2520%25E6%2588%2598%25E6%2597%25B6%25E7%258A%25B6%25E6%2580%2581&rsv_pq=efad99020001f751&rsv_t=0e63ypN2i2H1kya2gHEhSzcCuCJmIG8ymr0S%2BSSEK9g4olqGY%2FPo62b5%2FJU&rqlang=cn&rsv_enter=0&rsv_dl=tb&rsv_btype=t&inputT=2643&rsv_sug3=79&rsv_sug4=3901'
    try:
        with open("F:/网站监测.csv", "a", newline='', encoding='utf-8') as csvfile:
            # `writer` is a module-level name here; opendriver() writes to it.
            writer = csv.writer(csvfile)
            # The file is opened for appending, so only emit the header row
            # when the file is new or empty; otherwise every run would add
            # another header in the middle of the data.
            if csvfile.tell() == 0:
                writer.writerow(["网址", "标题", "链接"])

            opendriver(search_url)
    finally:
        # Always shut the browser down, even if opening the file or scraping
        # fails, so no Chrome/chromedriver process is left behind.
        driver.quit()
|