# Source: MonitoringOfPublicOpinion/代码/网站监测/获取关键字.py
# (43 lines, 1.5 KiB, Python)

import csv
import datetime
import re
from bs4 import BeautifulSoup
import time
from selenium import webdriver
# Shared Chrome session; it is created as soon as this module is loaded.
driver = webdriver.Chrome()

# Module-level accumulators (nothing in the visible code populates them).
series_name, series_address, series_telphone, series_gps = [], [], [], []

# Matches one markup tag: '<', one or more non-'>' characters, then '>'.
p = re.compile('<[^>]+>')
def opendriver(path, keyword='战时状态', site="www.dingxi.gov.cn", csv_writer=None):
    """Open a search-result page and record every hit that mentions *keyword*.

    The page at *path* is loaded with the module-level ``driver`` and parsed
    with BeautifulSoup. Each result container selected by
    ``div#content_left > div.new-pmd`` whose markup contains *keyword* is
    written as one ``[site, title, url]`` row.

    The title and URL are read from the first ``<h3>`` link of the container
    through the parsed DOM, so the URL is the real attribute value (not an
    HTML-escaped copy) and the title is plain text without inline tags.

    Args:
        path: URL of the result page to open.
        keyword: Text a result container must contain to be recorded.
        site: Value written to the first column of every row.
        csv_writer: Object with a ``writerow`` method. When omitted, the
            module-level ``writer`` created by the ``__main__`` block is used.

    Raises:
        NameError: If *csv_writer* is omitted and no module-level ``writer``
            exists. This is raised up front, before the page is loaded.
    """
    # Resolve the output target first so a missing writer fails loudly
    # instead of every row being dropped silently.
    out = writer if csv_writer is None else csv_writer

    driver.get(path)
    # Fixed pause so the result list has time to render before the DOM is read.
    time.sleep(5)
    # Kept from the original flow: configures the driver's implicit wait for
    # element lookups (this function itself performs none).
    driver.implicitly_wait(10)

    page = BeautifulSoup(driver.page_source, 'lxml')
    results = page.select("div#content_left > div.new-pmd")
    print(results)

    for result in results:
        if keyword not in str(result):
            continue
        link = result.select_one("h3 a[href]")
        if link is None:
            # Container without a headline link: nothing usable to record.
            continue
        out.writerow([site, link.get_text().strip(), link["href"]])
if __name__ == '__main__':
    # Script entry point: append the matching search hits to the CSV report.
    try:
        with open("F:/网站监测.csv", "a", newline='', encoding='utf-8') as csvfile:
            # ``writer`` is a module-level name on purpose: opendriver() reads it.
            writer = csv.writer(csvfile)
            # The file is opened for appending, so the position is 0 only when
            # the file is new or empty. Writing the header just in that case
            # avoids a repeated header row on every run.
            if csvfile.tell() == 0:
                writer.writerow(["网址","标题", "链接"])
            opendriver('http://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=baidu&wd=site%3Awww.dingxi.gov.cn%20%E6%88%98%E6%97%B6%E7%8A%B6%E6%80%81&fenlei=256&oq=site%253Awww.minxian.gov.cn%2520%25E6%2588%2598%25E6%2597%25B6%25E7%258A%25B6%25E6%2580%2581&rsv_pq=efad99020001f751&rsv_t=0e63ypN2i2H1kya2gHEhSzcCuCJmIG8ymr0S%2BSSEK9g4olqGY%2FPo62b5%2FJU&rqlang=cn&rsv_enter=0&rsv_dl=tb&rsv_btype=t&inputT=2643&rsv_sug3=79&rsv_sug4=3901')
    finally:
        # Always shut the browser session down, even if scraping failed.
        driver.quit()