diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index 1879447..1c1795f 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -19,87 +19,20 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
@@ -112,12 +45,6 @@
-
-
-
-
-
-
@@ -126,11 +53,6 @@
-
-
-
-
-
@@ -141,11 +63,6 @@
-
-
-
-
-
@@ -165,11 +82,6 @@
-
-
-
-
-
@@ -188,12 +100,6 @@
-
-
-
-
-
-
@@ -203,13 +109,6 @@
-
-
-
-
-
-
-
@@ -219,41 +118,14 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
@@ -287,7 +159,7 @@
-
+
@@ -300,19 +172,19 @@
-
-
-
-
-
-
-
+
+
+
+
+
+
+
-
-
+
+
@@ -320,12 +192,100 @@
-
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -3480,31 +3440,13 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
@@ -3629,6 +3571,7 @@
+
1592970928443
@@ -3637,7 +3580,14 @@
1592970928443
-
+
+ 1614586411628
+
+
+
+ 1614586411628
+
+
@@ -3673,47 +3623,55 @@
-
+
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
-
+
+
-
+
+
+
+
+
+
+
+
+
-
+
@@ -3729,7 +3687,7 @@
-
+
@@ -3739,6 +3697,7 @@
+
diff --git a/代码/网站监测/代码/定西党政网/区县要情.py b/代码/网站监测/代码/定西党政网/区县要情.py
index 9b8eabb..3dc69ed 100644
--- a/代码/网站监测/代码/定西党政网/区县要情.py
+++ b/代码/网站监测/代码/定西党政网/区县要情.py
@@ -19,7 +19,7 @@ p = re.compile('<[^>]+>')
def opendriver(num):
- print(str(num)+"111111111111111111111111111")
+ print(str(num)+"#######################################")
driver.get('http://www.dingxi.gov.cn/col/col5/index.html?uid=4623&pageNum=' + str(num))
time.sleep(5)
driver.implicitly_wait(10)
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.simple_pgContainer > div.bt-mod-wzpb-02 > ul > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://www.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["区县要情", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/定西党政网'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/定西党政网'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/区县要情.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/定西党政网/部门动态.py b/代码/网站监测/代码/定西党政网/部门动态.py
index 0f091df..3506242 100644
--- a/代码/网站监测/代码/定西党政网/部门动态.py
+++ b/代码/网站监测/代码/定西党政网/部门动态.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.simple_pgContainer > div.bt-mod-wzpb-02 > ul > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://www.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,20 +44,20 @@ def opendriver(num):
writer.writerow(
["部门动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/定西党政网'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/定西党政网'
if not os.path.exists(path):
os.makedirs(path)
- with open(path+"/部门动态.csv", "w", newline='', encoding='utf-8') as csvfile:
+ with open(path+"/部门动态.csv", "a", newline='', encoding='utf-8') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["模块", "时间", "标题", "标题url", "正文"])
- i = 1
+ i = 74
while i < 9999:
opendriver(i)
i += 1
diff --git a/代码/网站监测/代码/定西市畜牧兽医局/产业简报.py b/代码/网站监测/代码/定西市畜牧兽医局/产业简报.py
index 04f0e6d..c3a5c8c 100644
--- a/代码/网站监测/代码/定西市畜牧兽医局/产业简报.py
+++ b/代码/网站监测/代码/定西市畜牧兽医局/产业简报.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://xmj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["产业简报", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/产业简报.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/定西市畜牧兽医局/产业组织.py b/代码/网站监测/代码/定西市畜牧兽医局/产业组织.py
index fc7a397..229ece5 100644
--- a/代码/网站监测/代码/定西市畜牧兽医局/产业组织.py
+++ b/代码/网站监测/代码/定西市畜牧兽医局/产业组织.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://xmj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["产业组织", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/产业组织.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/定西市畜牧兽医局/兽医工作.py b/代码/网站监测/代码/定西市畜牧兽医局/兽医工作.py
index 9eff9db..abef155 100644
--- a/代码/网站监测/代码/定西市畜牧兽医局/兽医工作.py
+++ b/代码/网站监测/代码/定西市畜牧兽医局/兽医工作.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://xmj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["兽医工作", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/兽医工作.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/定西市畜牧兽医局/基层动态.py b/代码/网站监测/代码/定西市畜牧兽医局/基层动态.py
index 10c2422..c37a7a7 100644
--- a/代码/网站监测/代码/定西市畜牧兽医局/基层动态.py
+++ b/代码/网站监测/代码/定西市畜牧兽医局/基层动态.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://xmj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["基层动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/基层动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/定西市畜牧兽医局/实用技术.py b/代码/网站监测/代码/定西市畜牧兽医局/实用技术.py
index 29649ff..623b3dc 100644
--- a/代码/网站监测/代码/定西市畜牧兽医局/实用技术.py
+++ b/代码/网站监测/代码/定西市畜牧兽医局/实用技术.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://xmj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["实用技术", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/实用技术.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/定西市畜牧兽医局/工作动态.py b/代码/网站监测/代码/定西市畜牧兽医局/工作动态.py
index ffb3698..05cf02d 100644
--- a/代码/网站监测/代码/定西市畜牧兽医局/工作动态.py
+++ b/代码/网站监测/代码/定西市畜牧兽医局/工作动态.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://xmj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["工作动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/工作动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/定西市畜牧兽医局/市场动态.py b/代码/网站监测/代码/定西市畜牧兽医局/市场动态.py
index 898d054..d21c89f 100644
--- a/代码/网站监测/代码/定西市畜牧兽医局/市场动态.py
+++ b/代码/网站监测/代码/定西市畜牧兽医局/市场动态.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://xmj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["市场动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/市场动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/定西市畜牧兽医局/政策法规.py b/代码/网站监测/代码/定西市畜牧兽医局/政策法规.py
index 3a42e3d..3a003d8 100644
--- a/代码/网站监测/代码/定西市畜牧兽医局/政策法规.py
+++ b/代码/网站监测/代码/定西市畜牧兽医局/政策法规.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://xmj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["政策法规", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/政策法规.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/定西市畜牧兽医局/政策解读.py b/代码/网站监测/代码/定西市畜牧兽医局/政策解读.py
index 3c82151..5ee196f 100644
--- a/代码/网站监测/代码/定西市畜牧兽医局/政策解读.py
+++ b/代码/网站监测/代码/定西市畜牧兽医局/政策解读.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://xmj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["政策解读", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/政策解读.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/定西市畜牧兽医局/行业专家.py b/代码/网站监测/代码/定西市畜牧兽医局/行业专家.py
index 58f4a2f..a1fe629 100644
--- a/代码/网站监测/代码/定西市畜牧兽医局/行业专家.py
+++ b/代码/网站监测/代码/定西市畜牧兽医局/行业专家.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://xmj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["行业专家", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/行业专家.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/定西市畜牧兽医局/部门文件.py b/代码/网站监测/代码/定西市畜牧兽医局/部门文件.py
index 5e5a2e4..48c72ee 100644
--- a/代码/网站监测/代码/定西市畜牧兽医局/部门文件.py
+++ b/代码/网站监测/代码/定西市畜牧兽医局/部门文件.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://xmj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["部门文件", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/部门文件.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/定西市畜牧兽医局/项目建设.py b/代码/网站监测/代码/定西市畜牧兽医局/项目建设.py
index 4cb2353..565db74 100644
--- a/代码/网站监测/代码/定西市畜牧兽医局/项目建设.py
+++ b/代码/网站监测/代码/定西市畜牧兽医局/项目建设.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://xmj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["项目建设", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/定西市畜牧兽医局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/项目建设.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市人社局/人才招聘.py b/代码/网站监测/代码/市人社局/人才招聘.py
index 6fda75b..893b0b4 100644
--- a/代码/网站监测/代码/市人社局/人才招聘.py
+++ b/代码/网站监测/代码/市人社局/人才招聘.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://rsj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["人才招聘", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市人社局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市人社局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/人才招聘.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市人社局/工作动态.py b/代码/网站监测/代码/市人社局/工作动态.py
index 033983f..0686e54 100644
--- a/代码/网站监测/代码/市人社局/工作动态.py
+++ b/代码/网站监测/代码/市人社局/工作动态.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://rsj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["工作动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市人社局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市人社局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/工作动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市人社局/政策解读.py b/代码/网站监测/代码/市人社局/政策解读.py
new file mode 100644
index 0000000..13c0b2b
--- /dev/null
+++ b/代码/网站监测/代码/市人社局/政策解读.py
@@ -0,0 +1,63 @@
+import csv
+import datetime
+import os
+import re
+
+from bs4 import BeautifulSoup
+import time
+from selenium import webdriver
+
+time1 = datetime.datetime(2021, 1, 20)
+time2 = datetime.datetime(2021, 2, 20)
+driver = webdriver.Chrome()
+series_name = []
+series_address = []
+series_telphone = []
+series_gps = []
+
+p = re.compile('<[^>]+>')
+
+
+def opendriver(num):
+ print(str(num)+"111111111111111111111111111")
+ driver.get('http://zwgk.dingxi.gov.cn/col/col12452/index.html?number=XA00608&uid=40159&pageNum=' + str(num))
+ time.sleep(5)
+ driver.implicitly_wait(10)
+ page = BeautifulSoup(driver.page_source, 'lxml')
+ data = page.select(
+ "div.scroll_main > div.scroll_wrap > div.scroll_cont > div.zfxxgk_zd2 > ul > div > div.default_pgContainer > li")
+ for d in data:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('')[1].split('')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
+ print(url, title, date_source)
+ driver.get('http://zwgk.dingxi.gov.cn' + url)
+ driver.implicitly_wait(10)
+ content = \
+ str(BeautifulSoup(driver.page_source, 'lxml').select("div.content > table"))
+ print(content)
+ writer.writerow(
+ ["政策解读", date_source, title, url, p.sub("", content)])
+ time.sleep(3)
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
+
+
+if __name__ == '__main__':
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市人社局'
+ if not os.path.exists(path):
+ os.makedirs(path)
+ with open(path+"/政策解读.csv", "w", newline='', encoding='utf-8') as csvfile:
+ writer = csv.writer(csvfile)
+ writer.writerow(["模块", "时间", "标题", "标题url", "正文"])
+ i = 1
+ while i < 9999:
+ opendriver(i)
+ i += 1
diff --git a/代码/网站监测/代码/市人社局/部门文件.py b/代码/网站监测/代码/市人社局/部门文件.py
new file mode 100644
index 0000000..d4b3380
--- /dev/null
+++ b/代码/网站监测/代码/市人社局/部门文件.py
@@ -0,0 +1,63 @@
+import csv
+import datetime
+import os
+import re
+
+from bs4 import BeautifulSoup
+import time
+from selenium import webdriver
+
+time1 = datetime.datetime(2021, 1, 20)
+time2 = datetime.datetime(2021, 2, 20)
+driver = webdriver.Chrome()
+series_name = []
+series_address = []
+series_telphone = []
+series_gps = []
+
+p = re.compile('<[^>]+>')
+
+
+def opendriver(num):
+ print(str(num)+"111111111111111111111111111")
+ driver.get('http://zwgk.dingxi.gov.cn/col/col12398/index.html?number=XA00508&uid=40159&pageNum=' + str(num))
+ time.sleep(5)
+ driver.implicitly_wait(10)
+ page = BeautifulSoup(driver.page_source, 'lxml')
+ data = page.select(
+ "div.scroll_main > div.scroll_wrap > div.scroll_cont > div.zfxxgk_zd2 > ul > div > div.default_pgContainer > li")
+ for d in data:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('')[1].split('')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
+ print(url, title, date_source)
+ driver.get('http://zwgk.dingxi.gov.cn' + url)
+ driver.implicitly_wait(10)
+ content = \
+ str(BeautifulSoup(driver.page_source, 'lxml').select("div.content > table"))
+ print(content)
+ writer.writerow(
+ ["部门文件", date_source, title, url, p.sub("", content)])
+ time.sleep(3)
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
+
+
+if __name__ == '__main__':
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市人社局'
+ if not os.path.exists(path):
+ os.makedirs(path)
+ with open(path+"/部门文件.csv", "w", newline='', encoding='utf-8') as csvfile:
+ writer = csv.writer(csvfile)
+ writer.writerow(["模块", "时间", "标题", "标题url", "正文"])
+ i = 1
+ while i < 9999:
+ opendriver(i)
+ i += 1
diff --git a/代码/网站监测/代码/市住建局/住房保障及房地产管理.py b/代码/网站监测/代码/市住建局/住房保障及房地产管理.py
index 03e69be..dc0c53d 100644
--- a/代码/网站监测/代码/市住建局/住房保障及房地产管理.py
+++ b/代码/网站监测/代码/市住建局/住房保障及房地产管理.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://zjj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["住房保障及房地产管理", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市住建局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市住建局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/住房保障及房地产管理.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市住建局/其他.py b/代码/网站监测/代码/市住建局/其他.py
index c171385..a923c05 100644
--- a/代码/网站监测/代码/市住建局/其他.py
+++ b/代码/网站监测/代码/市住建局/其他.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://zjj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["其他", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市住建局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市住建局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/其他.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市住建局/城乡建设.py b/代码/网站监测/代码/市住建局/城乡建设.py
index cd0dfdd..f6739f2 100644
--- a/代码/网站监测/代码/市住建局/城乡建设.py
+++ b/代码/网站监测/代码/市住建局/城乡建设.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://zjj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["城乡建设", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市住建局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市住建局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/城乡建设.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市住建局/工作动态.py b/代码/网站监测/代码/市住建局/工作动态.py
index 7b8b80b..e8d662c 100644
--- a/代码/网站监测/代码/市住建局/工作动态.py
+++ b/代码/网站监测/代码/市住建局/工作动态.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://zjj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["工作动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市住建局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市住建局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/工作动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市住建局/建筑业管理.py b/代码/网站监测/代码/市住建局/建筑业管理.py
index 3ccf10e..b5cb85b 100644
--- a/代码/网站监测/代码/市住建局/建筑业管理.py
+++ b/代码/网站监测/代码/市住建局/建筑业管理.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://zjj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["建筑业管理", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市住建局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市住建局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/建筑业管理.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市住建局/通知公告.py b/代码/网站监测/代码/市住建局/通知公告.py
index 3ad989e..c82a47f 100644
--- a/代码/网站监测/代码/市住建局/通知公告.py
+++ b/代码/网站监测/代码/市住建局/通知公告.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://zjj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["通知公告", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市住建局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市住建局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/通知公告.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市农业农村局/下载专区.py b/代码/网站监测/代码/市农业农村局/下载专区.py
index a002799..b79cfba 100644
--- a/代码/网站监测/代码/市农业农村局/下载专区.py
+++ b/代码/网站监测/代码/市农业农村局/下载专区.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://ny.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["下载专区", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市农业农村局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市农业农村局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/下载专区.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市农业农村局/中医药产业.py b/代码/网站监测/代码/市农业农村局/中医药产业.py
index 8134c9a..46ec976 100644
--- a/代码/网站监测/代码/市农业农村局/中医药产业.py
+++ b/代码/网站监测/代码/市农业农村局/中医药产业.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://ny.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["中医药产业", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市农业农村局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市农业农村局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/中医药产业.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市农业农村局/中药材价格.py b/代码/网站监测/代码/市农业农村局/中药材价格.py
index 5475380..ad71975 100644
--- a/代码/网站监测/代码/市农业农村局/中药材价格.py
+++ b/代码/网站监测/代码/市农业农村局/中药材价格.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://ny.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["中药材价格", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市农业农村局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市农业农村局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/中药材价格.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市农业农村局/农业文化.py b/代码/网站监测/代码/市农业农村局/农业文化.py
index 5632c79..07eae98 100644
--- a/代码/网站监测/代码/市农业农村局/农业文化.py
+++ b/代码/网站监测/代码/市农业农村局/农业文化.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://ny.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["农业文化", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市农业农村局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市农业农村局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/农业文化.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市农业农村局/农业科技.py b/代码/网站监测/代码/市农业农村局/农业科技.py
index 4ca355d..850abfe 100644
--- a/代码/网站监测/代码/市农业农村局/农业科技.py
+++ b/代码/网站监测/代码/市农业农村局/农业科技.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://ny.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["农业科技", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市农业农村局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市农业农村局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/农业科技.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市农业农村局/区县动态.py b/代码/网站监测/代码/市农业农村局/区县动态.py
index c7d9595..b7fd9cd 100644
--- a/代码/网站监测/代码/市农业农村局/区县动态.py
+++ b/代码/网站监测/代码/市农业农村局/区县动态.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://ny.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["区县动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市农业农村局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市农业农村局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/区县动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市农业农村局/政务要闻.py b/代码/网站监测/代码/市农业农村局/政务要闻.py
index 6a94cd5..66d884a 100644
--- a/代码/网站监测/代码/市农业农村局/政务要闻.py
+++ b/代码/网站监测/代码/市农业农村局/政务要闻.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://ny.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["政务要闻", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市农业农村局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市农业农村局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/政务要闻.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市农业农村局/本市简报快报.py b/代码/网站监测/代码/市农业农村局/本市简报快报.py
index e3855f4..05f163c 100644
--- a/代码/网站监测/代码/市农业农村局/本市简报快报.py
+++ b/代码/网站监测/代码/市农业农村局/本市简报快报.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://ny.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["本市简报快报", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市农业农村局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市农业农村局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/本市简报快报.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市农业农村局/果蔬产业.py b/代码/网站监测/代码/市农业农村局/果蔬产业.py
index 57f1e04..ebedcac 100644
--- a/代码/网站监测/代码/市农业农村局/果蔬产业.py
+++ b/代码/网站监测/代码/市农业农村局/果蔬产业.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://ny.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["果蔬产业", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市农业农村局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市农业农村局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/果蔬产业.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市农业农村局/马铃薯产业.py b/代码/网站监测/代码/市农业农村局/马铃薯产业.py
index 46d93bd..aed7f70 100644
--- a/代码/网站监测/代码/市农业农村局/马铃薯产业.py
+++ b/代码/网站监测/代码/市农业农村局/马铃薯产业.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://ny.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["马铃薯产业", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市农业农村局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市农业农村局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/马铃薯产业.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市发展改革委/党的建设.py b/代码/网站监测/代码/市发展改革委/党的建设.py
index f1b9f18..730000a 100644
--- a/代码/网站监测/代码/市发展改革委/党的建设.py
+++ b/代码/网站监测/代码/市发展改革委/党的建设.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get(url)
driver.implicitly_wait(10)
@@ -45,14 +45,14 @@ def opendriver(num):
writer.writerow(
["党的建设", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市发展改革委'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市发展改革委'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/党的建设.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市发展改革委/公示公告.py b/代码/网站监测/代码/市发展改革委/公示公告.py
new file mode 100644
index 0000000..fb669d9
--- /dev/null
+++ b/代码/网站监测/代码/市发展改革委/公示公告.py
@@ -0,0 +1,62 @@
+import csv
+import datetime
+import os
+import re
+
+from bs4 import BeautifulSoup
+import time
+from selenium import webdriver
+
+time1 = datetime.datetime(2021, 1, 20)
+time2 = datetime.datetime(2021, 2, 20)
+driver = webdriver.Chrome()
+series_name = []
+series_address = []
+series_telphone = []
+series_gps = []
+
+p = re.compile('<[^>]+>')
+
+
+def opendriver(num):
+ print(str(num)+"111111111111111111111111111")
+ driver.get('http://zwgk.dingxi.gov.cn/col/col12479/index.html?number=XA00701&uid=39590&pageNum=' + str(num))
+ time.sleep(5)
+ driver.implicitly_wait(10)
+ page = BeautifulSoup(driver.page_source, 'lxml')
+ data = page.select("div.scroll_main > div.scroll_wrap > div.scroll_cont > div.zfxxgk_zd2 > ul > div > div.default_pgContainer > li")
+ for d in data:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('')[1].split('')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
+ print(url, title, date_source)
+ driver.get('http://zwgk.dingxi.gov.cn' + url)
+ driver.implicitly_wait(10)
+ content = \
+ str(BeautifulSoup(driver.page_source, 'lxml').select("tr > td.bt_content > div > p"))
+ print(content)
+ writer.writerow(
+ ["公示公告", date_source, title, url, p.sub("", content)])
+ time.sleep(3)
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
+
+
+if __name__ == '__main__':
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市发展改革委'
+ if not os.path.exists(path):
+ os.makedirs(path)
+ with open(path+"/公示公告.csv", "w", newline='', encoding='utf-8') as csvfile:
+ writer = csv.writer(csvfile)
+ writer.writerow(["模块", "时间", "标题", "标题url", "正文"])
+ i = 1
+ while i < 9999:
+ opendriver(i)
+ i += 1
diff --git a/代码/网站监测/代码/市发展改革委/工作动态.py b/代码/网站监测/代码/市发展改革委/工作动态.py
index 0536b37..7945e9b 100644
--- a/代码/网站监测/代码/市发展改革委/工作动态.py
+++ b/代码/网站监测/代码/市发展改革委/工作动态.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.default_pgContainer > div.bt-mod-wzpb-02 > ul > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://fgw.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["工作动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市发展改革委'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市发展改革委'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/工作动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市发展改革委/物价工作.py b/代码/网站监测/代码/市发展改革委/物价工作.py
index eaf792a..61205ce 100644
--- a/代码/网站监测/代码/市发展改革委/物价工作.py
+++ b/代码/网站监测/代码/市发展改革委/物价工作.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://fgw.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["物价工作", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市发展改革委'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市发展改革委'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/物价工作.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市发展改革委/项目建设.py b/代码/网站监测/代码/市发展改革委/项目建设.py
index 7a9e067..6e967b5 100644
--- a/代码/网站监测/代码/市发展改革委/项目建设.py
+++ b/代码/网站监测/代码/市发展改革委/项目建设.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://fgw.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["项目建设", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市发展改革委'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市发展改革委'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/项目建设.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市司法局/司法考试.py b/代码/网站监测/代码/市司法局/司法考试.py
index 1f8a1e6..4bdc749 100644
--- a/代码/网站监测/代码/市司法局/司法考试.py
+++ b/代码/网站监测/代码/市司法局/司法考试.py
@@ -27,16 +27,16 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://sfj.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -47,12 +47,12 @@ def opendriver(num):
["司法考试", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市司法局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市司法局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/司法考试.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市司法局/司法鉴定.py b/代码/网站监测/代码/市司法局/司法鉴定.py
index 4f7e40a..27852e0 100644
--- a/代码/网站监测/代码/市司法局/司法鉴定.py
+++ b/代码/网站监测/代码/市司法局/司法鉴定.py
@@ -27,16 +27,16 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://sfj.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -47,12 +47,12 @@ def opendriver(num):
["司法鉴定", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市司法局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市司法局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/司法鉴定.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市司法局/基层动态.py b/代码/网站监测/代码/市司法局/基层动态.py
index 5df7150..6c33d5e 100644
--- a/代码/网站监测/代码/市司法局/基层动态.py
+++ b/代码/网站监测/代码/市司法局/基层动态.py
@@ -27,16 +27,16 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://sfj.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -47,12 +47,12 @@ def opendriver(num):
["基层动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市司法局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市司法局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/基层动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市司法局/基层工作.py b/代码/网站监测/代码/市司法局/基层工作.py
index 47b225d..c9e790a 100644
--- a/代码/网站监测/代码/市司法局/基层工作.py
+++ b/代码/网站监测/代码/市司法局/基层工作.py
@@ -27,16 +27,16 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://sfj.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -47,12 +47,12 @@ def opendriver(num):
["基层工作", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市司法局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市司法局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/基层工作.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市司法局/工作动态.py b/代码/网站监测/代码/市司法局/工作动态.py
index 5ab9c8e..02d1a15 100644
--- a/代码/网站监测/代码/市司法局/工作动态.py
+++ b/代码/网站监测/代码/市司法局/工作动态.py
@@ -27,16 +27,16 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://sfj.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -47,12 +47,12 @@ def opendriver(num):
["工作动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市司法局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市司法局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/工作动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市司法局/律师公证.py b/代码/网站监测/代码/市司法局/律师公证.py
index 7f781f2..4890e35 100644
--- a/代码/网站监测/代码/市司法局/律师公证.py
+++ b/代码/网站监测/代码/市司法局/律师公证.py
@@ -27,16 +27,16 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://sfj.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -47,12 +47,12 @@ def opendriver(num):
["律师公证", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市司法局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市司法局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/律师公证.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市司法局/法制宣传.py b/代码/网站监测/代码/市司法局/法制宣传.py
index 4bf22be..515f2e8 100644
--- a/代码/网站监测/代码/市司法局/法制宣传.py
+++ b/代码/网站监测/代码/市司法局/法制宣传.py
@@ -27,16 +27,16 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://sfj.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -47,12 +47,12 @@ def opendriver(num):
["法治宣传", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市司法局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市司法局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/法治宣传.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市司法局/法律援助.py b/代码/网站监测/代码/市司法局/法律援助.py
index f133005..122816c 100644
--- a/代码/网站监测/代码/市司法局/法律援助.py
+++ b/代码/网站监测/代码/市司法局/法律援助.py
@@ -27,16 +27,16 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://sfj.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -47,12 +47,12 @@ def opendriver(num):
["法律援助", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市司法局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市司法局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/法律援助.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市司法局/社会矫正.py b/代码/网站监测/代码/市司法局/社会矫正.py
index 5268609..cb0b47e 100644
--- a/代码/网站监测/代码/市司法局/社会矫正.py
+++ b/代码/网站监测/代码/市司法局/社会矫正.py
@@ -27,16 +27,16 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://sfj.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -47,12 +47,12 @@ def opendriver(num):
["社会矫正", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市司法局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市司法局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/社会矫正.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市司法局/通知公告.py b/代码/网站监测/代码/市司法局/通知公告.py
index 155a7ca..59f2016 100644
--- a/代码/网站监测/代码/市司法局/通知公告.py
+++ b/代码/网站监测/代码/市司法局/通知公告.py
@@ -27,16 +27,16 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://sfj.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -47,12 +47,12 @@ def opendriver(num):
["通知公告", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市司法局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市司法局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/通知公告.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市司法局/队伍建设.py b/代码/网站监测/代码/市司法局/队伍建设.py
index 685c034..1f0029d 100644
--- a/代码/网站监测/代码/市司法局/队伍建设.py
+++ b/代码/网站监测/代码/市司法局/队伍建设.py
@@ -27,16 +27,16 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://sfj.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -47,12 +47,12 @@ def opendriver(num):
["法治宣传", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市司法局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市司法局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/法治宣传.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市审计局/党建廉政.py b/代码/网站监测/代码/市审计局/党建廉政.py
index aa74231..9b50099 100644
--- a/代码/网站监测/代码/市审计局/党建廉政.py
+++ b/代码/网站监测/代码/市审计局/党建廉政.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > ul > li")
print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://sjj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["党建廉政", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市审计局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市审计局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/党建廉政.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市审计局/审计动态.py b/代码/网站监测/代码/市审计局/审计动态.py
index c1ed63c..bc2725a 100644
--- a/代码/网站监测/代码/市审计局/审计动态.py
+++ b/代码/网站监测/代码/市审计局/审计动态.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://sjj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["审计动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市审计局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市审计局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/审计动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市审计局/审计要闻.py b/代码/网站监测/代码/市审计局/审计要闻.py
index 8976284..dd1591f 100644
--- a/代码/网站监测/代码/市审计局/审计要闻.py
+++ b/代码/网站监测/代码/市审计局/审计要闻.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://sjj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["审计要闻", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市审计局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市审计局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/审计要闻.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市审计局/审计论坛.py b/代码/网站监测/代码/市审计局/审计论坛.py
index 9a9907a..7072c74 100644
--- a/代码/网站监测/代码/市审计局/审计论坛.py
+++ b/代码/网站监测/代码/市审计局/审计论坛.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://sjj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["审计论坛", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市审计局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市审计局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/审计论坛.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市应急局/事故调查.py b/代码/网站监测/代码/市应急局/事故调查.py
index d4b6e8d..6a03515 100644
--- a/代码/网站监测/代码/市应急局/事故调查.py
+++ b/代码/网站监测/代码/市应急局/事故调查.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://yjgl.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["事故调查", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市应急局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市应急局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/事故调查.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市应急局/公示公告.py b/代码/网站监测/代码/市应急局/公示公告.py
index 13fabbd..870fb77 100644
--- a/代码/网站监测/代码/市应急局/公示公告.py
+++ b/代码/网站监测/代码/市应急局/公示公告.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://yjgl.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["公示公告", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市应急局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市应急局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/公示公告.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市应急局/化学危险品.py b/代码/网站监测/代码/市应急局/化学危险品.py
index bfe4810..550c3b8 100644
--- a/代码/网站监测/代码/市应急局/化学危险品.py
+++ b/代码/网站监测/代码/市应急局/化学危险品.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://yjgl.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["化学危险品", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市应急局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市应急局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/化学危险品.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市应急局/县区讯息.py b/代码/网站监测/代码/市应急局/县区讯息.py
index a8c7e34..db9bd16 100644
--- a/代码/网站监测/代码/市应急局/县区讯息.py
+++ b/代码/网站监测/代码/市应急局/县区讯息.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://yjgl.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["县区讯息", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市应急局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市应急局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/县区讯息.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市应急局/安全培训.py b/代码/网站监测/代码/市应急局/安全培训.py
index 8a30e32..1dce440 100644
--- a/代码/网站监测/代码/市应急局/安全培训.py
+++ b/代码/网站监测/代码/市应急局/安全培训.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://yjgl.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["安全培训", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市应急局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市应急局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/安全培训.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市应急局/安全天地.py b/代码/网站监测/代码/市应急局/安全天地.py
index 8710a89..1fbd01d 100644
--- a/代码/网站监测/代码/市应急局/安全天地.py
+++ b/代码/网站监测/代码/市应急局/安全天地.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://yjgl.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["安全天地", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市应急局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市应急局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/安全天地.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市应急局/安全预警.py b/代码/网站监测/代码/市应急局/安全预警.py
index ca497a7..7a76bab 100644
--- a/代码/网站监测/代码/市应急局/安全预警.py
+++ b/代码/网站监测/代码/市应急局/安全预警.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://yjgl.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["安全预警", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市应急局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市应急局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/安全预警.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市应急局/工作动态.py b/代码/网站监测/代码/市应急局/工作动态.py
index 755453c..cca3798 100644
--- a/代码/网站监测/代码/市应急局/工作动态.py
+++ b/代码/网站监测/代码/市应急局/工作动态.py
@@ -28,16 +28,16 @@ def opendriver(num):
print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url,'+', title,'+', date_source)
driver.get('http://yjgl.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["工作动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市应急局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市应急局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/工作动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市应急局/应急委工作.py b/代码/网站监测/代码/市应急局/应急委工作.py
index 9325796..a69be4d 100644
--- a/代码/网站监测/代码/市应急局/应急委工作.py
+++ b/代码/网站监测/代码/市应急局/应急委工作.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://yjgl.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["应急委工作", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市应急局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市应急局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/应急委工作.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市应急局/应急救援.py b/代码/网站监测/代码/市应急局/应急救援.py
index 20e7477..26aa49c 100644
--- a/代码/网站监测/代码/市应急局/应急救援.py
+++ b/代码/网站监测/代码/市应急局/应急救援.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://yjgl.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["应急救援", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市应急局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市应急局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/应急救援.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市应急局/执法监察.py b/代码/网站监测/代码/市应急局/执法监察.py
index f7102ec..968dffe 100644
--- a/代码/网站监测/代码/市应急局/执法监察.py
+++ b/代码/网站监测/代码/市应急局/执法监察.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://yjgl.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["执法监察", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市应急局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市应急局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/执法监察.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市应急局/政策法规.py b/代码/网站监测/代码/市应急局/政策法规.py
index 12d3ee7..e3d0064 100644
--- a/代码/网站监测/代码/市应急局/政策法规.py
+++ b/代码/网站监测/代码/市应急局/政策法规.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://yjgl.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["政策法规", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市应急局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市应急局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/政策法规.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市应急局/森林防火.py b/代码/网站监测/代码/市应急局/森林防火.py
index 914f0cf..95fceca 100644
--- a/代码/网站监测/代码/市应急局/森林防火.py
+++ b/代码/网站监测/代码/市应急局/森林防火.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://yjgl.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["森林防火", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市应急局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市应急局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/森林防火.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市应急局/行政审批.py b/代码/网站监测/代码/市应急局/行政审批.py
index 98ee81e..bfaba4f 100644
--- a/代码/网站监测/代码/市应急局/行政审批.py
+++ b/代码/网站监测/代码/市应急局/行政审批.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://yjgl.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["行政审批", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市应急局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市应急局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/行政审批.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市应急局/防灾减灾.py b/代码/网站监测/代码/市应急局/防灾减灾.py
index 04f2643..49cc955 100644
--- a/代码/网站监测/代码/市应急局/防灾减灾.py
+++ b/代码/网站监测/代码/市应急局/防灾减灾.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://yjgl.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["防灾减灾", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市应急局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市应急局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/防灾减灾.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市应急局/非煤矿山.py b/代码/网站监测/代码/市应急局/非煤矿山.py
index 9a5b3d4..914b36e 100644
--- a/代码/网站监测/代码/市应急局/非煤矿山.py
+++ b/代码/网站监测/代码/市应急局/非煤矿山.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://yjgl.dingxi.gov.cn/' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["非煤矿山", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市应急局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市应急局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/非煤矿山.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市扶贫办/工作动态.py b/代码/网站监测/代码/市扶贫办/工作动态.py
index 23e897e..97dc7af 100644
--- a/代码/网站监测/代码/市扶贫办/工作动态.py
+++ b/代码/网站监测/代码/市扶贫办/工作动态.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://fpb.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["工作动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市扶贫办'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市扶贫办'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/工作动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市扶贫办/精准扶贫简报.py b/代码/网站监测/代码/市扶贫办/精准扶贫简报.py
index ae129f6..542efcc 100644
--- a/代码/网站监测/代码/市扶贫办/精准扶贫简报.py
+++ b/代码/网站监测/代码/市扶贫办/精准扶贫简报.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://fpb.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["精准扶贫简报", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市扶贫办'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市扶贫办'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/精准扶贫简报.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市文体旅游局/产业促进.py b/代码/网站监测/代码/市文体旅游局/产业促进.py
index b5e95ae..133ffcb 100644
--- a/代码/网站监测/代码/市文体旅游局/产业促进.py
+++ b/代码/网站监测/代码/市文体旅游局/产业促进.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://wlj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["产业促进", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市文体旅游局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市文体旅游局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/产业促进.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市文体旅游局/党的建设.py b/代码/网站监测/代码/市文体旅游局/党的建设.py
index 0e7abce..1b0f076 100644
--- a/代码/网站监测/代码/市文体旅游局/党的建设.py
+++ b/代码/网站监测/代码/市文体旅游局/党的建设.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://wlj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["党的建设", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市文体旅游局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市文体旅游局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/党的建设.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市文体旅游局/公共服务.py b/代码/网站监测/代码/市文体旅游局/公共服务.py
index 977da94..205cd08 100644
--- a/代码/网站监测/代码/市文体旅游局/公共服务.py
+++ b/代码/网站监测/代码/市文体旅游局/公共服务.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://wlj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["县区动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市文体旅游局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市文体旅游局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/公共服务.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市文体旅游局/县区动态.py b/代码/网站监测/代码/市文体旅游局/县区动态.py
index 8a06de1..672e8cd 100644
--- a/代码/网站监测/代码/市文体旅游局/县区动态.py
+++ b/代码/网站监测/代码/市文体旅游局/县区动态.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://wlj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["县区动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市文体旅游局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市文体旅游局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/县区动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市文体旅游局/工作动态.py b/代码/网站监测/代码/市文体旅游局/工作动态.py
index ebbb85a..0dcda1e 100644
--- a/代码/网站监测/代码/市文体旅游局/工作动态.py
+++ b/代码/网站监测/代码/市文体旅游局/工作动态.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://wlj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["工作动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市文体旅游局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市文体旅游局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/工作动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市文体旅游局/市场监管.py b/代码/网站监测/代码/市文体旅游局/市场监管.py
index c5aa84c..3a5c8c5 100644
--- a/代码/网站监测/代码/市文体旅游局/市场监管.py
+++ b/代码/网站监测/代码/市文体旅游局/市场监管.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://wlj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["市场监管", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市文体旅游局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市文体旅游局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/市场监管.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市文体旅游局/广播影视.py b/代码/网站监测/代码/市文体旅游局/广播影视.py
index 34de7c2..0f80a44 100644
--- a/代码/网站监测/代码/市文体旅游局/广播影视.py
+++ b/代码/网站监测/代码/市文体旅游局/广播影视.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://wlj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["广播影视", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市文体旅游局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市文体旅游局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/广播影视.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市文体旅游局/文化遗产.py b/代码/网站监测/代码/市文体旅游局/文化遗产.py
index 7c9da57..42111a0 100644
--- a/代码/网站监测/代码/市文体旅游局/文化遗产.py
+++ b/代码/网站监测/代码/市文体旅游局/文化遗产.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://wlj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["文化遗产", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市文体旅游局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市文体旅游局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/文化遗产.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市文体旅游局/通知公告.py b/代码/网站监测/代码/市文体旅游局/通知公告.py
index 84bb74d..1884b55 100644
--- a/代码/网站监测/代码/市文体旅游局/通知公告.py
+++ b/代码/网站监测/代码/市文体旅游局/通知公告.py
@@ -28,16 +28,16 @@ def opendriver(num):
# print(data)
for d in data:
print(d)
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s < time1:
- driver.close()
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s < time1:
+ # driver.close()
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://wlj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -48,12 +48,12 @@ def opendriver(num):
["通知公告", date_source, title, url, p.sub("", content)])
time.sleep(3)
- # except:
- # continue
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市文体旅游局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市文体旅游局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/通知公告.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市林草局/基层动态.py b/代码/网站监测/代码/市林草局/基层动态.py
index 5ec889a..16f018b 100644
--- a/代码/网站监测/代码/市林草局/基层动态.py
+++ b/代码/网站监测/代码/市林草局/基层动态.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://ly.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["基层动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市林草局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市林草局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/基层动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市林草局/工作动态.py b/代码/网站监测/代码/市林草局/工作动态.py
index 05584b4..7a68f6d 100644
--- a/代码/网站监测/代码/市林草局/工作动态.py
+++ b/代码/网站监测/代码/市林草局/工作动态.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://ly.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["工作动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市林草局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市林草局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/工作动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市林草局/政策法规.py b/代码/网站监测/代码/市林草局/政策法规.py
index 316f7a2..3cbfb6e 100644
--- a/代码/网站监测/代码/市林草局/政策法规.py
+++ b/代码/网站监测/代码/市林草局/政策法规.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://ly.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["政策法规", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市林草局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市林草局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/政策法规.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市林草局/林业科技.py b/代码/网站监测/代码/市林草局/林业科技.py
index f4b0830..9aee416 100644
--- a/代码/网站监测/代码/市林草局/林业科技.py
+++ b/代码/网站监测/代码/市林草局/林业科技.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://ly.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["政策法规", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市林草局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市林草局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/政策法规.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市林草局/林业重点工程.py b/代码/网站监测/代码/市林草局/林业重点工程.py
index a9d2b5a..877d178 100644
--- a/代码/网站监测/代码/市林草局/林业重点工程.py
+++ b/代码/网站监测/代码/市林草局/林业重点工程.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://ly.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["林业重点工程", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市林草局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市林草局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/林业重点工程.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市林草局/资源管理.py b/代码/网站监测/代码/市林草局/资源管理.py
index e085b17..cd7327d 100644
--- a/代码/网站监测/代码/市林草局/资源管理.py
+++ b/代码/网站监测/代码/市林草局/资源管理.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://ly.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["资源管理", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市林草局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市林草局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/资源管理.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市林草局/造林绿化.py b/代码/网站监测/代码/市林草局/造林绿化.py
index 2784293..fbee349 100644
--- a/代码/网站监测/代码/市林草局/造林绿化.py
+++ b/代码/网站监测/代码/市林草局/造林绿化.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > div.bt-mod-wzpb-02 > a")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://ly.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["造林绿化", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市林草局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市林草局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/造林绿化.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市民政局/公示公告.py b/代码/网站监测/代码/市民政局/公示公告.py
new file mode 100644
index 0000000..284de54
--- /dev/null
+++ b/代码/网站监测/代码/市民政局/公示公告.py
@@ -0,0 +1,85 @@
+import csv
+import datetime
+import os
+import re
+
+from bs4 import BeautifulSoup
+import time
+from selenium import webdriver
+
+time1 = datetime.datetime(2021, 1, 20)
+time2 = datetime.datetime(2021, 2, 20)
+driver = webdriver.Chrome()
+series_name = []
+series_address = []
+series_telphone = []
+series_gps = []
+
+p = re.compile('<[^>]+>')
+# Matches the YYYY-MM-DD publication date shown in a list entry.
+date_re = re.compile(r'\d{4}-\d{2}-\d{2}')
+
+
+def opendriver(num):
+    """Scrape list page ``num`` and append each listed article to the CSV.
+
+    Loads the paginated column index with the module-level ``driver``. For
+    every ``<li>`` entry it reads the link, title and date, opens the
+    article page and writes one row through the module-level ``writer``
+    (created in the ``__main__`` block). An entry that cannot be parsed or
+    fetched is reported and skipped so it does not abort the crawl.
+
+    :param num: page number appended to the ``pageNum`` query argument.
+    """
+    print(str(num)+"111111111111111111111111111")
+    driver.get('http://zwgk.dingxi.gov.cn/col/col12483/index.html?number=XA00705&uid=39777&pageNum=' + str(num))
+    time.sleep(5)
+    driver.implicitly_wait(10)
+    page = BeautifulSoup(driver.page_source, 'lxml')
+    data = page.select("div.scroll_main > div.scroll_wrap > div.scroll_cont > div.zfxxgk_zd2 > ul > div > div.default_pgContainer > li")
+    for d in data:
+        try:
+            # Read the fields from the parsed tag rather than splitting its
+            # HTML text: str.split('') raises ValueError for every entry.
+            link = d.find('a')
+            url = link['href']
+            # NOTE(review): assumes the headline is in the anchor's title
+            # attribute, else its visible text - confirm against the site.
+            title = link.get('title') or link.get_text(strip=True)
+            date_source = date_re.search(d.get_text()).group(0)
+            print(date_source)
+            # strptime also validates: a malformed date skips the entry.
+            s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+            print(s)
+            # NOTE: the time1/time2 date-window filter is disabled, so every
+            # listed article is collected.
+            print(url, title, date_source)
+            driver.get('http://zwgk.dingxi.gov.cn'+url)
+            driver.implicitly_wait(10)
+            content = \
+                str(BeautifulSoup(driver.page_source, 'lxml').select("div.content > div.div_table_biaotou"))
+            print(content)
+            writer.writerow(
+                ["公示公告", date_source, title, url, p.sub("", content)])
+            time.sleep(3)
+        except Exception as err:
+            # Best-effort crawl: report and move on. Catching Exception
+            # instead of a bare except lets Ctrl-C stop the run.
+            print("skipped entry:", repr(err))
+            continue
+
+
+if __name__ == '__main__':
+    path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市民政局'
+    os.makedirs(path, exist_ok=True)
+    try:
+        with open(path+"/公示公告.csv", "w", newline='', encoding='utf-8') as csvfile:
+            writer = csv.writer(csvfile)
+            writer.writerow(["模块", "时间", "标题", "标题url", "正文"])
+            i = 1
+            while i < 9999:
+                opendriver(i)
+                i += 1
+    finally:
+        # Always close the browser, even if the crawl fails or is interrupted.
+        driver.quit()
diff --git a/代码/网站监测/代码/市民政局/工作动态.py b/代码/网站监测/代码/市民政局/工作动态.py
index 16efdf5..3672147 100644
--- a/代码/网站监测/代码/市民政局/工作动态.py
+++ b/代码/网站监测/代码/市民政局/工作动态.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://mzj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["工作动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市民政局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市民政局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/工作动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市民政局/政策解读.py b/代码/网站监测/代码/市民政局/政策解读.py
new file mode 100644
index 0000000..f7adb06
--- /dev/null
+++ b/代码/网站监测/代码/市民政局/政策解读.py
@@ -0,0 +1,85 @@
+import csv
+import datetime
+import os
+import re
+
+from bs4 import BeautifulSoup
+import time
+from selenium import webdriver
+
+time1 = datetime.datetime(2021, 1, 20)
+time2 = datetime.datetime(2021, 2, 20)
+driver = webdriver.Chrome()
+series_name = []
+series_address = []
+series_telphone = []
+series_gps = []
+
+p = re.compile('<[^>]+>')
+# Matches the YYYY-MM-DD publication date shown in a list entry.
+date_re = re.compile(r'\d{4}-\d{2}-\d{2}')
+
+
+def opendriver(num):
+    """Scrape list page ``num`` and append each listed article to the CSV.
+
+    Loads the paginated column index with the module-level ``driver``. For
+    every ``<li>`` entry it reads the link, title and date, opens the
+    article page and writes one row through the module-level ``writer``
+    (created in the ``__main__`` block). An entry that cannot be parsed or
+    fetched is reported and skipped so it does not abort the crawl.
+
+    :param num: page number appended to the ``pageNum`` query argument.
+    """
+    print(str(num)+"111111111111111111111111111")
+    driver.get('http://zwgk.dingxi.gov.cn/col/col12449/index.html?number=XA00605&uid=39777&pageNum=' + str(num))
+    time.sleep(5)
+    driver.implicitly_wait(10)
+    page = BeautifulSoup(driver.page_source, 'lxml')
+    data = page.select("div.scroll_main > div.scroll_wrap > div.scroll_cont > div.zfxxgk_zd2 > ul > div > div.default_pgContainer > li")
+    for d in data:
+        try:
+            # Read the fields from the parsed tag rather than splitting its
+            # HTML text: str.split('') raises ValueError for every entry.
+            link = d.find('a')
+            url = link['href']
+            # NOTE(review): assumes the headline is in the anchor's title
+            # attribute, else its visible text - confirm against the site.
+            title = link.get('title') or link.get_text(strip=True)
+            date_source = date_re.search(d.get_text()).group(0)
+            print(date_source)
+            # strptime also validates: a malformed date skips the entry.
+            s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+            print(s)
+            # NOTE: the time1/time2 date-window filter is disabled, so every
+            # listed article is collected.
+            print(url, title, date_source)
+            driver.get('http://zwgk.dingxi.gov.cn'+url)
+            driver.implicitly_wait(10)
+            content = \
+                str(BeautifulSoup(driver.page_source, 'lxml').select("div.content > div.div_table_biaotou"))
+            print(content)
+            writer.writerow(
+                ["政策解读", date_source, title, url, p.sub("", content)])
+            time.sleep(3)
+        except Exception as err:
+            # Best-effort crawl: report and move on. Catching Exception
+            # instead of a bare except lets Ctrl-C stop the run.
+            print("skipped entry:", repr(err))
+            continue
+
+
+if __name__ == '__main__':
+    path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市民政局'
+    os.makedirs(path, exist_ok=True)
+    try:
+        with open(path+"/政策解读.csv", "w", newline='', encoding='utf-8') as csvfile:
+            writer = csv.writer(csvfile)
+            writer.writerow(["模块", "时间", "标题", "标题url", "正文"])
+            i = 1
+            while i < 9999:
+                opendriver(i)
+                i += 1
+    finally:
+        # Always close the browser, even if the crawl fails or is interrupted.
+        driver.quit()
diff --git a/代码/网站监测/代码/市民政局/法律法规.py b/代码/网站监测/代码/市民政局/法律法规.py
index dc72ba3..4220d12 100644
--- a/代码/网站监测/代码/市民政局/法律法规.py
+++ b/代码/网站监测/代码/市民政局/法律法规.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get(url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["法律法规", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市民政局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市民政局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/法律法规.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市民政局/部门文件.py b/代码/网站监测/代码/市民政局/部门文件.py
new file mode 100644
index 0000000..3831584
--- /dev/null
+++ b/代码/网站监测/代码/市民政局/部门文件.py
@@ -0,0 +1,63 @@
+import csv
+import datetime
+import os
+import re
+
+from bs4 import BeautifulSoup
+import time
+from selenium import webdriver
+
+time1 = datetime.datetime(2021, 1, 20)
+time2 = datetime.datetime(2021, 2, 20)
+driver = webdriver.Chrome()
+series_name = []
+series_address = []
+series_telphone = []
+series_gps = []
+
+p = re.compile('<[^>]+>')
+
+
+def opendriver(num):
+ print(str(num)+"111111111111111111111111111")
+ driver.get('http://zwgk.dingxi.gov.cn/col/col12395/index.html?number=XA00506&uid=39777&pageNum=' + str(num))
+ time.sleep(5)
+ driver.implicitly_wait(10)
+ page = BeautifulSoup(driver.page_source, 'lxml')
+ data = page.select("div.scroll_main > div.scroll_wrap > div.scroll_cont > div.zfxxgk_zd2 > ul > div > div.default_pgContainer > li")
+ # print(data)
+ for d in data:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('')[1].split('')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
+ print(url, title, date_source)
+ driver.get('http://zwgk.dingxi.gov.cn'+url)
+ driver.implicitly_wait(10)
+ content = \
+ str(BeautifulSoup(driver.page_source, 'lxml').select("div.content > p"))
+ print(content)
+ writer.writerow(
+ ["部门文件", date_source, title, url, p.sub("", content)])
+ time.sleep(3)
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
+
+
+if __name__ == '__main__':
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市民政局'
+ if not os.path.exists(path):
+ os.makedirs(path)
+ with open(path+"/部门文件.csv", "w", newline='', encoding='utf-8') as csvfile:
+ writer = csv.writer(csvfile)
+ writer.writerow(["模块", "时间", "标题", "标题url", "正文"])
+ i = 1
+ while i < 9999:
+ opendriver(i)
+ i += 1
diff --git a/代码/网站监测/代码/市科技局/主题教育.py b/代码/网站监测/代码/市科技局/主题教育.py
index 78bb1bf..f860526 100644
--- a/代码/网站监测/代码/市科技局/主题教育.py
+++ b/代码/网站监测/代码/市科技局/主题教育.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://kjj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["主题教育", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市科技局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市科技局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/主题教育.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市科技局/信息服务.py b/代码/网站监测/代码/市科技局/信息服务.py
index 8a9418b..d0e8470 100644
--- a/代码/网站监测/代码/市科技局/信息服务.py
+++ b/代码/网站监测/代码/市科技局/信息服务.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://kjj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["信息服务", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市科技局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市科技局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/信息服务.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市科技局/党的建设.py b/代码/网站监测/代码/市科技局/党的建设.py
new file mode 100644
index 0000000..02fc95d
--- /dev/null
+++ b/代码/网站监测/代码/市科技局/党的建设.py
@@ -0,0 +1,62 @@
+import csv
+import datetime
+import os
+import re
+
+from bs4 import BeautifulSoup
+import time
+from selenium import webdriver
+
+time1 = datetime.datetime(2021, 1, 20)
+time2 = datetime.datetime(2021, 2, 20)
+driver = webdriver.Chrome()
+series_name = []
+series_address = []
+series_telphone = []
+series_gps = []
+
+p = re.compile('<[^>]+>')
+
+
+def opendriver(num):
+ print(str(num)+"111111111111111111111111111")
+ driver.get('http://kjj.dingxi.gov.cn/col/col12096/index.html?uid=18995&pageNum=' + str(num))
+ time.sleep(5)
+ driver.implicitly_wait(10)
+ page = BeautifulSoup(driver.page_source, 'lxml')
+ data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
+ for d in data:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
+ print(url, title, date_source)
+ driver.get('http://kjj.dingxi.gov.cn' + url)
+ driver.implicitly_wait(10)
+ content = \
+ str(BeautifulSoup(driver.page_source, 'lxml').select("tr > td.bt_content > div > p"))
+ print(content)
+ writer.writerow(
+ ["党的建设", date_source, title, url, p.sub("", content)])
+ time.sleep(3)
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
+
+
+if __name__ == '__main__':
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市科技局'
+ if not os.path.exists(path):
+ os.makedirs(path)
+ with open(path+"/党的建设.csv", "w", newline='', encoding='utf-8') as csvfile:
+ writer = csv.writer(csvfile)
+ writer.writerow(["模块", "时间", "标题", "标题url", "正文"])
+ i = 1
+ while i < 9999:
+ opendriver(i)
+ i += 1
diff --git a/代码/网站监测/代码/市科技局/公示公告.py b/代码/网站监测/代码/市科技局/公示公告.py
new file mode 100644
index 0000000..3cefc8a
--- /dev/null
+++ b/代码/网站监测/代码/市科技局/公示公告.py
@@ -0,0 +1,62 @@
+import csv
+import datetime
+import os
+import re
+
+from bs4 import BeautifulSoup
+import time
+from selenium import webdriver
+
+time1 = datetime.datetime(2021, 1, 20)
+time2 = datetime.datetime(2021, 2, 20)
+driver = webdriver.Chrome()
+series_name = []
+series_address = []
+series_telphone = []
+series_gps = []
+
+p = re.compile('<[^>]+>')
+
+
+def opendriver(num):
+ print(str(num)+"111111111111111111111111111")
+ driver.get('http://zwgk.dingxi.gov.cn/col/col12481/index.html?number=XA00703&uid=39703&pageNum=' + str(num))
+ time.sleep(5)
+ driver.implicitly_wait(10)
+ page = BeautifulSoup(driver.page_source, 'lxml')
+ data = page.select("div.scroll_main > div.scroll_wrap > div.scroll_cont > div.zfxxgk_zd2 > ul > div > div.default_pgContainer > li")
+ for d in data:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('')[1].split('')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
+ print(url, title, date_source)
+ driver.get('http://zwgk.dingxi.gov.cn' + url)
+ driver.implicitly_wait(10)
+ content = \
+ str(BeautifulSoup(driver.page_source, 'lxml').select("tr > td.bt_content > div > p"))
+ print(content)
+ writer.writerow(
+ ["公示公告", date_source, title, url, p.sub("", content)])
+ time.sleep(3)
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
+
+
+if __name__ == '__main__':
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市科技局'
+ if not os.path.exists(path):
+ os.makedirs(path)
+ with open(path+"/公示公告.csv", "w", newline='', encoding='utf-8') as csvfile:
+ writer = csv.writer(csvfile)
+ writer.writerow(["模块", "时间", "标题", "标题url", "正文"])
+ i = 1
+ while i < 9999:
+ opendriver(i)
+ i += 1
diff --git a/代码/网站监测/代码/市科技局/工作动态.py b/代码/网站监测/代码/市科技局/工作动态.py
index 2cc907c..4261aea 100644
--- a/代码/网站监测/代码/市科技局/工作动态.py
+++ b/代码/网站监测/代码/市科技局/工作动态.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://kjj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["工作动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市科技局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市科技局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/工作动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市科技局/政策法规.py b/代码/网站监测/代码/市科技局/政策法规.py
index 6dcc979..c6ed15c 100644
--- a/代码/网站监测/代码/市科技局/政策法规.py
+++ b/代码/网站监测/代码/市科技局/政策法规.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://kjj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["政策法规", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市科技局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市科技局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/政策法规.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市科技局/政策解读.py b/代码/网站监测/代码/市科技局/政策解读.py
new file mode 100644
index 0000000..8a77723
--- /dev/null
+++ b/代码/网站监测/代码/市科技局/政策解读.py
@@ -0,0 +1,62 @@
+import csv
+import datetime
+import os
+import re
+
+from bs4 import BeautifulSoup
+import time
+from selenium import webdriver
+
+time1 = datetime.datetime(2021, 1, 20)
+time2 = datetime.datetime(2021, 2, 20)
+driver = webdriver.Chrome()
+series_name = []
+series_address = []
+series_telphone = []
+series_gps = []
+
+p = re.compile('<[^>]+>')
+
+
+def opendriver(num):
+ print(str(num)+"111111111111111111111111111")
+ driver.get('http://zwgk.dingxi.gov.cn/col/col12447/index.html?number=XA00603&uid=39703&pageNum=' + str(num))
+ time.sleep(5)
+ driver.implicitly_wait(10)
+ page = BeautifulSoup(driver.page_source, 'lxml')
+ data = page.select("div.scroll_main > div.scroll_wrap > div.scroll_cont > div.zfxxgk_zd2 > ul > div > div.default_pgContainer > li")
+ for d in data:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('')[1].split('')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
+ print(url, title, date_source)
+ driver.get('http://zwgk.dingxi.gov.cn' + url)
+ driver.implicitly_wait(10)
+ content = \
+ str(BeautifulSoup(driver.page_source, 'lxml').select("tr > td.bt_content > div > p"))
+ print(content)
+ writer.writerow(
+ ["政策解读", date_source, title, url, p.sub("", content)])
+ time.sleep(3)
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
+
+
+if __name__ == '__main__':
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市科技局'
+ if not os.path.exists(path):
+ os.makedirs(path)
+ with open(path+"/政策解读.csv", "w", newline='', encoding='utf-8') as csvfile:
+ writer = csv.writer(csvfile)
+ writer.writerow(["模块", "时间", "标题", "标题url", "正文"])
+ i = 1
+ while i < 9999:
+ opendriver(i)
+ i += 1
diff --git a/代码/网站监测/代码/市科技局/科技信息.py b/代码/网站监测/代码/市科技局/科技信息.py
index e15dca8..3704fbc 100644
--- a/代码/网站监测/代码/市科技局/科技信息.py
+++ b/代码/网站监测/代码/市科技局/科技信息.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://kjj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["科技信息", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市科技局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市科技局'
if not os.path.exists(path):
os.makedirs(path)
diff --git a/代码/网站监测/代码/市科技局/科技服务.py b/代码/网站监测/代码/市科技局/科技服务.py
index 5c5f5d9..d2e0432 100644
--- a/代码/网站监测/代码/市科技局/科技服务.py
+++ b/代码/网站监测/代码/市科技局/科技服务.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://kjj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["科技服务", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市科技局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市科技局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/科技服务.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市科技局/部门文件.py b/代码/网站监测/代码/市科技局/部门文件.py
new file mode 100644
index 0000000..de9795e
--- /dev/null
+++ b/代码/网站监测/代码/市科技局/部门文件.py
@@ -0,0 +1,62 @@
+import csv
+import datetime
+import os
+import re
+
+from bs4 import BeautifulSoup
+import time
+from selenium import webdriver
+
+time1 = datetime.datetime(2021, 1, 20)
+time2 = datetime.datetime(2021, 2, 20)
+driver = webdriver.Chrome()
+series_name = []
+series_address = []
+series_telphone = []
+series_gps = []
+
+p = re.compile('<[^>]+>')
+
+
+def opendriver(num):
+ print(str(num)+"111111111111111111111111111")
+ driver.get('http://zwgk.dingxi.gov.cn/col/col12393/index.html?number=XA00503&uid=39703&pageNum=' + str(num))
+ time.sleep(5)
+ driver.implicitly_wait(10)
+ page = BeautifulSoup(driver.page_source, 'lxml')
+ data = page.select("div.scroll_main > div.scroll_wrap > div.scroll_cont > div.zfxxgk_zd2 > ul > div > div.default_pgContainer > li")
+ for d in data:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('')[1].split('')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
+ print(url, title, date_source)
+ driver.get('http://zwgk.dingxi.gov.cn' + url)
+ driver.implicitly_wait(10)
+ content = \
+ str(BeautifulSoup(driver.page_source, 'lxml').select("tr > td.bt_content > div > p"))
+ print(content)
+ writer.writerow(
+ ["部门文件", date_source, title, url, p.sub("", content)])
+ time.sleep(3)
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
+
+
+if __name__ == '__main__':
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市科技局'
+ if not os.path.exists(path):
+ os.makedirs(path)
+ with open(path+"/部门文件.csv", "w", newline='', encoding='utf-8') as csvfile:
+ writer = csv.writer(csvfile)
+ writer.writerow(["模块", "时间", "标题", "标题url", "正文"])
+ i = 1
+ while i < 9999:
+ opendriver(i)
+ i += 1
diff --git a/代码/网站监测/代码/市自然资源局/工作动态.py b/代码/网站监测/代码/市自然资源局/工作动态.py
index 92fdf78..631053e 100644
--- a/代码/网站监测/代码/市自然资源局/工作动态.py
+++ b/代码/网站监测/代码/市自然资源局/工作动态.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://zrzyj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["工作动态", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市自然资源局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市自然资源局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/工作动态.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/代码/市自然资源局/规划公示.py b/代码/网站监测/代码/市自然资源局/规划公示.py
index e704158..25e2a0e 100644
--- a/代码/网站监测/代码/市自然资源局/规划公示.py
+++ b/代码/网站监测/代码/市自然资源局/规划公示.py
@@ -27,14 +27,14 @@ def opendriver(num):
data = page.select("div.dx_lm_right > ul > div > div.default_pgContainer > li")
# print(data)
for d in data:
- # try:
- url = str(d).split('')[0]
- date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
- print(date_source)
- s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
- print(s)
- if s <= time2:
+ try:
+ url = str(d).split('')[0]
+ date_source = str(d).split('style="font-size:14px; float:right;">[')[1].split(']')[0]
+ print(date_source)
+ s = datetime.datetime.strptime(date_source, '%Y-%m-%d')
+ print(s)
+ # if s <= time2:
print(url, title, date_source)
driver.get('http://zrzyj.dingxi.gov.cn' + url)
driver.implicitly_wait(10)
@@ -44,14 +44,14 @@ def opendriver(num):
writer.writerow(
["规划公示", date_source, title, url, p.sub("", content)])
time.sleep(3)
- if s < time1:
- driver.close()
- # except:
- # continue
+ # if s < time1:
+ # driver.close()
+ except:
+ continue
if __name__ == '__main__':
- path = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据/市自然资源局'
+ path = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据/市自然资源局'
if not os.path.exists(path):
os.makedirs(path)
with open(path+"/规划公示.csv", "w", newline='', encoding='utf-8') as csvfile:
diff --git a/代码/网站监测/网站监测正文.py b/代码/网站监测/网站监测正文.py
index 816ad96..2a8b753 100644
--- a/代码/网站监测/网站监测正文.py
+++ b/代码/网站监测/网站监测正文.py
@@ -1,7 +1,7 @@
import datetime
import os
import csv
-path1 = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据'
+path1 = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据'
files1 = os.listdir(path1)
for file1 in files1:
print(file1)
@@ -9,11 +9,11 @@ for file1 in files1:
path = path1 + '/' + file1
files = os.listdir(path)
- time = datetime.datetime(2020,10,1)
- time1 = datetime.datetime(2020,10,31)
+ time = datetime.datetime(2019,1,1)
+ time1 = datetime.datetime(2021,3,1)
flag = time1>time
print(time,time1,flag)
- with open("D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据整理/正文/%s.csv" %(file1), "w",newline='',encoding='utf-8') as csvfile:
+ with open("D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据整理/正文/%s.csv" %(file1), "w",newline='',encoding='utf-8') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["站点","模块", "标题","时间","正文"])
for file in files:
diff --git a/代码/网站监测/网站监测正文汇总.py b/代码/网站监测/网站监测正文汇总.py
index 0a713ee..4cd2b37 100644
--- a/代码/网站监测/网站监测正文汇总.py
+++ b/代码/网站监测/网站监测正文汇总.py
@@ -1,9 +1,9 @@
import datetime
import os
import csv
-path1 = 'D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据'
+path1 = 'D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据'
files1 = os.listdir(path1)
-with open("D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/数据整理/网站监测正文汇总.csv", "a", newline='', encoding='utf-8') as csvfile:
+with open("D:/2021/舆论监测平台_网站监测/新媒体监测数据/定西/网站监测/数据整理/网站监测正文汇总.csv", "a", newline='', encoding='utf-8') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["站点", "模块", "标题", "发文时间", "正文"])
for file1 in files1:
@@ -12,8 +12,8 @@ with open("D:/2021/舆论监测平台/新媒体监测数据/定西/网站监测/
path = path1 + '/' + file1
files = os.listdir(path)
- time1 = datetime.datetime(2021, 1, 20)
- time2 = datetime.datetime(2021, 2, 20)
+ time1 = datetime.datetime(2019, 1, 1)
+ time2 = datetime.datetime(2021, 3, 1)
for file in files:
s = []