MonitoringOfPublicOpinion/代码/微信/庆阳/镇原县/镇原县微信评论统计.py

39 lines
1.8 KiB
Python
Raw Normal View History

2020-11-08 10:52:05 +00:00
import csv
import logging
import os

from bs4 import BeautifulSoup
# Directory tree holding the saved article pages; the script expects the
# .html files exactly three levels below this root.
HTML_ROOT = "D:/2020/舆论监测平台/庆阳/数据/镇原县/html"
# Destination CSV that receives one row per extracted comment.
OUTPUT_CSV = "D:/2020/舆论监测平台/庆阳/数据整理/镇原县/镇原县微信评论统计.csv"

# CSS selector matching one element per comment in a saved page.
COMMENT_SELECTOR = "body > div.rich_media > div.msgBox > div.msg > div.msgBody "

# Literal markup fragments used to cut the fields out of a serialized comment.
USER_MARKER = '<p class="userName">'
REPLY_MARKER = '<p class="replyBody">'
LIKE_MARKER = '<span class="reply_like_num">'

logger = logging.getLogger(__name__)


def split_article_filename(file_name):
    """Return ``(account, title)`` taken from an article file name.

    The part of ``file_name`` before ``.html`` is split on ``_``; the first
    piece is used as the account name and the third piece as the title.

    Returns:
        A ``(account, title)`` tuple, or ``None`` when the name has fewer
        than three ``_``-separated pieces.
    """
    pieces = file_name.split('.html')[0].split('_')
    if len(pieces) < 3:
        return None
    return pieces[0], pieces[2]


def extract_comment_fields(markup):
    """Cut ``(comment, username, date)`` out of one serialized comment element.

    The fields are located purely by string markers:

    * username: text between ``<p class="userName">`` and the next ``<span>``
    * date: text inside that ``<span>`` up to ``</span>``
    * comment: text after ``<p class="replyBody">`` up to the like-count
      span or the closing ``</p>``, whichever comes first

    Args:
        markup: The comment element rendered as an HTML string.

    Returns:
        A ``(comment, username, date)`` tuple, or ``None`` when any of the
        expected markers is missing.
    """
    try:
        user_pieces = markup.split(USER_MARKER)[1].split('<span>')
        username = user_pieces[0]
        date = user_pieces[1].split('</span>')[0]
        reply_part = markup.split(REPLY_MARKER)[1]
    except IndexError:
        return None
    comment = reply_part.split(LIKE_MARKER)[0].split('</p>')[0]
    return comment, username, date


def iter_article_files(root):
    """Yield ``(file_name, full_path)`` for every entry three levels below ``root``."""
    for first in os.listdir(root):
        first_dir = os.path.join(root, first)
        for second in os.listdir(first_dir):
            second_dir = os.path.join(first_dir, second)
            for file_name in os.listdir(second_dir):
                yield file_name, os.path.join(second_dir, file_name)


def main():
    """Collect the comments of every saved page under HTML_ROOT into OUTPUT_CSV.

    Each output row is ``[comment, username, date, account, title]``; account
    and title come from the page's file name, the rest from the page markup.
    Pages or comments that cannot be processed are skipped with a logged
    warning instead of being dropped silently.
    """
    # The output keeps the platform's default text encoding, as before.
    # errors="replace" makes a character that encoding cannot represent
    # degrade to "?" instead of raising inside writerow and losing the row.
    with open(OUTPUT_CSV, "w", newline='', errors="replace") as csvfile:
        writer = csv.writer(csvfile)
        # Write the header row first.
        writer.writerow(["评论", "用户", "时间", "公众号", "标题"])

        for file_name, page_path in iter_article_files(HTML_ROOT):
            name_parts = split_article_filename(file_name)
            if name_parts is None:
                logger.warning("Skipping %s: unexpected file name layout", page_path)
                continue
            gongzhonghao, title = name_parts

            with open(page_path, 'r', encoding='utf-8', newline='') as wb_data:
                try:
                    soup = BeautifulSoup(wb_data, 'lxml')
                    comments = soup.select(COMMENT_SELECTOR)
                except Exception:
                    # Deliberate per-file best effort: one unreadable page
                    # must not abort the whole run, but the failure is now
                    # reported (and Ctrl-C is no longer swallowed).
                    logger.warning("Skipping %s: could not parse page",
                                   page_path, exc_info=True)
                    continue

            for node in comments:
                fields = extract_comment_fields(str(node))
                if fields is None:
                    # Skip only this comment; later comments in the same
                    # page are still written.
                    logger.warning("Skipping malformed comment in %s", page_path)
                    continue
                co, username, date = fields
                writer.writerow([co, username, date, gongzhonghao, title])


if __name__ == "__main__":
    main()