newmediamonitoring/敏感词查找_weibo.py

39 lines
1.4 KiB
Python
Raw Normal View History

2021-01-30 13:42:17 +00:00
import csv
import os
# Root folder that is scanned for sensitive words (presumably a Weibo export,
# judging by the folder name).
file_path = "D:/2021/新媒体/1月份10-20weibo/1月份10-20weibo"
# Entries directly under file_path, listed once when the module is loaded.
# get_weibo_cuominzi treats every entry as a sub-folder containing CSV files.
path = os.listdir(file_path)
def _excerpt_around(text, needle, context):
    """Return the text around the first occurrence of needle, or None if absent."""
    start = text.find(needle)
    if start < 0:
        return None
    # Clamp the left edge at 0: a negative slice start counts from the END of
    # the string, which yields an empty or unrelated excerpt whenever the match
    # sits within the first `context` characters.
    return text[max(0, start - context):start + context]


def get_weibo_cuominzi(cuominzi, source_dir=None,
                       output_csv='D:/2021/新媒体/weibo错敏字3.csv', context=10):
    """Scan CSV files for sensitive words and write every hit to a report CSV.

    The source folder is expected to contain sub-folders, each holding CSV
    files. For every data row, column 1 is searched for each word; on a hit
    one report row is written with the word, column 0 (under "id"), column 2
    (under "文章url"), column 8 (under "发布时间") and a short excerpt around
    the first occurrence of the word (under "位置").

    Args:
        cuominzi: Sequence of rows whose first cell is the word to look for.
        source_dir: Folder to scan. When None, the module-level ``file_path``
            and its pre-computed listing ``path`` are used.
        output_csv: Path of the report file; it is overwritten.
        context: Number of characters kept before the start of the match and
            from the start of the match onwards in the excerpt.

    Returns:
        None. The result is the report file written to ``output_csv``.
    """
    if source_dir is None:
        # Default run: use the module-level folder and its listing.
        source_dir, subdirs = file_path, path
    else:
        subdirs = os.listdir(source_dir)

    # Convert each word to str once instead of once per scanned row; keep the
    # original cell so the report shows it unchanged. An empty word would
    # match every row, so it is dropped.
    terms = [(row[0], str(row[0])) for row in cuominzi]
    terms = [(raw, needle) for raw, needle in terms if needle]

    with open(output_csv, 'w', newline='', encoding='utf-8') as report_file:
        writer = csv.writer(report_file)
        writer.writerow(["错敏字", "id", "文章url", "发布时间", "位置"])
        for subdir in subdirs:
            subdir_path = os.path.join(source_dir, subdir)
            if not os.path.isdir(subdir_path):
                # Stray files next to the sub-folders cannot be listed.
                continue
            for name in os.listdir(subdir_path):
                # newline='' lets the csv module handle line breaks that are
                # embedded in quoted fields.
                with open(os.path.join(subdir_path, name),
                          newline='', encoding='utf8') as data_file:
                    for record in csv.reader(data_file):
                        if len(record) < 9:
                            # Blank or truncated line: columns 0, 1, 2 and 8
                            # are all needed below.
                            continue
                        text = str(record[1])
                        for raw, needle in terms:
                            excerpt = _excerpt_around(text, needle, context)
                            if excerpt is None:
                                continue
                            writer.writerow(
                                [raw, record[0], record[2], record[8], excerpt])
if __name__ == '__main__':
    # Load the word list: keep every row whose first cell is a real word,
    # i.e. neither the '错敏字' header nor an empty cell.
    cuominzi = []
    # NOTE(review): no encoding is passed, so the platform default is used —
    # confirm it matches how the word list was saved.
    with open('D:/2021/新媒体/错敏字.csv', newline='') as csvfile:
        for r in csv.reader(csvfile):
            # csv.reader returns [] for a blank line, so make sure the row is
            # non-empty before indexing into it.
            if r and r[0] not in ('错敏字', ''):
                cuominzi.append(r)
    print(cuominzi)
    get_weibo_cuominzi(cuominzi)