39 lines
1.4 KiB
Python
39 lines
1.4 KiB
Python
|
import csv
|
|||
|
import os
|
|||
|
|
|||
|
# Root directory of the exported Weibo data. The scanner below lists every
# entry of this directory and treats each one as a sub-directory of CSV files.
file_path = "D:/2021/新媒体/1月份(10-20)weibo/1月份(10-20)weibo"

# Entries of the root directory, listed once when the module is loaded.
path = os.listdir(file_path)
|
|||
|
def _context_snippet(text, pos, radius=10):
    """Return the characters of *text* surrounding index *pos*.

    The window spans ``radius`` characters before ``pos`` and ``radius``
    characters starting at ``pos``. The start is clamped at 0: a negative
    slice start would be interpreted by Python as an offset from the END of
    the string and yield an empty or unrelated excerpt.
    """
    return text[max(0, pos - radius):pos + radius]


def get_weibo_cuominzi(cuominzi, input_dir=None,
                       output_path='D:/2021/新媒体/weibo错敏字3.csv'):
    """Scan Weibo CSV exports for flagged words and write a report CSV.

    Every CSV file found one level below the input directory
    (``<input_dir>/<sub-directory>/<file>``) is read row by row. For each
    row and each flagged word, if the word occurs in column 1 of the row,
    a report line is written containing: the word, column 0, column 2,
    column 8 and an excerpt of column 1 around the first occurrence.

    Args:
        cuominzi: Iterable of rows (sequences) whose first element is the
            flagged word to search for.
        input_dir: Root directory to scan. When ``None`` (the default), the
            module-level ``file_path`` and its pre-computed listing ``path``
            are used, as before.
        output_path: Destination of the report CSV; it is overwritten.

    Rows with fewer than 9 columns (e.g. blank lines) cannot provide the
    fields the report needs and are skipped instead of aborting the scan.
    """
    if input_dir is None:
        # Backward-compatible default: rely on the module-level configuration.
        input_dir = file_path
        subdirs = path
    else:
        subdirs = os.listdir(input_dir)

    # Highest column index read below is 8, so a usable row has >= 9 fields.
    needed_columns = 9

    # Convert each flagged word to str once instead of once per data row.
    words = [(entry, str(entry[0])) for entry in cuominzi]

    with open(output_path, 'w', newline='', encoding='utf-8') as out_file:
        writer = csv.writer(out_file)
        writer.writerow(["错敏字", "id", "文章url", "发布时间", "位置"])

        for subdir in subdirs:
            subdir_path = os.path.join(input_dir, subdir)
            for name in os.listdir(subdir_path):
                source = os.path.join(subdir_path, name)
                # newline='' is what the csv module requires for correct
                # handling of newlines embedded in quoted fields. A distinct
                # handle name avoids shadowing the output file.
                with open(source, newline='', encoding='utf8') as in_file:
                    for row in csv.reader(in_file):
                        if len(row) < needed_columns:
                            continue
                        text = str(row[1])
                        for entry, word in words:
                            print(entry)  # progress trace kept from the original
                            pos = text.find(word)
                            if pos == -1:
                                print(row[1])
                                continue
                            print(pos)
                            writer.writerow([
                                entry[0],
                                row[0],
                                row[2],
                                row[8],
                                _context_snippet(text, pos),
                            ])
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
if __name__ == '__main__':
    # Load the flagged-word list: keep every row whose first cell is neither
    # empty nor the header label.
    cuominzi = []
    # NOTE(review): no encoding is given, so the platform default is used —
    # confirm it matches how this CSV was saved.
    with open('D:/2021/新媒体/错敏字.csv', newline='') as csvfile:
        reader = csv.reader(csvfile)
        for r in reader:
            # `r` is an empty list for a blank line; check it before indexing
            # so a stray blank line does not raise IndexError.
            if r and r[0] not in ('错敏字', ''):
                cuominzi.append(r)

    print(cuominzi)
    get_weibo_cuominzi(cuominzi)
|