newmediamonitoring/update_newmedia.py

99 lines
4.9 KiB
Python
Raw Normal View History

2020-12-05 06:52:28 +00:00
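# New-media update script: the result is a list of new-media accounts whose records differ.
'''
1. Convert the Excel sheet to CSV using the UTF-8 encoding.
2. Read the CSV contents.
3. Compare each item against the data in the database.
'''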
import csv
2020-12-05 16:39:07 +00:00
import uuid
2020-12-05 06:52:28 +00:00
2020-12-05 16:39:07 +00:00
import psycopg2 as psycopg2
2020-12-05 06:52:28 +00:00
2020-12-05 16:39:07 +00:00
# code = None
# alias = None
# attention = None
# remark = None
# identificationcode = None
# function = None
# articleurl = None
# weixinid = None
# type = None
# Connection string passed to psycopg2.connect() by get_csv_weixin.
# NOTE(review): the host and credentials are hardcoded in source; consider
# loading them from the environment or from a config file that is kept out of
# version control.
G2 = 'host=210.77.68.250 port=5432 dbname=newmediaDB3 user=newmedia password=newmedia2020!@#'
2020-12-05 06:52:28 +00:00
2020-12-05 16:39:07 +00:00
def _lookup_area(connection, area_name):
    """Return ``(code, name)`` for *area_name* from ``dashboard_area_code_2020``.

    Columns 1 and 2 of the matching row are returned. When several rows
    match, the last one wins; when none matches, ``(None, None)`` is returned.
    """
    area_code = None
    matched_name = None
    with connection.cursor() as cursor:
        # Bound parameter: the value comes straight from the CSV file, so it
        # must never be spliced into the SQL text.
        cursor.execute(
            "select * from dashboard_area_code_2020 where name = %s",
            (area_name,))
        for row in cursor:
            area_code = row[1]
            matched_name = row[2]
    return area_code, matched_name


def _compare_row(connection, writer, r):
    """Compare one CSV row with the database and report any mismatch.

    Header rows and rows whose column 7 mentions one of the four named
    platforms are ignored. For every other row, each ``dashboard_qita`` record
    with the same code is joined to its ``dashboard_organization`` record and
    compared field by field; on a mismatch two lines are written to *writer*
    (the CSV values, then the database values). If anything fails while
    processing the row, the CSV values alone are written.
    """
    if len(r) < 14:
        # Blank or truncated lines cannot be compared (column 13 is needed);
        # skip them instead of aborting the whole run with an IndexError.
        if r:
            print("skipping short row: %r" % (r,))
        return
    if r[1] == '单位全称':
        return  # header row
    if any(platform in r[7] for platform in ('头条', '微信', '微博', '抖音')):
        return

    name = r[1]
    code = r[4]
    identificationcode = r[3]
    biz = r[13]
    province = cities = district = None
    p_name = c_name = d_name = None
    print(r)
    try:
        province, p_name = _lookup_area(connection, r[9])
        cities, c_name = _lookup_area(connection, r[10])
        district, d_name = _lookup_area(connection, r[11])
        print(code)
        with connection.cursor() as cursor:
            cursor.execute(
                "select * from dashboard_qita where code = %s", (code,))
            for account in cursor:
                print(str(account[10]) + "1111111111111111111111")
                with connection.cursor() as cursor2:
                    # str() keeps the value a quoted literal, exactly as the
                    # previous string-formatted query sent it.
                    cursor2.execute(
                        "select * from dashboard_organization where id = %s",
                        (str(account[10]),))
                    for org in cursor2:
                        print(str(org[0]) + "55555555555555555555555")
                        expected = (name, province, cities, district,
                                    code, identificationcode, biz)
                        stored = (org[1], org[3], org[4], org[5],
                                  account[1], account[13], account[9])
                        if all(str(a) == str(b)
                               for a, b in zip(expected, stored)):
                            print(org)
                        else:
                            writer.writerow(
                                [r[0], name, code, identificationcode, biz,
                                 p_name, c_name, d_name])
                            writer.writerow(
                                [r[0], org[1], account[1], account[13],
                                 account[9], org[3], org[4], org[5]])
    except Exception as exc:
        # A failed statement leaves the PostgreSQL transaction aborted; without
        # a rollback every later query would fail too and every remaining row
        # would be reported as a conflict.
        connection.rollback()
        print("row %s could not be compared: %r" % (r[0], exc))
        writer.writerow(
            [r[0], name, code, identificationcode, biz, p_name, c_name,
             d_name])


def get_csv_weixin(path, output_path="D:/2020/新媒体监测/新媒体对比结果_其他.csv"):
    """Compare the accounts listed in a CSV file with the database records.

    Args:
        path: UTF-8 encoded CSV file to read.
        output_path: CSV file that receives the report. It is overwritten on
            every run and defaults to the location used historically.

    The report starts with a two-cell header line; each mismatch or failed row
    is then written by ``_compare_row``.
    """
    # NOTE(review): the report is written with the platform default encoding,
    # as before; pass an explicit encoding here if the output must be portable.
    with open(output_path, "w", newline='') as report:
        writer = csv.writer(report)
        writer.writerow(["序号", "冲突账号"])

        connection = psycopg2.connect(G2)
        try:
            with open(path, encoding='utf-8') as source:
                for r in csv.reader(source):
                    _compare_row(connection, writer, r)
        finally:
            # psycopg2's ``with connection`` only ends the transaction; the
            # connection has to be closed explicitly.
            connection.close()
2020-12-05 06:52:28 +00:00
# Script entry point: process the hard-coded input CSV when run directly.
if __name__ == "__main__":
    get_csv_weixin("D:/2020/新媒体监测/TASK_ALL.csv")