newmediamonitoring/新媒体更新脚本/查找单位行政区划更改.py

60 lines
2.0 KiB
Python
Raw Normal View History

2021-01-07 10:51:58 +00:00
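# New-media update script; the result is the list of entries that differ.
'''
1. Convert the Excel sheet to CSV using the UTF-8 encoding.
2. Read the contents of the CSV.
3. Compare them item by item with the data in the database.
'''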
import csv
import uuid
import psycopg2 as psycopg2
# code = None
# alias = None
# attention = None
# remark = None
# identificationcode = None
# function = None
# articleurl = None
# weixinid = None
# type = None
# libpq-style connection string handed to psycopg2.connect().
# NOTE(review): host and credentials are hard-coded here; consider loading
# them from configuration or the environment instead.
G2 = (
    'host=210.77.68.250 port=5432 dbname=newmediaDB3 '
    'user=newmedia password=newmedia2020!@#'
)
# Default location of the comparison report (the path the script always used).
_DEFAULT_REPORT_PATH = "D:/2020/舆论监测平台/数据/数据库更新/单位名称对比结果.csv"


def _read_csv_names(path):
    """Return the values of column 1 for every data row of the CSV at *path*.

    Rows whose column 1 equals the header text '单位全称' are skipped, as are
    rows with fewer than two columns (e.g. blank lines), which previously
    raised IndexError.
    """
    # newline='' lets the csv module do its own newline handling.
    with open(path, encoding='utf-8', newline='') as source:
        return [
            row[1]
            for row in csv.reader(source)
            if len(row) > 1 and row[1] != '单位全称'
        ]


def _read_db_names():
    """Return column 1 of every row in the dashboard_organization table.

    A failure while querying is printed and whatever was collected so far is
    returned, matching the script's original best-effort behaviour.
    """
    names = []
    connection = psycopg2.connect(G2)
    try:
        # "with connection" only commits/rolls back the transaction; it does
        # not close the connection, hence the explicit close() below.
        with connection, connection.cursor() as cursor:
            try:
                cursor.execute("select * from dashboard_organization")
                names.extend(record[1] for record in cursor)
            except Exception as e:
                print(e)
    finally:
        connection.close()
    return names


def get_csv_weixin(path, output_path=_DEFAULT_REPORT_PATH):
    """Compare the names in a CSV export with those stored in the database.

    Column 1 of the CSV at *path* is compared with column 1 of the
    dashboard_organization table. Every value present on only one side is
    written to *output_path* as a two-column CSV row:

    * ``[value, '删除']`` - present in the database but not in the CSV;
    * ``[value, '新增']`` - present in the CSV but not in the database.

    A value in the symmetric difference belongs to exactly one side, so the
    former '异常' branch could never be reached and has been dropped.

    Args:
        path: Path of the UTF-8 encoded CSV file to read.
        output_path: Path of the report to write. Defaults to the location
            the script has always written to.

    Returns:
        None. Two summary lines are printed: the number of database rows and
        of distinct CSV values, then the number of differing values.
    """
    # Gather both inputs before opening the report, so a failure while
    # reading does not leave a truncated report behind.
    csv_names = _read_csv_names(path)
    db_names = _read_db_names()

    # Set arithmetic replaces the per-item list membership scans.
    csv_name_set = set(csv_names)
    db_name_set = set(db_names)
    removed = db_name_set - csv_name_set
    added = csv_name_set - db_name_set

    print(len(db_names), len(csv_name_set))
    print(len(removed) + len(added))

    with open(output_path, "w", newline='', encoding='utf-8') as report:
        writer = csv.writer(report)
        # key=str keeps the ordering deterministic even if the database
        # column holds NULLs or non-string values.
        writer.writerows([name, '删除'] for name in sorted(removed, key=str))
        writer.writerows([name, '新增'] for name in sorted(added, key=str))
if __name__ == '__main__':
    # Script entry point: run the comparison against the exported task list.
    task_csv = 'D:/2020/舆论监测平台/数据/Task_All.csv'
    get_csv_weixin(task_csv)