This commit is contained in:
commit
1368bf1f0f
|
@ -0,0 +1,759 @@
|
|||
# 1. 打开监测任务表格
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os, glob, re
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.ticker import FuncFormatter
|
||||
import datetime
|
||||
#word toc
|
||||
import win32com
|
||||
import win32com.client as win32
|
||||
from win32com.client import constants
|
||||
#pdf
|
||||
from pikepdf import Pdf,Page,Rectangle
|
||||
#word
|
||||
from docxtpl import DocxTemplate
|
||||
from docxtpl import InlineImage
|
||||
from docx.shared import Mm
|
||||
|
||||
def addStamp(target_pdf_path, watermark_pdf_path, output_pdf_path):
    """Overlay the official seal and the signature onto a report PDF.

    The stamp file must contain at least two pages: its first page is used
    as the seal and its second page as the signature. The seal is overlaid on
    the first page of the target document and the signature on its third
    page; the stamped document is written to ``output_pdf_path``.

    Both PDFs are opened as context managers so that their file handles are
    released even when overlaying or saving fails.

    Args:
        target_pdf_path: PDF that receives the overlays (needs >= 3 pages).
        watermark_pdf_path: PDF holding the seal (page 1) and signature (page 2).
        output_pdf_path: Destination of the stamped PDF.
    """
    # Placement boxes given as x, y, width, height in the target page's
    # coordinate units.
    seal_x, seal_y, seal_w, seal_h = 240, 110, 115, 115
    sign_x, sign_y, sign_w, sign_h = 163, 573, 85, 50

    with Pdf.open(target_pdf_path) as target_pdf, \
            Pdf.open(watermark_pdf_path) as watermark_pdf:
        watermark_page_seal = watermark_pdf.pages[0]
        watermark_page_wyt = watermark_pdf.pages[1]

        # Official seal on the first page.
        target_pdf.pages[0].add_overlay(
            watermark_page_seal,
            Rectangle(seal_x, seal_y, seal_x + seal_w, seal_y + seal_h))

        # Signature on the third page.
        target_pdf.pages[2].add_overlay(
            watermark_page_wyt,
            Rectangle(sign_x, sign_y, sign_x + sign_w, sign_y + sign_h))

        # Save while both documents are still open: the overlays reference
        # content copied from the stamp file.
        target_pdf.save(output_pdf_path)
|
||||
|
||||
|
||||
def update_toc(docx_file):
    """Refresh the table-of-contents page numbers and export the file as PDF.

    The document is opened in a hidden Microsoft Word instance through COM.
    When it contains exactly one table of contents, its page numbers are
    updated. The document is then exported next to the original with the
    ``.docx`` suffix replaced by ``.pdf`` and saved.

    Word is always shut down, even when a step fails, so that no orphaned
    WINWORD process is left behind. On failure the document is closed
    without saving.

    Args:
        docx_file: Path of the Word document.
    """
    # Word resolves relative paths against its own working directory, so
    # hand it an absolute path.
    docx_file = os.path.abspath(docx_file)

    word = win32com.client.DispatchEx("Word.Application")
    try:
        word.Visible = 0        # keep the Word window hidden
        word.DisplayAlerts = 0  # suppress modal dialogs
        doc = word.Documents.Open(docx_file)
        exported = False
        try:
            # A count of 1 means the template already contains a TOC.
            toc_count = doc.TablesOfContents.Count
            if toc_count == 0:
                print("无目录")
            elif toc_count == 1:
                toc = doc.TablesOfContents(1)
                # Only the page numbers are refreshed; the entries themselves
                # are left as rendered by the template.
                toc.UpdatePageNumbers()

            # FileFormat 17 is Word's PDF export format.
            doc.SaveAs(docx_file.replace('.docx', '.pdf'), FileFormat=17)
            exported = True
        finally:
            # Persist the refreshed TOC only when the export succeeded.
            doc.Close(SaveChanges=exported)
    finally:
        word.Quit()
|
||||
|
||||
def toDate(strDT):
    """Format a date-like string as ``MM-DD``.

    Returns an empty string when the value cannot be parsed as a date.
    """
    parsed = pd.to_datetime(strDT, errors='coerce')
    if pd.isna(parsed):
        return ''
    return parsed.strftime('%m-%d')
|
||||
|
||||
# word模板替换
|
||||
def temp_word(tmep_path, word_apth, dContext, pathImage, city):
    """Render the Word template with the report context and chart images.

    The five chart images are looked up in ``pathImage`` as
    ``<city><chart key>.png`` and added to ``dContext`` (which is modified in
    place) under their chart key before the template is rendered.

    Args:
        tmep_path: Path of the docx template.
        word_apth: Path of the rendered document to write.
        dContext: Template context; receives the chart images.
        pathImage: Directory holding the chart images.
        city: Name prefix of the chart image files.
    """
    template = DocxTemplate(tmep_path)

    chart_keys = (
        'annulusMediaCount',
        'annulusCountyCount',
        'annulusCountyArticle',
        'annulusResult',
        'barCountyRatio',
    )
    charts = {}
    for key in chart_keys:
        image_path = os.path.join(pathImage, city + key + '.png')
        charts[key] = InlineImage(template, image_path, width=Mm(120))

    dContext.update(charts)
    template.render(dContext)
    template.save(word_apth)
|
||||
|
||||
|
||||
# 画柱状图
|
||||
def drawBar(data, recipe, title='', fn=''):
    """Draw a bar chart of rates and optionally save it as an image.

    The y axis is fixed to the range 0..1 and labelled in percent.

    Args:
        data: Bar heights, expected as fractions between 0 and 1.
        recipe: Bar labels, one per value in ``data``.
        title: Chart title.
        fn: Output image path; nothing is written when it is empty
            (consistent with ``drawAnnulus``).
    """
    # SimHei is needed to render the Chinese labels.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    def to_percent(value, position):
        # Tick formatter: 0.25 -> '25%'.
        return '%2.0f' % (100 * value) + '%'

    fig = plt.figure(figsize=(6, 4))
    try:
        plt.bar(recipe, data, width=0.5)
        plt.xticks(recipe, recipe, rotation=35)
        plt.ylim((0, 1))
        plt.gca().yaxis.set_major_formatter(FuncFormatter(to_percent))
        plt.title(title, fontsize=16)
        plt.tight_layout()
        if fn:
            plt.savefig(fn)
    finally:
        # Always release the figure so a failed chart does not leak into
        # the next one.
        plt.close(fig)
|
||||
|
||||
|
||||
# 画环状图
|
||||
def drawAnnulus(data, recipe, title='', fn=''):
    """Draw a ring (donut) chart with a legend and optionally save it.

    The canvas width, the number of legend columns and the legend font size
    grow with the number of legend entries.

    Args:
        data: Wedge sizes.
        recipe: Legend labels, one per wedge.
        title: Chart title; omitted when empty.
        fn: Output image path; nothing is written when it is empty.
    """
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    # Layout presets: (canvas width, legend columns, legend font size).
    label_count = len(recipe)
    if label_count > 40:
        canvas_width, legend_columns, legend_font = 16, 4, 'small'
    elif label_count > 20:
        canvas_width, legend_columns, legend_font = 16, 2, 'small'
    else:
        canvas_width, legend_columns, legend_font = 8, 1, 'medium'
    canvas_height = 4

    fig, ax = plt.subplots(figsize=(canvas_width, canvas_height),
                           subplot_kw=dict(aspect="equal"))

    # wedgeprops width < radius turns the pie into a ring; wedges are drawn
    # counter-clockwise starting at the positive x axis.
    wedges, texts = ax.pie(data, radius=1.1, wedgeprops=dict(width=0.4), startangle=0)

    # The monitoring-result chart places its legend closer to the ring.
    legend_x = 1.0 if title == '政务新媒体监测结果' else 1.2
    plt.legend(labels=recipe, loc="center left", bbox_to_anchor=(legend_x, 0.5),
               borderaxespad=0., ncol=legend_columns, fontsize=legend_font)
    if len(title) > 0:
        ax.set_title(title, fontsize=16, fontweight='heavy')

    plt.tight_layout()
    if len(fn) > 0:
        plt.savefig(fn)

    plt.cla()
    plt.clf()
    plt.close()
|
||||
|
||||
# summaryCity(city, dfc, dfcw, dfcs, context, strfnTemplate, os.path.join(strPathVerified,'Reports', city+'.docx'), strPathVerified )
|
||||
|
||||
|
||||
# 汇总市州数据,
|
||||
# 市州名称, 监测数据, cbz数据, mgc数据, context(编号、名称), word模板文件名称, 输出word文件名称, 临时文件目录
|
||||
# 需要传入模板文件,数据、错别字、敏感词,单位名称等
|
||||
# Punctuation removed from typo context sentences so that quotes, brackets
# and similar characters cannot disturb the table output of the template.
_CBZ_PUNCTUATION = "[——,$%^,。?、~@#¥%……&*《》<>「」{}【】()/\\\\[\\]'\"]"


def _format_int(value):
    """Format a numeric cell as an integer string; blank, zero or NaN give ''."""
    if pd.isna(value) or not value:
        return ''
    return "%d" % value


def _format_day(value):
    """Format a date cell as MM-DD; blank or unparsable values give ''."""
    return toDate(str(value)) if value else ''


def _select_city_data(city, df, dfW, dfS):
    """Filter the monitoring, typo and sensitive-word frames for one report.

    Returns ``(dfc, dfcw, dfcs, subordinate, subordinateName)`` where
    ``subordinate`` is the column used to break the report down and
    ``subordinateName`` its display name.
    """
    subordinate = '区县/地方部门'
    subordinateName = '县区'

    if "庆阳市" in city:
        in_qingyang = df['市/省局'] == '庆阳市'
        # County-level reports of Qingyang use only that county's accounts
        # but the typo / sensitive-word findings of the whole city.
        for county in ('华池县', '宁县', '镇原县'):
            if county in city:
                dfc = df.loc[in_qingyang & (df['区县/地方部门'] == county)].copy()
                break
        else:
            dfc = df.loc[in_qingyang].copy()
        dfcw = dfW.loc[dfW['市州'] == '庆阳市'].copy()
        dfcs = dfS.loc[dfS['市州'] == '庆阳市'].copy()
    elif "甘肃" in city:
        # The provincial report aggregates a fixed list of cities and is
        # broken down by city instead of by county.
        cities = {'白银市', '武威市', '庆阳市', '酒泉市', '天水市', '临夏回族自治州', '平凉市', '定西市', '兰州新区',
                  '嘉峪关市', '陇南市', '张掖市'}
        dfc = df.loc[df['市/省局'].isin(cities)].copy()
        dfcw = dfW.loc[dfW['市州'].isin(cities)].copy()
        dfcs = dfS.loc[dfS['市州'].isin(cities)].copy()
        subordinate = '市/省局'
        subordinateName = '市州'
    else:
        dfc = df.loc[df['市/省局'] == city].copy()
        dfcw = dfW.loc[dfW['市州'] == city].copy()
        dfcs = dfS.loc[dfS['市州'] == city].copy()

    return dfc, dfcw, dfcs, subordinate, subordinateName


def _account_table(frame, subordinate):
    """Build the template rows of an account list (qualified or unqualified)."""
    table = []
    for _, row in frame.iterrows():
        table.append({
            'name': row['账号名称'], 'type': row['账号类型'], 'unit': row['开设主体'],
            'county': row[subordinate], 'result': row['监测结果'],
            'num': _format_int(row['更新次数']),
            'days': _format_int(row['最大静默日数']),
            'start': _format_day(row['静默开始日期']),
            'end': _format_day(row['静默结束日期']),
        })
    return table


def _issue_table(frame, strip_punctuation):
    """Build the template rows of a typo or sensitive-word list.

    Missing cells are rendered as empty strings. When ``strip_punctuation``
    is true the context sentence is cleaned with ``_CBZ_PUNCTUATION``.
    """
    frame = frame.fillna('')
    has_title = '标题' in frame.columns
    pattern = re.compile(_CBZ_PUNCTUATION)
    table = []
    for _, row in frame.iterrows():
        sentence = row['错误出现位置']
        if strip_punctuation:
            sentence = pattern.sub('', str(sentence))
        table.append({
            'error': row['错误'], 'tips': row['建议'], 'sentence': sentence,
            'type': row['账号类型'], 'name': row['账号名称'],
            'date': toDate(str(row['发文时间'])),
            'title': row['标题'] if has_title else '',
        })
    return table


def summaryCity(info, city, df, dfW, dfS, fnTemplate, fnReport, dirTemp):
    """Generate the monitoring report of one city (docx, pdf, stamped pdf).

    The function filters the data for ``city``, computes the statistics and
    tables used by the Word template, draws the five charts into
    ``dirTemp``, renders the template to ``fnReport``, exports it as PDF and
    finally adds the seal and signature.

    NOTE(review): the nesting of the original code was reconstructed from a
    source whose indentation was lost. Two spots were ambiguous and were
    resolved defensively: 'sCountyArticles' is an empty string when there is
    only one subordinate unit, and the typo / sensitive-word summary strings
    and (possibly empty) tables are always set.

    Args:
        info: Report metadata merged into the template context; must contain
            the string ``serialNum``.
        city: Report subject; must be a key of the client table below.
        df: Cleaned monitoring data of all cities.
        dfW: Typo findings with a ``市州`` column.
        dfS: Sensitive-word findings with a ``市州`` column.
        fnTemplate: Path of the docx template.
        fnReport: Path of the docx report to write.
        dirTemp: Directory receiving the chart images.

    Raises:
        ValueError: ``city`` is unknown or has no monitoring data.
    """
    dCityClient = {
        '甘肃省': "甘肃省人民政府办公厅",
        '省直部门': "甘肃省人民政府办公厅",
        '白银市': "白银市人民政府办公室",
        '定西市': "定西市人民政府办公室",
        '临夏回族自治州': "临夏回族自治州人民政府办公室",
        '平凉市': "中共平凉市委网络安全和信息化委员会办公室",
        "庆阳市": "庆阳市电子政务与信息资源管理办公室",
        '庆阳市华池县': "华池县人民政府办公室",
        '庆阳市宁县': "宁县人民政府办公室",
        "庆阳市镇原县": "镇原县人民政府办公室",
        "酒泉市": "酒泉市人民政府办公室",
        "天水市": "天水市人民政府办公室",
        "武威市": "武威市人民政府办公室",
        "金昌市": "金昌市人民政府办公室",
        "嘉峪关市": "嘉峪关市人民政府办公室",
        "兰州新区": "兰州新区管委会办公室",
        "陇南市": "陇南市政务服务中心",
        "张掖市": "张掖市政务服务中心",
        "甘南藏族自治州": "甘南藏族自治州政务服务中心",
        "兰州市": "兰州市政务服务中心",
    }
    dHavingSubordinateUnits = {
        '甘肃省': True, '白银市': True, '定西市': True,
        '临夏回族自治州': True, '平凉市': True, "庆阳市": True, "酒泉市": True, "天水市": True,
        "陇南市": True, "张掖市": True, "甘南藏族自治州": True, "兰州市": True,
        "武威市": True, "金昌市": True,
        '省直部门': False, "兰州新区": False, '庆阳市华池县': False,
        '庆阳市宁县': False, "庆阳市镇原县": False, "嘉峪关市": False,
    }
    print("----------------" + city + "----------------")

    # The report id prefix is the position of the city in dCityClient, so the
    # order of that dict must stay stable.
    strID = "%02d" % (list(dCityClient).index(city))
    context = {
        "city": city,
        "client": dCityClient[city],
        "reportid": strID + info['serialNum'],
        'havingSubordinateUnits': dHavingSubordinateUnits[city],
        'havingBelowStandard': True,
        'havingUpStandard': True,
        'havingCbz': True,
        'havingMgc': True
    }
    context.update(info)

    dfc, dfcw, dfcs, subordinate, subordinateName = _select_city_data(city, df, dfW, dfS)
    if dfc.empty:
        raise ValueError('No monitoring data found for ' + city)

    dCity = {}

    # -----------------------
    # Accounts per subordinate unit and monitoring result.
    # fill_value=0 keeps the table numeric; empty cells are blanked only
    # when the template rows are built.
    dfCountyAccount = pd.pivot_table(dfc, index=[subordinate], columns=['监测结果'], values=['账号名称'],
                                     aggfunc='count', fill_value=0, margins=True)
    dfCountyAccount.columns = dfCountyAccount.columns.droplevel(0)
    dfCountyAccount = dfCountyAccount.fillna(0).astype(int)

    # Labels and totals of the result chart: only results that actually occur.
    resultLabels = [label for label in dfCountyAccount.columns if label != 'All']
    resultCounts = [int(dfCountyAccount.loc['All', label]) for label in resultLabels]
    if '合格' not in dfCountyAccount.columns:
        # No qualified account at all: the pass rate is simply 0.
        dfCountyAccount['合格'] = 0

    def blank_if_zero(value):
        return int(value) if value else ''

    tt3_list = []
    for index, row in dfCountyAccount.iterrows():
        tt3_list.append({
            'county': '总 计' if index == 'All' else index,
            'hg': blank_if_zero(row['合格']),
            'u2w': blank_if_zero(row.get('超过两周未更新', 0)),
            'un': blank_if_zero(row.get('监测期间未更新', 0)),
            'count': blank_if_zero(row['All']),
        })
    context['tt3_contents'] = tt3_list

    # -----------------------
    # Accounts per media type.
    dfMedia = pd.pivot_table(dfc, index=['账号类型'], values=['账号名称'], aggfunc='count',
                             fill_value=0, margins=True)
    dCity['nmCount'] = dfMedia.loc['All', '账号名称']
    print(' 监测账号数:', dCity['nmCount'])
    # Drop the total explicitly instead of assuming it sorts first.
    mediaSeries = dfMedia.drop(index='All')['账号名称'].sort_values(ascending=False)
    mediaTypes = mediaSeries.index.tolist()
    mediaNumbers = mediaSeries.tolist()
    dCity['sMediaCount'] = ','.join(m + str(n) + '个' for m, n in zip(mediaTypes, mediaNumbers))
    context.update({'tt1_contents': [{'type': m, 'count': str(n)}
                                     for m, n in zip(mediaTypes, mediaNumbers)]})

    # -----------------------
    # Update counts per subordinate unit.
    dfCountyArticle = pd.pivot_table(dfc, index=[subordinate], values=['更新次数'], aggfunc='sum',
                                     fill_value=0, margins=True)
    articleTotal = dfCountyArticle.loc['All', '更新次数']
    articleSeries = dfCountyArticle.drop(index='All')['更新次数'].sort_values(ascending=False)
    articleUnits = articleSeries.index.tolist()
    articleNumbers = articleSeries.tolist()
    dCity['cityArticleCount'] = "%d" % articleTotal
    dCity['countyMostArticle'] = articleUnits[0]
    dCity['countyMostArticleCount'] = "%d" % articleNumbers[0]
    if len(articleUnits) > 1:
        dCity['sCountyArticles'] = ',按管理矩阵统计,' + ','.join(
            u + "%d" % n + "次" for u, n in zip(articleUnits, articleNumbers))
    else:
        # A breakdown makes no sense for a single unit.
        dCity['sCountyArticles'] = ''

    # -----------------------
    # Pass rates.
    dfCountyAccount['rate'] = dfCountyAccount['合格'] / dfCountyAccount['All']
    dCity['cityRatio'] = "{:.1%}".format(dfCountyAccount.loc['All', 'rate'])
    print(' 合格率:', dCity['cityRatio'])
    qualifiedTotal = int(dfCountyAccount.loc['All', '合格'])

    # Units ordered by number of accounts.
    dfUnits = dfCountyAccount.drop(index='All').sort_values(by='All', ascending=False)
    counties = dfUnits.index.tolist()
    countyCounts = dfUnits['All'].tolist()
    dCity['countyCount'] = "%d" % len(counties)
    dCity['sCounties'] = ','.join(counties)
    dCity['sCountyCount'] = ','.join(c + str(n) + '个' for c, n in zip(counties, countyCounts))

    # Units ordered by pass rate.
    rateSeries = dfUnits['rate'].sort_values(ascending=False)
    countieshege = rateSeries.index.tolist()
    countyRates = rateSeries.tolist()
    ratioTexts = ["%.1f" % (100.0 * rate) for rate in countyRates]
    dCity['sCountyRatio'] = ','.join(c + t + '%' for c, t in zip(countieshege, ratioTexts))
    dCity['tt2_contents'] = [{'county': c, 'ratio': t + '%'}
                             for c, t in zip(countieshege, ratioTexts)]

    # -----------------------
    # Charts.
    print(' 生成图片...')
    drawAnnulus(mediaNumbers, mediaTypes,
                '政务新媒体账号类型', os.path.join(dirTemp, city + 'annulusMediaCount.png'))
    drawAnnulus(countyCounts, counties,
                subordinateName + '政务新媒体账号数量', os.path.join(dirTemp, city + 'annulusCountyCount.png'))
    drawAnnulus(articleNumbers, articleUnits,
                subordinateName + '政务新媒体累计更新次数', os.path.join(dirTemp, city + 'annulusCountyArticle.png'))

    # Monitoring result summary: qualified count, rate and unqualified count.
    nmCount = dCity['nmCount']
    dCity['resultQualified'] = "%d" % qualifiedTotal
    dCity['resultQualifiedRatio'] = "%.1f%%" % (qualifiedTotal / nmCount * 100.0)
    dCity['resultUnqualified'] = "%d" % (nmCount - qualifiedTotal)

    if '监测期间未更新' in resultLabels:
        numNoupdated = dfCountyAccount.loc['All', '监测期间未更新']
        dCity['stringResultNoUpdated'] = ";%d个政务新媒体监测期间未更新,占监测总数的%.1f%%" % (
            numNoupdated, numNoupdated / nmCount * 100.0)
        dCity['stringNoUpdated'] = "%d个政务新媒体监测期间未更新。" % (numNoupdated)
    else:
        dCity['stringResultNoUpdated'] = ''
        dCity['stringNoUpdated'] = ""

    if '超过两周未更新' in resultLabels:
        numNoupdated2W = dfCountyAccount.loc['All', '超过两周未更新']
        dCity['stringResultNoUpdated2W'] = ";%d个政务新媒体连续未更新时间超过两周,占监测总数的%.1f%%" % (
            numNoupdated2W, numNoupdated2W / nmCount * 100.0)
        dCity['stringNoUpdated2W'] = "%d个政务新媒体连续未更新时间超过两周。" % (numNoupdated2W)
    else:
        dCity['stringResultNoUpdated2W'] = ''
        dCity['stringNoUpdated2W'] = ''

    drawAnnulus(resultCounts, resultLabels,
                '政务新媒体监测结果', os.path.join(dirTemp, city + 'annulusResult.png'))
    drawBar(countyRates, countieshege,
            '政务新媒体管理矩阵发布时效性合格率榜单', os.path.join(dirTemp, city + 'barCountyRatio.png'))

    # -----------------------
    # Tables of the report.
    print(' 生成报告...')

    dfCityUnqulified = dfc[dfc['监测结果'] != '合格'].sort_values(by="监测结果", ascending=True)
    dfCityQulified = dfc[dfc['监测结果'] == '合格'].sort_values(by=subordinate, ascending=True)

    # Unqualified accounts.
    if len(dfCityUnqulified) < 1:
        context.update({'havingBelowStandard': False})
    context.update({'tt4_contents': _account_table(dfCityUnqulified, subordinate)})

    # Qualified accounts.
    if len(dfCityQulified) < 1:
        context.update({'havingUpStandard': False})
    context.update({'tt5_contents': _account_table(dfCityQulified, subordinate)})

    # Typos.
    if dfcw.shape[0] < 1:
        context.update({'havingCbz': False})
        dCity['stringCbzCount'] = '本次检测未发现错别字。'
    else:
        dCity['stringCbzCount'] = '本次检测发现错别字%d处,详细情况见附表政务新媒体发布内容错别字统计表。' % (dfcw.shape[0])
    context.update({'tCbz_contents': _issue_table(dfcw, strip_punctuation=True)})

    # Sensitive words.
    if dfcs.shape[0] < 1:
        context.update({'havingMgc': False})
        dCity['stringMgcCount'] = '本次检测未发现涉敏内容。'
    else:
        dCity['stringMgcCount'] = '本次检测发现敏感信息%d处,详细情况见附表政务新媒体发布内容敏感信息统计表。' % (dfcs.shape[0])
    context.update({'tMgc_contents': _issue_table(dfcs, strip_punctuation=False)})

    context.update(dCity)

    # -----------------------
    # Render the template, refresh the TOC / export the PDF, then stamp it.
    temp_word(fnTemplate,
              fnReport,
              context, dirTemp, city)

    update_toc(fnReport)

    addStamp(fnReport.replace('.docx', '.pdf'), 'D:/Projects/POM/DEV/SCRIPTS/stamps_dwl.pdf',
             fnReport.replace('.docx', '_Stamp.pdf'))
|
||||
|
||||
|
||||
|
||||
def createDir(dirP, dirS):
    """Ensure that the sub-directory ``dirS`` exists below ``dirP``.

    Args:
        dirP: Existing parent directory.
        dirS: Name of the sub-directory to create.

    Returns:
        The path of the sub-directory when it exists afterwards. ``dirP`` is
        returned unchanged when it is not a directory itself or when the
        sub-directory cannot be created (for example because a file with
        that name already exists); the latter case is reported on stdout.
    """
    if not os.path.isdir(dirP):
        return dirP

    target = os.path.join(dirP, dirS)
    try:
        os.makedirs(target, exist_ok=True)
    except OSError:
        # Handled below: fall back to the parent directory.
        pass

    if os.path.isdir(target):
        return target

    # Report the directory that failed, not the fallback that is returned.
    print('Directory ' + target + ' cannot be created.')
    return dirP
|
||||
# def createDir(dirP, dirS):
|
||||
|
||||
def summary(info, strFnData, strFnW, strFnS, strfnTemplate, strPathOutput):
    """Load and clean the monitoring data, then generate one report per city.

    Reports are written to a ``Reports`` directory and chart images to an
    ``Intermediate`` directory, both created below the directory part of
    ``strPathOutput``.

    Args:
        info: Report metadata passed on to ``summaryCity``.
        strFnData: Excel file with the monitoring data.
        strFnW: Excel file with the merged typo findings.
        strFnS: Excel file with the merged sensitive-word findings.
        strfnTemplate: Path of the docx template.
        strPathOutput: Output location.
    """
    df = pd.read_excel(strFnData)
    dfW = pd.read_excel(strFnW)
    dfS = pd.read_excel(strFnS)

    result_col = '监测结果'
    city_col = '市/省局'
    county_col = '区县/地方部门'

    # Use a single wording for the "not updated for two weeks" result.
    df.loc[df[result_col] == '连续两周未更新', result_col] = '超过两周未更新'

    # Shorten very long county names so they fit into the charts.
    short_names = (
        ('积石山保安族东乡族撒拉族自治县', '积石山县'),
        ('阿克塞哈萨克族自治县', '阿克塞自治县'),
    )
    for long_name, short_name in short_names:
        df.loc[df[county_col] == long_name, county_col] = short_name

    # Rows without a city belong to provincial departments, rows without a
    # county to city-level (prefecture-level for Linxia) departments.
    df[city_col] = df[city_col].fillna('省直部门')
    df[county_col] = df[county_col].fillna('市直部门')
    linxia_direct = (df[city_col] == '临夏回族自治州') & (df[county_col] == '市直部门')
    df.loc[linxia_direct, county_col] = '州直部门'

    # Strip all whitespace from text cells, then the leading "其他+" prefix.
    df = df.replace(r'\s+', '', regex=True)
    df = df.replace(r'^其他\+', '', regex=True)
    df['更新次数'] = df['更新次数'].fillna(0)
    df = df.fillna(value='')

    # Cities (and counties reported separately) covered by this run.
    cities = {'白银市', '武威市', '庆阳市', '酒泉市', '天水市', '临夏回族自治州', '平凉市', '定西市', '兰州新区',
              '嘉峪关市', '庆阳市华池县', '庆阳市镇原县', '庆阳市宁县', '陇南市', '张掖市', '甘肃省'}

    base_dir = os.path.abspath(os.path.dirname(strPathOutput))
    dirReports = createDir(base_dir, 'Reports')
    dirIntermediate = createDir(base_dir, 'Intermediate')
    for city in cities:
        report_path = os.path.join(dirReports, city + '.docx')
        summaryCity(info, city, df, dfW, dfS, strfnTemplate, report_path, dirIntermediate)
|
||||
|
||||
# 合并错别字文件
|
||||
def mergeCMC(keyword, strPathCBZ, strFnCbz):
    """Merge the per-city finding workbooks into one Excel file.

    Every ``*<keyword>*.xlsx`` file in ``strPathCBZ`` is read, tagged with
    the city derived from its file name (column ``市州``) and appended to
    the merged table, which is written to ``strFnCbz``.

    The frames are collected and concatenated once with ``pd.concat``;
    ``DataFrame.append`` no longer exists in pandas 2. When no file matches,
    an empty table that still has the ``市州`` column is written so that
    later filtering by city keeps working.

    Args:
        keyword: Text that must occur in the file names to merge.
        strPathCBZ: Directory holding the per-city workbooks.
        strFnCbz: Path of the merged workbook to write.
    """
    # Short names / abbreviations used in file names -> official city name.
    # The first key found in the file name wins, so the order matters.
    cityShorten = {'白银': '白银市', '定西': '定西市', '酒泉': '酒泉市', '嘉峪关': '嘉峪关市', '陇南': '陇南市',
                   '临夏': '临夏回族自治州', '平凉': '平凉市', '庆阳': '庆阳市', '天水': '天水市', '武威': '武威市', '新区': '兰州新区',
                   '兰州新区': '兰州新区', '兰州': '兰州市', '张掖': '张掖市', '甘南': '甘南藏族自治州', '省直': '省直部门', '金昌': '金昌市',
                   'BY': '白银市', 'DX': '定西市', 'JQ': '酒泉市', 'JYG': '嘉峪关市', 'LN': '陇南市',
                   'LX': '临夏回族自治州', 'PL': '平凉市', 'QY': '庆阳市', 'TS': '天水市', 'WW': '武威市', 'XQ': '兰州新区',
                   'LZXQ': '兰州新区', 'LZ': '兰州市', 'ZY': '张掖市', 'GN': '甘南藏族自治州', 'SZ': '省直部门', 'JC': '金昌市', }

    frames = []
    total_rows = 0
    # Sorted so that the merged row order does not depend on the file system.
    for fn in sorted(glob.glob(os.path.join(strPathCBZ, '*' + keyword + '*.xlsx'))):
        f = os.path.basename(fn)
        city = next((full for short, full in cityShorten.items() if short in f), '')
        if not city:
            print("!!!!! City Name not matched ( ", f, " )")
        dfn = pd.read_excel(fn)
        dfn['市州'] = city
        frames.append(dfn)
        total_rows += dfn.shape[0]
        print(city, f, dfn.shape[0], '/', total_rows)

    if frames:
        df = pd.concat(frames, ignore_index=True)
    else:
        df = pd.DataFrame(columns=['市州'])
    df.to_excel(strFnCbz)
|
||||
#def mergeCMC
|
||||
|
||||
if __name__ == "__main__":
    # Before running: convert the date columns of the Excel files.

    # Report metadata merged into the template context.
    info = {
        "year": "2023",
        "month": "3",
        "datePub": "二〇二三年四月",
        "dateStart": "2023年3月1日",
        "dateEnd": "2023年3月31日",
        "days": "31",
        "serialNum": "4",
    }

    # Data root directory and its working sub-directories.
    strPath = 'D:/Projects/POM/DATA/2023年4月/3月报告/'
    for subDir in ('全文', '转发', '报告', '汇总', '监测'):
        createDir(strPath, subDir)

    # Monitoring data and Word template.
    strFnMonitoring = strPath + '汇总/3月汇总数据_2023.3.xlsx'
    strPathTemplate = strPath + 'POM_ReportTemplate.docx'

    # Merged typo findings; built from the per-city files when missing.
    strFnCbz = strPath + '汇总/CBZ.xlsx'
    if not os.path.exists(strFnCbz):
        mergeCMC("错别", strPath + '监测/', strFnCbz)

    # Merged sensitive-word findings; built the same way.
    strFnMgc = strPath + '汇总/MGC.xlsx'
    if not os.path.exists(strFnMgc):
        mergeCMC("敏感", strPath + '监测/', strFnMgc)

    # Loading, cleaning and the per-city report loop live in summary();
    # the script used to carry a verbatim copy of that code.
    summary(info, strFnMonitoring, strFnCbz, strFnMgc, strPathTemplate, strPath)
|
|
@ -0,0 +1,604 @@
|
|||
# 1. 打开监测任务表格
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os, glob, re
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.ticker import FuncFormatter
|
||||
import datetime
|
||||
|
||||
from docxtpl import DocxTemplate
|
||||
from docxtpl import InlineImage
|
||||
from docx.shared import Mm
|
||||
|
||||
def toDate(strDT):
    """Format a date-like string as ``MM-DD``.

    Returns an empty string when the value cannot be parsed as a date.
    """
    parsed = pd.to_datetime(strDT, errors='coerce')
    if pd.isna(parsed):
        return ''
    return parsed.strftime('%m-%d')
|
||||
|
||||
# word模板替换
|
||||
def temp_word(tmep_path, word_apth, dContext, pathImage, city):
    """Render the Word template with the report context and chart images.

    The five chart images are looked up in ``pathImage`` as
    ``<city><chart key>.png`` and added to ``dContext`` (which is modified in
    place) under their chart key before the template is rendered.

    Args:
        tmep_path: Path of the docx template.
        word_apth: Path of the rendered document to write.
        dContext: Template context; receives the chart images.
        pathImage: Directory holding the chart images.
        city: Name prefix of the chart image files.
    """
    template = DocxTemplate(tmep_path)

    chart_keys = (
        'annulusMediaCount',
        'annulusCountyCount',
        'annulusCountyArticle',
        'annulusResult',
        'barCountyRatio',
    )
    charts = {}
    for key in chart_keys:
        image_path = os.path.join(pathImage, city + key + '.png')
        charts[key] = InlineImage(template, image_path, width=Mm(120))

    dContext.update(charts)
    template.render(dContext)
    template.save(word_apth)
|
||||
|
||||
|
||||
# 画柱状图
|
||||
def drawBar(data, recipe, title='', fn=''):
    """Draw a bar chart of rates and optionally save it as an image.

    The y axis is fixed to the range 0..1 and labelled in percent.

    Args:
        data: Bar heights, expected as fractions between 0 and 1.
        recipe: Bar labels, one per value in ``data``.
        title: Chart title.
        fn: Output image path; nothing is written when it is empty
            (consistent with ``drawAnnulus``).
    """
    # SimHei is needed to render the Chinese labels.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    def to_percent(value, position):
        # Tick formatter: 0.25 -> '25%'.
        return '%2.0f' % (100 * value) + '%'

    fig = plt.figure(figsize=(6, 4))
    try:
        plt.bar(recipe, data, width=0.5)
        plt.xticks(recipe, recipe, rotation=35)
        plt.ylim((0, 1))
        plt.gca().yaxis.set_major_formatter(FuncFormatter(to_percent))
        plt.title(title, fontsize=16)
        plt.tight_layout()
        if fn:
            plt.savefig(fn)
    finally:
        # Always release the figure so a failed chart does not leak into
        # the next one.
        plt.close(fig)
|
||||
|
||||
|
||||
# 画环状图
|
||||
def drawAnnulus(data, recipe, title='', fn=''):
    """Draw a ring (donut) chart with a legend and optionally save it.

    The canvas width, the number of legend columns and the legend font size
    depend on the number of legend entries.

    Args:
        data: Wedge sizes.
        recipe: Legend labels, one per wedge.
        title: Chart title; omitted when empty.
        fn: Output image path; nothing is written when it is empty.
    """
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    # Layout presets: (canvas width, legend columns, legend font size).
    # Valid named sizes are xx-small, x-small, small, medium, large, x-large
    # and xx-large; the former value 'xmall' made matplotlib raise for
    # charts with 21 to 40 legend entries.
    label_count = len(recipe)
    if label_count > 40:
        canvas_width, legend_columns, legend_font = 16, 4, 'x-small'
    elif label_count > 20:
        canvas_width, legend_columns, legend_font = 16, 2, 'small'
    else:
        canvas_width, legend_columns, legend_font = 8, 1, 'medium'
    canvas_height = 4

    fig, ax = plt.subplots(figsize=(canvas_width, canvas_height),
                           subplot_kw=dict(aspect="equal"))

    # wedgeprops width < radius turns the pie into a ring; wedges are drawn
    # counter-clockwise starting at the positive x axis.
    wedges, texts = ax.pie(data, radius=1.1, wedgeprops=dict(width=0.4), startangle=0)

    # The monitoring-result chart places its legend closer to the ring.
    legend_x = 1.0 if title == '政务新媒体监测结果' else 1.2
    plt.legend(labels=recipe, loc="center left", bbox_to_anchor=(legend_x, 0.5),
               borderaxespad=0., ncol=legend_columns, fontsize=legend_font)
    if len(title) > 0:
        ax.set_title(title, fontsize=16, fontweight='heavy')

    plt.tight_layout()
    if len(fn) > 0:
        plt.savefig(fn)

    plt.cla()
    plt.clf()
    plt.close()
|
||||
|
||||
# summaryCity(city, dfc, dfcw, dfcs, context, strfnTemplate, os.path.join(strPathVerified,'Reports', city+'.docx'), strPathVerified )
|
||||
|
||||
|
||||
# 汇总市州数据,
|
||||
# 市州名称, 监测数据, cbz数据, mgc数据, context(编号、名称), word模板文件名称, 输出word文件名称, 临时文件目录
|
||||
# 需要传入模板文件,数据、错别字、敏感词,单位名称等
|
||||
def _blank_if_na(value):
    """Return '' when *value* is a missing marker (NaN/NaT/None), else *value* unchanged."""
    return '' if pd.isna(value) else value


def _account_rows(frame, subordinate):
    """Build the per-account template rows (tables tt4/tt5) from a monitoring frame.

    Falsy cells (0 or '') are rendered as empty strings, as the tables did before.
    """
    rows = []
    for _, row in frame.iterrows():
        rows.append({
            'name': row['账号名称'], 'type': row['账号类型'], 'unit': row['开设主体'],
            'county': row[subordinate], 'result': row['监测结果'],
            'num': "%d" % row['更新次数'] if row['更新次数'] else '',
            'days': "%d" % row['最大静默日数'] if row['最大静默日数'] else '',
            'start': toDate(str(row['静默开始日期'])) if row['静默开始日期'] else '',
            'end': toDate(str(row['静默结束日期'])) if row['静默结束日期'] else '',
        })
    return rows


def summaryCity(info, city, df, dfW, dfS, fnTemplate, fnReport, dirTemp):
    """Aggregate one city's monitoring data, draw its charts and render its Word report.

    Args:
        info: report-wide template values; ``info['num']`` is appended to the report id
            and the whole dict is merged into the template context.
        city: report subject; must be a key of the client table below.
        df: monitoring data with the columns read below (市/省局, 区县/地方部门, 监测结果,
            账号名称, 账号类型, 开设主体, 更新次数, 最大静默日数, 静默开始日期, 静默结束日期).
        dfW: typo findings, filtered by its 市州 column.
        dfS: sensitive-word findings, filtered by its 市州 column.
        fnTemplate: path of the docx template.
        fnReport: path of the report to write.
        dirTemp: directory receiving the intermediate chart images.

    Raises:
        ValueError: if *city* is unknown or has no monitoring rows.

    Changes relative to the previous implementation:
        * the 省直部门 branch referenced an undefined ``dictSC``; it now filters by *city*;
        * totals are computed explicitly instead of assuming the 'All' margin row sorts
          first (on a tie, e.g. a single-county report, 'All' was taken as a county);
        * pivot cells are filled with 0, so the integer conversion of the 合格 column
          no longer fails on ''-filled cells, and a missing 合格 column is tolerated;
        * missing cells in the typo / sensitive-word tables are blanked (the earlier
          ``fillna('')`` calls discarded their result).
    """
    dCityClient = {
        '甘肃省': "甘肃省人民政府办公厅",
        '省直部门': "甘肃省人民政府办公厅",
        '白银市': "白银市人民政府办公室",
        '定西市': "定西市人民政府办公室",
        '临夏回族自治州': "临夏回族自治州人民政府办公室",
        '平凉市': "中共平凉市委网络安全和信息化委员会办公室",
        "庆阳市": "庆阳市电子政务与信息资源管理办公室",
        '庆阳市华池县': "华池县人民政府办公室",
        '庆阳市宁县': "宁县人民政府办公室",
        "庆阳市镇原县": "镇原县人民政府办公室",
        "酒泉市": "酒泉市人民政府办公室",
        "天水市": "天水市人民政府办公室",
        "武威市": "武威市人民政府办公室",
        "金昌市": "金昌市人民政府办公室",
        "嘉峪关市": "嘉峪关市人民政府办公室",
        "兰州新区": "兰州新区管委会办公室",
        "陇南市": "陇南市人民政府办公室",
    }
    print("----------------" + city + "----------------")

    # Report id = two-digit position of the city in the client table + info['num'].
    if city not in dCityClient:
        raise ValueError("unknown city %r: add it to dCityClient" % city)
    strID = "%02d" % list(dCityClient).index(city)
    context = {
        "city": city,
        "client": dCityClient[city],
        "reportid": strID + info['num'],
    }
    context.update(info)

    # Column used to break the data down, and its display name in chart titles.
    subordinate = '区县/地方部门'
    subordinateName = '县区'

    # ----- select the rows belonging to this report -----
    if "庆阳市" in city:
        # Qingyang itself, or one of its counties that gets a separate report.
        mask = df['市/省局'] == '庆阳市'
        for county in ('华池县', '宁县', '镇原县'):
            if county in city:
                mask = mask & (df['区县/地方部门'] == county)
                break
        dfc = df.loc[mask].copy()
        # Typos / sensitive words are not split per county.
        dfcw = dfW.loc[dfW['市州'] == '庆阳市'].copy()
        dfcs = dfS.loc[dfS['市州'] == '庆阳市'].copy()
    elif "甘肃" in city:
        # Province-wide report: everything, broken down by city.
        dfc = df.copy()
        dfcw = dfW.copy()
        dfcs = dfS.copy()
        subordinate = '市/省局'
        subordinateName = '市州'
    else:
        # Ordinary city, including 省直部门.
        dfc = df.loc[df['市/省局'] == city].copy()
        dfcw = dfW.loc[dfW['市州'] == city].copy()
        dfcs = dfS.loc[dfS['市州'] == city].copy()

    if dfc.empty:
        raise ValueError("no monitoring data found for %r" % city)

    dCity = {}

    # ----- accounts per subordinate unit and monitoring result -----
    dfCountyAccount = pd.pivot_table(dfc, index=[subordinate], columns=['监测结果'], values=['账号名称'],
                                     aggfunc='count', fill_value=0, margins=True)
    dfCountyAccount.columns = dfCountyAccount.columns.droplevel(0)
    dfCountyAccount = dfCountyAccount.fillna(0).astype(int)
    if '合格' not in dfCountyAccount.columns:
        dfCountyAccount.insert(0, '合格', 0)

    def _cell(row, column):
        # Absent or zero counts are shown as an empty table cell.
        if column in row.index and row[column]:
            return int(row[column])
        return ''

    context['tt3_contents'] = [
        {'county': '总 计' if index == 'All' else index,
         'hg': _cell(row, '合格'),
         'u2w': _cell(row, '超过两周未更新'),
         'un': _cell(row, '监测期间未更新'),
         'count': _cell(row, 'All')}
        for index, row in dfCountyAccount.iterrows()
    ]

    totals = dfCountyAccount.loc['All']  # city-wide count per result, plus 'All'
    byCounty = dfCountyAccount.drop(index='All').sort_values(by='All', ascending=False)
    byCounty['rate'] = byCounty['合格'] / byCounty['All']

    # ----- accounts per media type -----
    mediaCounts = dfc.groupby('账号类型')['账号名称'].count().sort_values(ascending=False)
    nmCount = int(mediaCounts.sum())
    dCity['nmCount'] = nmCount
    print(' 监测账号数:', nmCount)
    dCity['sMediaCount'] = ','.join(m + str(n) + '个' for m, n in mediaCounts.items())
    context.update({'tt1_contents': [{'type': m, 'count': str(n)} for m, n in mediaCounts.items()]})

    # ----- update counts per subordinate unit -----
    updates = pd.to_numeric(dfc['更新次数'], errors='coerce').fillna(0)
    articleCounts = updates.groupby(dfc[subordinate]).sum().sort_values(ascending=False)
    dCity['cityArticleCount'] = "%d" % articleCounts.sum()
    dCity['countyMostArticle'] = articleCounts.index[0]
    dCity['countyMostArticleCount'] = "%d" % articleCounts.iloc[0]
    dCity['sCountyArticles'] = ','.join(name + "%d" % n + "次" for name, n in articleCounts.items())

    # ----- pass rate -----
    dCity['cityRatio'] = "{:.1%}".format(totals['合格'] / totals['All'])
    print(' 合格率:', dCity['cityRatio'])

    # Units ordered by account count.
    counties = byCounty.index.tolist()
    countyCounts = byCounty['All'].tolist()
    print(countyCounts)
    print(counties)
    dCity['countyCount'] = "%d" % len(counties)
    dCity['sCounties'] = ','.join(counties)
    dCity['sCountyCount'] = ','.join(c + str(n) + '个' for c, n in zip(counties, countyCounts))

    # Units ordered by pass rate.
    ranked = byCounty.sort_values(by='rate', ascending=False)
    countieshege = ranked.index.tolist()
    countyRates = ranked['rate'].tolist()
    ratioTexts = ["%.1f" % (100.0 * rate) + '%' for rate in countyRates]
    dCity['sCountyRatio'] = ','.join(c + r for c, r in zip(countieshege, ratioTexts))
    dCity['tt2_contents'] = [{'county': c, 'ratio': r} for c, r in zip(countieshege, ratioTexts)]

    # ----- charts -----
    print(' 生成图片...')
    drawAnnulus(mediaCounts.tolist(), mediaCounts.index.tolist(),
                '政务新媒体账号类型', os.path.join(dirTemp, city + 'annulusMediaCount.png'))
    drawAnnulus(countyCounts, counties,
                subordinateName + '政务新媒体账号数量', os.path.join(dirTemp, city + 'annulusCountyCount.png'))
    drawAnnulus(articleCounts.tolist(), articleCounts.index.tolist(),
                subordinateName + '政务新媒体累计更新次数', os.path.join(dirTemp, city + 'annulusCountyArticle.png'))

    # ----- monitoring-result summary -----
    resultTotals = totals.drop('All')
    qualified = int(resultTotals['合格'])
    dCity['resultQualified'] = "%d" % qualified
    dCity['resultQualifiedRatio'] = "%.1f%%" % (qualified / nmCount * 100.0)
    dCity['resultUnqualified'] = "%d" % (nmCount - qualified)

    if '监测期间未更新' in resultTotals.index:
        numNoupdated = resultTotals['监测期间未更新']
        dCity['stringResultNoUpdated'] = ";%d个政务新媒体监测期间未更新,占监测总数的%.1f%%" % (
            numNoupdated, numNoupdated / nmCount * 100.0)
        dCity['stringNoUpdated'] = "%d个政务新媒体监测期间未更新。" % (numNoupdated)
    else:
        dCity['stringResultNoUpdated'] = ''
        dCity['stringNoUpdated'] = ""

    if '超过两周未更新' in resultTotals.index:
        numNoupdated2W = resultTotals['超过两周未更新']
        dCity['stringResultNoUpdated2W'] = ";%d个政务新媒体连续未更新时间超过两周,占监测总数的%.1f%%" % (
            numNoupdated2W, numNoupdated2W / nmCount * 100.0)
        dCity['stringNoUpdated2W'] = "%d个政务新媒体连续未更新时间超过两周。" % (numNoupdated2W)
    else:
        dCity['stringResultNoUpdated2W'] = ''
        dCity['stringNoUpdated2W'] = ''

    drawAnnulus(resultTotals.tolist(), resultTotals.index.tolist(),
                '政务新媒体监测结果', os.path.join(dirTemp, city + 'annulusResult.png'))
    drawBar(countyRates, countieshege,
            '政务新媒体管理矩阵发布时效性合格率榜单', os.path.join(dirTemp, city + 'barCountyRatio.png'))

    # ----- report tables -----
    print(' 生成报告...')
    dfCityUnqulified = dfc[dfc['监测结果'] != '合格'].sort_values(by="监测结果", ascending=True)
    dfCityQulified = dfc[dfc['监测结果'] == '合格'].sort_values(by=subordinate, ascending=True)
    context.update({'tt4_contents': _account_rows(dfCityUnqulified, subordinate)})
    context.update({'tt5_contents': _account_rows(dfCityQulified, subordinate)})

    # Typo table. Quotes, brackets and similar characters are removed from the
    # quoted sentence because they disturb the table template output.
    stripChars = re.compile(r"[——,$%^,。?、~@#¥%……&*《》<>「」{}【】()/\\\[\]'\"]")
    tCbz_list = []
    for _, row in dfcw.iterrows():
        tCbz_list.append({
            'error': _blank_if_na(row['错误']), 'tips': _blank_if_na(row['建议']),
            'sentence': stripChars.sub('', str(_blank_if_na(row['错误出现位置']))),
            'type': _blank_if_na(row['账号类型']), 'name': _blank_if_na(row['账号名称']),
            'date': toDate(str(row['发文时间'])),
            'title': _blank_if_na(row['标题']) if '标题' in dfcw.columns else '',
        })
    if dfcw.shape[0] > 0:
        dCity['stringCbzCount'] = '本次检测发现错别字%d处,详细情况见附表政务新媒体发布内容错别字统计表。' % (dfcw.shape[0])
    else:
        dCity['stringCbzCount'] = '本次检测未发现错别字。'
    context.update({'tCbz_contents': tCbz_list})

    # Sensitive-word table.
    tMgc_list = []
    for _, row in dfcs.iterrows():
        tMgc_list.append({
            'error': _blank_if_na(row['错误']), 'tips': _blank_if_na(row['建议']),
            'sentence': _blank_if_na(row['错误出现位置']),
            'type': _blank_if_na(row['账号类型']), 'name': _blank_if_na(row['账号名称']),
            'date': toDate(str(row['发文时间'])),
            'title': _blank_if_na(row['标题']) if '标题' in dfcs.columns else '',
        })
    if dfcs.shape[0] > 0:
        dCity['stringMgcCount'] = '本次检测发现敏感信息%d处,详细情况见附表政务新媒体发布内容敏感信息统计表。' % (dfcs.shape[0])
    else:
        dCity['stringMgcCount'] = '本次检测未发现涉敏内容。'
    context.update({'tMgc_contents': tMgc_list})

    context.update(dCity)

    # ----- render the report from the template -----
    temp_word(fnTemplate, fnReport, context, dirTemp, city)
|
||||
|
||||
def createDir(dirP, dirS):
    """Ensure sub-directory *dirS* exists under *dirP* and return its path.

    Returns ``os.path.join(dirP, dirS)`` when that directory exists or could be
    created. Returns *dirP* unchanged when *dirP* is not a directory, or when the
    sub-directory cannot be created (a message naming the failed path is printed).
    """
    if not os.path.isdir(dirP):
        return dirP

    target = os.path.join(dirP, dirS)
    try:
        os.makedirs(target, exist_ok=True)
    except OSError:
        # e.g. a regular file already uses the name, or permission was denied;
        # handled by the fallback below instead of aborting the run.
        pass

    if os.path.isdir(target):
        return target
    # Report the path that failed, not the parent we fall back to.
    print('Directory ' + target + ' cannot be created.')
    return dirP
|
||||
# def createDir(dirP, dirS):
|
||||
|
||||
def summary(info, strFnData, strFnW, strFnS, strfnTemplate, strPathOutput):
    """Load the three Excel inputs, normalise the monitoring data and build the reports.

    Reads the monitoring data (strFnData), typo findings (strFnW) and sensitive-word
    findings (strFnS), cleans the monitoring frame, creates the ``Reports`` and
    ``Intermediate`` directories next to strPathOutput and calls ``summaryCity``
    for every selected city.
    """
    df = pd.read_excel(strFnData)
    dfW = pd.read_excel(strFnW)
    dfS = pd.read_excel(strFnS)

    # Exact-match replacements: (column, old value, new value).
    # First the monitoring-result wording is unified, then over-long county
    # names are shortened so they fit in the charts.
    renames = (
        ('监测结果', '连续两周未更新', '超过两周未更新'),
        ('区县/地方部门', '积石山保安族东乡族撒拉族自治县', '积石山县'),
        ('区县/地方部门', '阿克塞哈萨克族自治县', '阿克塞自治县'),
    )
    for column, old, new in renames:
        df.loc[df[column] == old, column] = new

    # Rows without a city are provincial departments; rows without a county are
    # city-level departments (prefecture-level for 临夏回族自治州).
    df['市/省局'] = df['市/省局'].fillna('省直部门')
    df['区县/地方部门'] = df['区县/地方部门'].fillna('市直部门')
    isLinxiaDirect = (df['市/省局'] == '临夏回族自治州') & (df['区县/地方部门'] == '市直部门')
    df.loc[isLinxiaDirect, '区县/地方部门'] = '州直部门'

    # Clean-up: drop all whitespace inside text cells, strip a leading "其他+"
    # prefix, treat a missing update count as 0 and blank every other gap.
    for pattern in (r'\s+', r'^其他\+'):
        df.replace(pattern, '', regex=True, inplace=True)
    df['更新次数'] = df['更新次数'].fillna(0)
    df = df.fillna(value='')

    # Only the province-wide report is generated at the moment. The full scope
    # used for a complete run is: 甘肃省, 白银市, 武威市, 庆阳市, 酒泉市, 天水市,
    # 临夏回族自治州, 平凉市, 定西市, 兰州新区, 嘉峪关市, 庆阳市华池县, 庆阳市镇原县, 庆阳市宁县.
    cities = {'甘肃省'}

    # Reports/ and Intermediate/ are created in the directory holding strPathOutput.
    dirP = os.path.abspath(os.path.dirname(strPathOutput))
    dirReports = createDir(dirP, 'Reports')
    dirIntermediate = createDir(dirP, 'Intermediate')

    for city in cities:
        fnReport = os.path.join(dirReports, city + '.docx')
        summaryCity(info, city, df, dfW, dfS, strfnTemplate, fnReport, dirIntermediate)
|
||||
|
||||
# 合并错别字文件
|
||||
def mergeCMC(keyword, strPathCBZ, strFnCbz):
    """Merge the per-city finding workbooks into one Excel file.

    Every ``*<keyword>*.xlsx`` file in *strPathCBZ* is read, tagged with a 市州
    column derived from its file name, and the concatenation is written to
    *strFnCbz*.

    Args:
        keyword: substring the file names must contain.
        strPathCBZ: directory holding the per-city workbooks.
        strFnCbz: path of the merged workbook to write.
    """
    # File-name fragment -> full city name. The first matching key wins, so the
    # order matters (e.g. '新区' is listed before '兰州').
    cityShorten = {'白银': '白银市', '定西': '定西市', '酒泉': '酒泉市', '嘉峪关': '嘉峪关市', '陇南': '陇南市',
                   '临夏': '临夏回族自治州', '平凉': '平凉市', '庆阳': '庆阳市', '天水': '天水市', '武威': '武威市', '新区': '兰州新区',
                   '兰州新区': '兰州新区', '兰州': '兰州市', '张掖': '张掖市', '甘南': '甘南藏族自治州', '省直': '省直部门', '金昌': '金昌市',
                   'BY': '白银市', 'DX': '定西市', 'JQ': '酒泉市', 'JYG': '嘉峪关市', 'LN': '陇南市',
                   'LX': '临夏回族自治州', 'PL': '平凉市', 'QY': '庆阳市', 'TS': '天水市', 'WW': '武威市', 'XQ': '兰州新区',
                   'LZXQ': '兰州新区', 'LZ': '兰州市', 'ZY': '张掖市', 'GN': '甘南藏族自治州', 'SZ': '省直部门', 'JC': '金昌市', }

    frames = []
    total = 0
    # sorted() makes the row order of the merged file reproducible.
    for fn in sorted(glob.glob(os.path.join(strPathCBZ, '*' + keyword + '*.xlsx'))):
        f = os.path.basename(fn)
        city = next((full for short, full in cityShorten.items() if short in f), '')
        if not city:
            print("!!!!! City Name not matched ( ", f, " )")
        dfn = pd.read_excel(fn)
        dfn['市州'] = city
        frames.append(dfn)
        total += dfn.shape[0]
        print(city, f, dfn.shape[0], '/', total)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    df.to_excel(strFnCbz)
|
||||
#def mergeCMC
|
||||
|
||||
if __name__ == "__main__":
    # NOTE: convert the date columns of the Excel files before running.

    # Values substituted into the report template.
    info = dict(
        year="2022",
        quarter="三",
        dateCN="二〇二二年九月",
        dateStart="2022年7月1日",
        dateEnd="2022年9月20日",
        days="81",
        num="11",
    )

    # Data root directory; every other path is derived from it.
    strPath = 'D:/Projects/POM/DATA/2023年S1/'
    strFnMonitoring = strPath + '汇总/第一季度汇总数据_2023.3.xlsx'  # monitoring data
    strPathTemplate = strPath + 'POM_ReportTemplate0.docx'  # Word template
    strFnCbz = strPath + '汇总/CBZ.xlsx'  # merged typo findings
    strFnMgc = strPath + '汇总/MGC.xlsx'  # merged sensitive-word findings
    strPathOutput = strPath + '统计/'  # output directory

    # Build each merged findings file from the per-city workbooks when it is missing.
    for keyword, strFnMerged in (("错别", strFnCbz), ("敏感", strFnMgc)):
        if not os.path.exists(strFnMerged):
            mergeCMC(keyword, strPath + '监测/', strFnMerged)

    summary(info, strFnMonitoring, strFnCbz, strFnMgc, strPathTemplate, strPathOutput)
|
|
@ -0,0 +1,618 @@
|
|||
# 1. 打开监测任务表格
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os, glob, re
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.ticker import FuncFormatter
|
||||
import datetime
|
||||
|
||||
from docxtpl import DocxTemplate
|
||||
from docxtpl import InlineImage
|
||||
from docx.shared import Mm
|
||||
|
||||
|
||||
def fetch_chinese(s):
    """Return *s* reduced to its characters in the range U+4E00..U+9FA5, in order."""
    kept = re.findall(r'[\u4e00-\u9fa5]', s)
    return ''.join(kept)
|
||||
|
||||
def toDate(strDT):
    """Format a date-like value as 'MM-DD'; return '' when it cannot be parsed."""
    parsed = pd.to_datetime(strDT, errors='coerce')
    # Unparseable input is coerced to NaT, which is rendered as an empty string.
    return '' if pd.isna(parsed) else parsed.strftime('%m-%d')
|
||||
|
||||
# word模板替换
|
||||
def temp_word(tmep_path, word_apth, dContext, pathImage, city):
    """Render the Word template with the report context and save the result.

    The five chart images ``<city><chart>.png`` in *pathImage* are added to
    *dContext* (the dict is updated in place) as 120 mm wide inline images,
    then the template *tmep_path* is rendered and written to *word_apth*.
    """
    template = DocxTemplate(tmep_path)

    # Template variable name == image file stem (after the city prefix).
    chartNames = (
        'annulusMediaCount',
        'annulusCountyCount',
        'annulusCountyArticle',
        'annulusResult',
        'barCountyRatio',
    )
    images = {
        chart: InlineImage(template, os.path.join(pathImage, city + chart + '.png'), width=Mm(120))
        for chart in chartNames
    }

    dContext.update(images)
    template.render(dContext)
    template.save(word_apth)
|
||||
|
||||
|
||||
# 画柱状图
|
||||
def drawBar(data, recipe, title='', fn=''):
    """Draw a bar chart of ratios with a percentage y axis and optionally save it.

    Args:
        data: bar heights as fractions; the y axis is fixed to 0..1.
        recipe: x-axis category labels, one per bar.
        title: chart title.
        fn: output image path; nothing is written when empty (previously the
            default '' was passed straight to ``savefig``).
    """
    fig = plt.figure(figsize=(6, 4))
    try:
        plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese labels
        plt.rcParams['axes.unicode_minus'] = False

        plt.bar(recipe, data, width=0.5)
        plt.xticks(recipe, recipe, rotation=35)
        plt.ylim((0, 1))

        def to_percent(temp, position):
            # Tick formatter: fraction -> whole-number percentage.
            return '%2.0f' % (100 * temp) + '%'

        plt.gca().yaxis.set_major_formatter(FuncFormatter(to_percent))
        plt.title(title, fontsize=16)
        plt.tight_layout()
        if len(fn) > 0:
            plt.savefig(fn)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)
|
||||
|
||||
|
||||
# 画环状图
|
||||
def drawAnnulus(data, recipe, title='', fn=''):
    """Draw a ring (donut) chart with a legend on the right and optionally save it.

    Args:
        data: wedge sizes, passed straight to ``Axes.pie``.
        recipe: legend labels, one per wedge, in the same order as *data*.
        title: chart title; no title is set when empty.
        fn: output image path; nothing is written when empty.
    """
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese labels
    plt.rcParams['axes.unicode_minus'] = False

    # Canvas width, legend column count and legend font size grow with the label count.
    # The 21-40 label case used the invalid size name 'xmall', which matplotlib rejects;
    # 'small' fits the medium -> small -> x-small progression of the other branches.
    if len(recipe) > 40:
        width, legend_cols, legend_font = 16, 4, 'x-small'
    elif len(recipe) > 20:
        width, legend_cols, legend_font = 16, 2, 'small'
    else:
        width, legend_cols, legend_font = 8, 1, 'medium'
    height = 4

    fig, ax = plt.subplots(figsize=(width, height), subplot_kw=dict(aspect="equal"))
    try:
        # wedgeprops width < radius turns the pie into a ring; wedges are drawn
        # counter-clockwise starting from the positive x axis (startangle=0).
        wedges, _texts = ax.pie(data, radius=1.1, wedgeprops=dict(width=0.4), startangle=0)

        # The monitoring-result chart keeps its legend slightly closer to the ring.
        anchor_x = 1.0 if title == '政务新媒体监测结果' else 1.2
        ax.legend(wedges, recipe, loc="center left", bbox_to_anchor=(anchor_x, 0.5),
                  borderaxespad=0., ncol=legend_cols, fontsize=legend_font)
        if len(title) > 0:
            ax.set_title(title, fontsize=16, fontweight='heavy')

        fig.tight_layout()
        if len(fn) > 0:
            fig.savefig(fn)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)
|
||||
|
||||
# summaryCity(city, dfc, dfcw, dfcs, context, strfnTemplate, os.path.join(strPathVerified,'Reports', city+'.docx'), strPathVerified )
|
||||
|
||||
|
||||
# 汇总市州数据,
|
||||
# 市州名称, 监测数据, cbz数据, mgc数据, context(编号、名称), word模板文件名称, 输出word文件名称, 临时文件目录
|
||||
# 需要传入模板文件,数据、错别字、敏感词,单位名称等
|
||||
def _blank_if_na(value):
    """Return '' when *value* is a missing marker (NaN/NaT/None), else *value* unchanged."""
    return '' if pd.isna(value) else value


def _account_rows(frame, subordinate):
    """Build the per-account template rows (tables tt4/tt5) from a monitoring frame.

    Falsy cells (0 or '') are rendered as empty strings, as the tables did before.
    """
    rows = []
    for _, row in frame.iterrows():
        rows.append({
            'name': row['账号名称'], 'type': row['账号类型'], 'unit': row['开设主体'],
            'county': row[subordinate], 'result': row['监测结果'],
            'num': "%d" % row['更新次数'] if row['更新次数'] else '',
            'days': "%d" % row['最大静默日数'] if row['最大静默日数'] else '',
            'start': toDate(str(row['静默开始日期'])) if row['静默开始日期'] else '',
            'end': toDate(str(row['静默结束日期'])) if row['静默结束日期'] else '',
        })
    return rows


def summaryCity(info, city, df, dfW, dfS, fnTemplate, fnReport, dirTemp):
    """Aggregate one city's monitoring data, draw its charts and render its Word report.

    Args:
        info: report-wide template values; ``info['num']`` is appended to the report id
            and the whole dict is merged into the template context.
        city: report subject; must be a key of the client table below.
        df: monitoring data with the columns read below (市/省局, 区县/地方部门, 监测结果,
            账号名称, 账号类型, 开设主体, 更新次数, 最大静默日数, 静默开始日期, 静默结束日期).
        dfW: typo findings, filtered by its 市州 column.
        dfS: sensitive-word findings, filtered by its 市州 column.
        fnTemplate: path of the docx template.
        fnReport: path of the report to write.
        dirTemp: directory receiving the intermediate chart images.

    Raises:
        ValueError: if *city* is unknown or has no monitoring rows.

    Changes relative to the previous implementation:
        * totals are computed explicitly instead of assuming the 'All' margin row sorts
          first (on a tie, e.g. a single-county report, 'All' was taken as a county);
        * pivot cells are filled with 0, so the integer conversion of the 合格 column
          no longer fails on ''-filled cells, and a missing 合格 column is tolerated;
        * missing cells in the typo / sensitive-word tables are blanked (the earlier
          ``fillna('')`` calls discarded their result);
        * the duplicated 陇南市 entry of the client table was removed (same value,
          same position, so report ids are unchanged).
    """
    dCityClient = {
        '甘肃省': "甘肃省人民政府办公厅",
        '省直部门': "甘肃省人民政府办公厅",
        '白银市': "白银市人民政府办公室",
        '定西市': "定西市人民政府办公室",
        '临夏回族自治州': "临夏回族自治州人民政府办公室",
        '平凉市': "中共平凉市委网络安全和信息化委员会办公室",
        "庆阳市": "庆阳市电子政务与信息资源管理办公室",
        '庆阳市华池县': "华池县人民政府办公室",
        '庆阳市宁县': "宁县人民政府办公室",
        "庆阳市镇原县": "镇原县人民政府办公室",
        "酒泉市": "酒泉市人民政府办公室",
        "天水市": "天水市人民政府办公室",
        "武威市": "武威市人民政府办公室",
        "金昌市": "金昌市人民政府办公室",
        "嘉峪关市": "嘉峪关市人民政府办公室",
        "兰州新区": "兰州新区管委会办公室",
        "陇南市": "陇南市政务服务中心",
        "张掖市": "张掖市政务服务中心",
        "甘南藏族自治州": "甘南藏族自治州政务服务中心",
        "兰州市": "兰州市政务服务中心",
    }
    print("----------------" + city + "----------------")

    # Report id = two-digit position of the city in the client table + info['num'].
    if city not in dCityClient:
        raise ValueError("unknown city %r: add it to dCityClient" % city)
    strID = "%02d" % list(dCityClient).index(city)
    context = {
        "city": city,
        "client": dCityClient[city],
        "reportid": strID + info['num'],
    }
    context.update(info)

    # Column used to break the data down, and its display name in chart titles.
    subordinate = '区县/地方部门'
    subordinateName = '县区'

    # ----- select the rows belonging to this report -----
    if "庆阳市" in city:
        # Qingyang itself, or one of its counties that gets a separate report.
        mask = df['市/省局'] == '庆阳市'
        for county in ('华池县', '宁县', '镇原县'):
            if county in city:
                mask = mask & (df['区县/地方部门'] == county)
                break
        dfc = df.loc[mask].copy()
        # Typos / sensitive words are not split per county.
        dfcw = dfW.loc[dfW['市州'] == '庆阳市'].copy()
        dfcs = dfS.loc[dfS['市州'] == '庆阳市'].copy()
    elif "甘肃" in city:
        # Province-wide report: everything, broken down by city.
        dfc = df.copy()
        dfcw = dfW.copy()
        dfcs = dfS.copy()
        subordinate = '市/省局'
        subordinateName = '市州'
    else:
        # Ordinary city, including 省直部门.
        dfc = df.loc[df['市/省局'] == city].copy()
        dfcw = dfW.loc[dfW['市州'] == city].copy()
        dfcs = dfS.loc[dfS['市州'] == city].copy()

    if dfc.empty:
        raise ValueError("no monitoring data found for %r" % city)

    dCity = {}

    # ----- accounts per subordinate unit and monitoring result -----
    dfCountyAccount = pd.pivot_table(dfc, index=[subordinate], columns=['监测结果'], values=['账号名称'],
                                     aggfunc='count', fill_value=0, margins=True)
    dfCountyAccount.columns = dfCountyAccount.columns.droplevel(0)
    dfCountyAccount = dfCountyAccount.fillna(0).astype(int)
    if '合格' not in dfCountyAccount.columns:
        dfCountyAccount.insert(0, '合格', 0)

    def _cell(row, column):
        # Absent or zero counts are shown as an empty table cell.
        if column in row.index and row[column]:
            return int(row[column])
        return ''

    context['tt3_contents'] = [
        {'county': '总 计' if index == 'All' else index,
         'hg': _cell(row, '合格'),
         'u2w': _cell(row, '超过两周未更新'),
         'un': _cell(row, '监测期间未更新'),
         'count': _cell(row, 'All')}
        for index, row in dfCountyAccount.iterrows()
    ]

    totals = dfCountyAccount.loc['All']  # city-wide count per result, plus 'All'
    byCounty = dfCountyAccount.drop(index='All').sort_values(by='All', ascending=False)
    byCounty['rate'] = byCounty['合格'] / byCounty['All']

    # ----- accounts per media type -----
    mediaCounts = dfc.groupby('账号类型')['账号名称'].count().sort_values(ascending=False)
    nmCount = int(mediaCounts.sum())
    dCity['nmCount'] = nmCount
    print(' 监测账号数:', nmCount)
    dCity['sMediaCount'] = ','.join(m + str(n) + '个' for m, n in mediaCounts.items())
    context.update({'tt1_contents': [{'type': m, 'count': str(n)} for m, n in mediaCounts.items()]})

    # ----- update counts per subordinate unit -----
    updates = pd.to_numeric(dfc['更新次数'], errors='coerce').fillna(0)
    articleCounts = updates.groupby(dfc[subordinate]).sum().sort_values(ascending=False)
    dCity['cityArticleCount'] = "%d" % articleCounts.sum()
    dCity['countyMostArticle'] = articleCounts.index[0]
    dCity['countyMostArticleCount'] = "%d" % articleCounts.iloc[0]
    dCity['sCountyArticles'] = ','.join(name + "%d" % n + "次" for name, n in articleCounts.items())

    # ----- pass rate -----
    dCity['cityRatio'] = "{:.1%}".format(totals['合格'] / totals['All'])
    print(' 合格率:', dCity['cityRatio'])

    # Units ordered by account count.
    counties = byCounty.index.tolist()
    countyCounts = byCounty['All'].tolist()
    print(countyCounts)
    print(counties)
    dCity['countyCount'] = "%d" % len(counties)
    dCity['sCounties'] = ','.join(counties)
    dCity['sCountyCount'] = ','.join(c + str(n) + '个' for c, n in zip(counties, countyCounts))

    # Units ordered by pass rate.
    ranked = byCounty.sort_values(by='rate', ascending=False)
    countieshege = ranked.index.tolist()
    countyRates = ranked['rate'].tolist()
    ratioTexts = ["%.1f" % (100.0 * rate) + '%' for rate in countyRates]
    dCity['sCountyRatio'] = ','.join(c + r for c, r in zip(countieshege, ratioTexts))
    dCity['tt2_contents'] = [{'county': c, 'ratio': r} for c, r in zip(countieshege, ratioTexts)]

    # ----- charts -----
    print(' 生成图片...')
    drawAnnulus(mediaCounts.tolist(), mediaCounts.index.tolist(),
                '政务新媒体账号类型', os.path.join(dirTemp, city + 'annulusMediaCount.png'))
    drawAnnulus(countyCounts, counties,
                subordinateName + '政务新媒体账号数量', os.path.join(dirTemp, city + 'annulusCountyCount.png'))
    drawAnnulus(articleCounts.tolist(), articleCounts.index.tolist(),
                subordinateName + '政务新媒体累计更新次数', os.path.join(dirTemp, city + 'annulusCountyArticle.png'))

    # ----- monitoring-result summary -----
    resultTotals = totals.drop('All')
    qualified = int(resultTotals['合格'])
    dCity['resultQualified'] = "%d" % qualified
    dCity['resultQualifiedRatio'] = "%.1f%%" % (qualified / nmCount * 100.0)
    dCity['resultUnqualified'] = "%d" % (nmCount - qualified)

    if '监测期间未更新' in resultTotals.index:
        numNoupdated = resultTotals['监测期间未更新']
        dCity['stringResultNoUpdated'] = ";%d个政务新媒体监测期间未更新,占监测总数的%.1f%%" % (
            numNoupdated, numNoupdated / nmCount * 100.0)
        dCity['stringNoUpdated'] = "%d个政务新媒体监测期间未更新。" % (numNoupdated)
    else:
        dCity['stringResultNoUpdated'] = ''
        dCity['stringNoUpdated'] = ""

    if '超过两周未更新' in resultTotals.index:
        numNoupdated2W = resultTotals['超过两周未更新']
        dCity['stringResultNoUpdated2W'] = ";%d个政务新媒体连续未更新时间超过两周,占监测总数的%.1f%%" % (
            numNoupdated2W, numNoupdated2W / nmCount * 100.0)
        dCity['stringNoUpdated2W'] = "%d个政务新媒体连续未更新时间超过两周。" % (numNoupdated2W)
    else:
        dCity['stringResultNoUpdated2W'] = ''
        dCity['stringNoUpdated2W'] = ''

    drawAnnulus(resultTotals.tolist(), resultTotals.index.tolist(),
                '政务新媒体监测结果', os.path.join(dirTemp, city + 'annulusResult.png'))
    drawBar(countyRates, countieshege,
            '政务新媒体管理矩阵发布时效性合格率榜单', os.path.join(dirTemp, city + 'barCountyRatio.png'))

    # ----- report tables -----
    print(' 生成报告...')
    dfCityUnqulified = dfc[dfc['监测结果'] != '合格'].sort_values(by="监测结果", ascending=True)
    dfCityQulified = dfc[dfc['监测结果'] == '合格'].sort_values(by=subordinate, ascending=True)
    context.update({'tt4_contents': _account_rows(dfCityUnqulified, subordinate)})
    context.update({'tt5_contents': _account_rows(dfCityQulified, subordinate)})

    # Typo table. Quotes, brackets and similar characters are removed from the
    # quoted sentence because they disturb the table template output.
    stripChars = re.compile(r"[——,$%^,。?、~@#¥%……&*《》<>「」{}【】()/\\\[\]'\"]")
    tCbz_list = []
    for _, row in dfcw.iterrows():
        tCbz_list.append({
            'error': _blank_if_na(row['错误']), 'tips': _blank_if_na(row['建议']),
            'sentence': stripChars.sub('', str(_blank_if_na(row['错误出现位置']))),
            'type': _blank_if_na(row['账号类型']), 'name': _blank_if_na(row['账号名称']),
            'date': toDate(str(row['发文时间'])),
            'title': _blank_if_na(row['标题']) if '标题' in dfcw.columns else '',
        })
    if dfcw.shape[0] > 0:
        dCity['stringCbzCount'] = '本次检测发现错别字%d处,详细情况见附表政务新媒体发布内容错别字统计表。' % (dfcw.shape[0])
    else:
        dCity['stringCbzCount'] = '本次检测未发现错别字。'
    context.update({'tCbz_contents': tCbz_list})

    # Sensitive-word table. Sentence and title are reduced to their Chinese
    # characters via fetch_chinese before being placed in the table.
    tMgc_list = []
    for _, row in dfcs.iterrows():
        sTitle = row['标题'] if '标题' in dfcs.columns else ''
        tMgc_list.append({
            'error': _blank_if_na(row['错误']), 'tips': _blank_if_na(row['建议']),
            'sentence': fetch_chinese(str(row['错误出现位置'])),
            'type': _blank_if_na(row['账号类型']), 'name': _blank_if_na(row['账号名称']),
            'date': toDate(str(row['发文时间'])),
            'title': fetch_chinese(str(sTitle)),
        })
    if dfcs.shape[0] > 0:
        dCity['stringMgcCount'] = '本次检测发现敏感信息%d处,详细情况见附表政务新媒体发布内容敏感信息统计表。' % (dfcs.shape[0])
    else:
        dCity['stringMgcCount'] = '本次检测未发现涉敏内容。'
    context.update({'tMgc_contents': tMgc_list})

    context.update(dCity)

    # ----- render the report from the template -----
    temp_word(fnTemplate, fnReport, context, dirTemp, city)
|
||||
|
||||
def createDir(dirP, dirS):
    """Make sure sub-directory ``dirS`` exists under ``dirP``.

    Args:
        dirP: Parent directory.
        dirS: Name of the sub-directory to create inside ``dirP``.

    Returns:
        The path of the sub-directory when it exists (or was created);
        otherwise ``dirP`` unchanged as a fallback.
    """
    if not os.path.isdir(dirP):
        # Parent is missing or is not a directory: nothing can be created.
        return dirP

    target = os.path.join(dirP, dirS)
    try:
        if not os.path.exists(target):
            os.mkdir(target)
    except OSError as exc:
        # Report the directory that failed, then fall back to the parent.
        print('Directory ' + target + ' cannot be created: ' + str(exc))
        return dirP

    if not os.path.isdir(target):
        # The name is taken by something that is not a directory.
        print('Directory ' + target + ' cannot be created.')
        return dirP
    return target
|
||||
# def createDir(dirP, dirS):
|
||||
|
||||
def summary(info, strFnData, strFnW, strFnS, strfnTemplate, strPathOutput):
    """Load the monitoring, typo and sensitive-word workbooks and build the reports.

    The monitoring table is normalised (unified result wording, shortened
    county names, default department labels, whitespace removed) and then
    ``summaryCity`` is called once per selected city.

    Args:
        info: Report metadata passed through to ``summaryCity``.
        strFnData: Path of the monitoring workbook.
        strFnW: Path of the merged typo workbook.
        strFnS: Path of the merged sensitive-word workbook.
        strfnTemplate: Path of the Word template.
        strPathOutput: Output path; the ``Reports`` and ``Intermediate``
            folders are created under its directory part.
    """
    monitoring = pd.read_excel(strFnData)
    typos = pd.read_excel(strFnW)
    sensitive = pd.read_excel(strFnS)

    result_col = '监测结果'
    city_col = '市/省局'
    county_col = '区县/地方部门'

    # Use a single wording for the "not updated for two weeks" result
    monitoring.loc[monitoring[result_col] == '连续两周未更新', result_col] = '超过两周未更新'

    # Replace over-long county names with short forms so charts stay readable
    short_names = {
        '积石山保安族东乡族撒拉族自治县': '积石山县',
        '阿克塞哈萨克族自治县': '阿克塞自治县',
    }
    for long_name, short_name in short_names.items():
        monitoring.loc[monitoring[county_col] == long_name, county_col] = short_name

    # Rows without a city / county belong to provincial / municipal departments;
    # for Linxia prefecture the municipal label becomes the prefecture label
    monitoring[city_col] = monitoring[city_col].fillna('省直部门')
    monitoring[county_col] = monitoring[county_col].fillna('市直部门')
    linxia_direct = (monitoring[city_col] == '临夏回族自治州') & (monitoring[county_col] == '市直部门')
    monitoring.loc[linxia_direct, county_col] = '州直部门'

    # Clean-up: drop all whitespace, strip a leading "其他+" prefix, fill blanks
    monitoring = monitoring.replace(r'\s+', '', regex=True)
    monitoring = monitoring.replace(r'^其他\+', '', regex=True)
    monitoring['更新次数'] = monitoring['更新次数'].fillna(0)
    monitoring = monitoring.fillna(value='')

    # Full candidate list, kept for reference; it is overridden right below
    cities = {'甘肃省', '白银市', '武威市', '庆阳市', '酒泉市', '天水市', '临夏回族自治州', '平凉市', '定西市', '兰州新区',
              '嘉峪关市', '庆阳市华池县', '庆阳市镇原县', '庆阳市宁县', '陇南市'}
    cities = {'甘肃省'}  # only these are reported

    # Reports and intermediate files live next to the output path
    base_dir = os.path.abspath(os.path.dirname(strPathOutput))
    reports_dir = createDir(base_dir, 'Reports')
    intermediate_dir = createDir(base_dir, 'Intermediate')
    for city in cities:
        report_path = os.path.join(reports_dir, city + '.docx')
        summaryCity(info, city, monitoring, typos, sensitive, strfnTemplate, report_path, intermediate_dir)
|
||||
|
||||
# 合并错别字文件
|
||||
def mergeCMC(keyword, strPathCBZ, strFnCbz):
    """Merge per-city Excel workbooks into one workbook tagged with the city.

    Every ``*<keyword>*.xlsx`` file in ``strPathCBZ`` is read, a ``市州``
    column holding the city inferred from the file name is added, and all
    rows are written to ``strFnCbz``.

    Args:
        keyword: Substring that selects the workbooks to merge.
        strPathCBZ: Directory searched for the workbooks.
        strFnCbz: Path of the merged workbook to write.
    """
    # File-name fragment -> full city name; the first key found in the name wins
    cityShorten = {'白银': '白银市', '定西': '定西市', '酒泉': '酒泉市', '嘉峪关': '嘉峪关市', '陇南': '陇南市',
                   '临夏': '临夏回族自治州', '平凉': '平凉市', '庆阳': '庆阳市', '天水': '天水市', '武威': '武威市', '新区': '兰州新区',
                   '兰州新区': '兰州新区', '兰州': '兰州市', '张掖': '张掖市', '甘南': '甘南藏族自治州', '省直': '省直部门', '金昌': '金昌市',
                   'BY': '白银市', 'DX': '定西市', 'JQ': '酒泉市', 'JYG': '嘉峪关市', 'LN': '陇南市',
                   'LX': '临夏回族自治州', 'PL': '平凉市', 'QY': '庆阳市', 'TS': '天水市', 'WW': '武威市', 'XQ': '兰州新区',
                   'LZXQ': '兰州新区', 'LZ': '兰州市', 'ZY': '张掖市', 'GN': '甘南藏族自治州', 'SZ': '省直部门', 'JC': '金昌市', }
    frames = []
    total = 0
    for fn in glob.glob(os.path.join(strPathCBZ, '*' + keyword + '*.xlsx')):
        f = os.path.basename(fn)
        city = next((full for short, full in cityShorten.items() if short in f), '')
        if not city:
            print("!!!!! City Name not matched ( ", f, " )")
        dfn = pd.read_excel(fn)
        dfn['市州'] = city
        frames.append(dfn)
        total += dfn.shape[0]
        print(city, f, dfn.shape[0], '/', total)
    # DataFrame.append was removed in pandas 2.0; concatenate once at the end
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    df.to_excel(strFnCbz)
|
||||
#def mergeCMC
|
||||
|
||||
if __name__ == "__main__":
    # Convert the date columns of the Excel inputs before running this script.
    # NOTE(review): "quarter" is "三" while the dates and file names point at
    # the first quarter - confirm how the template uses this field.
    info = dict(
        year="2023",
        quarter="三",
        dateCN="二〇二三年三月",
        dateStart="2023年1月1日",
        dateEnd="2023年3月20日",
        days="79",
        num="4",
    )

    # Data root directory
    strPath = 'D:/Projects/POM/DATA/2023年S1/'
    # Monitoring workbook and Word template
    strFnMonitoring = '%s汇总/第一季度汇总数据_2023.3.xlsx' % strPath
    strPathTemplate = '%sPOM_ReportTemplate.docx' % strPath
    # Merged typo / sensitive-word workbooks
    strFnCbz = '%s汇总/CBZ.xlsx' % strPath
    strFnMgc = '%s汇总/MGC.xlsx' % strPath

    # Build each merged workbook from the per-city files only when it is missing
    for keyword, merged in (("错别", strFnCbz), ("敏感", strFnMgc)):
        if not os.path.exists(merged):
            mergeCMC(keyword, strPath + '监测/', merged)

    # Output location
    strPathOutput = '%s统计/' % strPath

    summary(info, strFnMonitoring, strFnCbz, strFnMgc, strPathTemplate, strPathOutput)
|
|
@ -0,0 +1,48 @@
|
|||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os, glob, re
|
||||
|
||||
|
||||
|
||||
# 合并错别字文件
|
||||
def mergeCMC(keyword, strPathCBZ, strFnCbz):
    """Merge per-city Excel workbooks into one workbook tagged with the city.

    Every ``*<keyword>*.xlsx`` file in ``strPathCBZ`` is read, a ``市州``
    column holding the city inferred from the file name is added, and all
    rows are written to ``strFnCbz``.

    Args:
        keyword: Substring that selects the workbooks to merge.
        strPathCBZ: Directory searched for the workbooks.
        strFnCbz: Path of the merged workbook to write.
    """
    sources = glob.glob(os.path.join(strPathCBZ, '*' + keyword + '*.xlsx'))
    print(1, keyword, strPathCBZ, strFnCbz)
    print('glob: ', sources)
    # File-name fragment -> full city name; the first key found in the name wins
    cityShorten = {'白银': '白银市', '定西': '定西市', '酒泉': '酒泉市', '嘉峪关': '嘉峪关市', '陇南': '陇南市',
                   '临夏': '临夏回族自治州', '平凉': '平凉市', '庆阳': '庆阳市', '天水': '天水市', '武威': '武威市', '新区': '兰州新区',
                   '兰州新区': '兰州新区', '兰州': '兰州市', '张掖': '张掖市', '甘南': '甘南藏族自治州', '省直': '省直部门', '金昌': '金昌市',
                   'BY': '白银市', 'DX': '定西市', 'JQ': '酒泉市', 'JYG': '嘉峪关市', 'LN': '陇南市',
                   'LX': '临夏回族自治州', 'PL': '平凉市', 'QY': '庆阳市', 'TS': '天水市', 'WW': '武威市', 'XQ': '兰州新区',
                   'LZXQ': '兰州新区', 'LZ': '兰州市', 'ZY': '张掖市', 'GN': '甘南藏族自治州', 'SZ': '省直部门', 'JC': '金昌市', }
    frames = []
    total = 0
    for fn in sources:
        f = os.path.basename(fn)
        print(f)
        city = next((full for short, full in cityShorten.items() if short in f), '')
        if not city:
            print("!!!!! City Name not matched ( ", f, " )")
        dfn = pd.read_excel(fn)
        dfn['市州'] = city
        frames.append(dfn)
        total += dfn.shape[0]
        print(city, f, dfn.shape[0], '/', total)
    # DataFrame.append was removed in pandas 2.0; concatenate once at the end
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    df.to_excel(strFnCbz)
|
||||
|
||||
# Directory holding the per-city workbooks of this run
strPath = 'D:/Projects/POM/DATA/2023年3月/3月29日错敏词/敏感词/'

# Merged outputs: typos (CBZ) and sensitive words (MGC)
strFnCbz = strPath + '../汇总/CBZ.xlsx'
strFnMgc = strPath + '../汇总/MGC.xlsx'

# Rebuild each merged workbook from scratch: delete the old one, then merge
for _keyword, _target in (("错别", strFnCbz), ("敏感", strFnMgc)):
    if os.path.isfile(_target):
        os.remove(_target)
    mergeCMC(_keyword, strPath, _target)
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,41 @@
|
|||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os, glob, re
|
||||
|
||||
strPath = 'D:/Projects/POM/DATA/2023年3月/两会/敏感词3.8/'
strFn = strPath + 'merged.xlsx'
# Remove a previous result so that it is rebuilt from the source workbooks
if os.path.isfile(strFn):
    os.remove(strFn)


def excelMerge(sPath, sFn):
    """Merge every ``*.xlsx`` workbook in ``sPath`` into the workbook ``sFn``.

    Workbooks without a ``市州`` column get one, filled with the city
    inferred from the file name. The output file itself is never used as
    an input, even when it lives in ``sPath``.

    Args:
        sPath: Directory holding the source workbooks.
        sFn: Path of the merged workbook to write (written without index).
    """
    # File-name fragment -> full city name; the first key found in the name wins
    cityShorten = {'白银': '白银市', '定西': '定西市', '酒泉': '酒泉市', '嘉峪关': '嘉峪关市', '陇南': '陇南市',
                   '临夏': '临夏回族自治州', '平凉': '平凉市', '庆阳': '庆阳市', '天水': '天水市', '武威': '武威市', '新区': '兰州新区',
                   '兰州新区': '兰州新区', '兰州': '兰州市', '张掖': '张掖市', '甘南': '甘南藏族自治州', '省直': '省直部门', '金昌': '金昌市',
                   'BY': '白银市', 'DX': '定西市', 'JQ': '酒泉市', 'JYG': '嘉峪关市', 'LN': '陇南市',
                   'LX': '临夏回族自治州', 'PL': '平凉市', 'QY': '庆阳市', 'TS': '天水市', 'WW': '武威市', 'XQ': '兰州新区',
                   'LZXQ': '兰州新区', 'LZ': '兰州市', 'ZY': '张掖市', 'GN': '甘南藏族自治州', 'SZ': '省直部门', 'JC': '金昌市', }

    def _same_file(a, b):
        return os.path.normcase(os.path.abspath(a)) == os.path.normcase(os.path.abspath(b))

    # Skip the output workbook: merging it into itself would duplicate rows
    fs = [fn for fn in glob.glob(os.path.join(sPath, '*.xlsx')) if not _same_file(fn, sFn)]

    frames = []
    total = 0
    for fn in fs:
        f = os.path.basename(fn)
        dfn = pd.read_excel(fn)
        # Add the city column when the workbook does not carry one
        if '市州' not in dfn.columns:
            city = next((full for short, full in cityShorten.items() if short in f), '')
            if not city:
                print("!!!!! City Name not matched ( ", f, " )")
            dfn['市州'] = city
        frames.append(dfn)
        total += dfn.shape[0]
        print(f, ' ', dfn.shape[0], '/', total)
    # DataFrame.append was removed in pandas 2.0; concatenate once at the end
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    df.to_excel(sFn, index=False)
|
||||
|
||||
excelMerge(strPath, strFn)
|
|
@ -0,0 +1,20 @@
|
|||
import pandas as pd
|
||||
# Earlier input location; superseded by the assignments right below
s = '2023-03-06_11.34.42'
strP = 'D:/Projects/POM/DATA/search/' + s + '/'
strFn = s + '_mm.xlsx'

strP = 'D:/Projects/POM/DATA/2023年3月/两会/敏感词3月13日/上报/'
strFn = 'MGC2023.3.13.xlsx'

# Split the merged workbook into one workbook per city
df = pd.read_excel(strP + strFn)
print(strP + strFn)
print(df.shape)

# A blank city cell is read back as NaN, which cannot be used in a file name;
# such rows are grouped under a placeholder name (the data itself is untouched)
cityKeys = df['市州'].fillna('未知市州').map(str)
cities = cityKeys.unique()
print(cities)
for city in cities:
    print(city)
    dft = df[(cityKeys == city).to_numpy()]
    print(dft.shape)
    dft.to_excel(strP + city + '.xlsx', index=False)
|
|
@ -0,0 +1,21 @@
|
|||
import pandas as pd
|
||||
|
||||
strP = 'D:/Projects/POM/DATA/2023年3月/3月13日错敏词/'
strFn1 = '敏感词.xlsx'
strFn2 = '错别字.xlsx'
# sheet_name=None loads every sheet as {sheet name: DataFrame}
sheets1 = pd.read_excel(strP + strFn1, sheet_name=None)
sheets2 = pd.read_excel(strP + strFn2, sheet_name=None)

# Union of the sheet names of both workbooks, in first-seen order, so that a
# sheet present in only one of the two workbooks is still exported
sheetnames = list(sheets1) + [name for name in sheets2 if name not in sheets1]

# One output workbook per sheet name: rows of workbook 1 followed by workbook 2
for name in sheetnames:
    parts = [book[name] for book in (sheets1, sheets2) if name in book]
    v = pd.concat(parts, axis=0)
    v.to_excel(strP + name + '.xlsx', index=False)
|
|
@ -0,0 +1,541 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
import seaborn as sns
|
||||
import datetime, time
|
||||
import matplotlib.pyplot as plt
|
||||
import re,os
|
||||
|
||||
|
||||
# 18-character ID number: 18 digits, or 17 digits followed by X/x.
# ("[X|x]" would also accept a literal "|" as the last character.)
regIDCard = r"\d{18}|\d{17}[Xx]"
# 11-digit mobile number starting with 13-19 (covers 16x/17x/19x as well)
regCellPhone = r"1[3-9]\d{9}"
|
||||
regSTR = '习近平总同志|习近同志|习近总书记|习平总书记|习近平主义|习总同志' + \
|
||||
'|习近平治国理政|中国是现代化' + \
|
||||
'|中华人名|中共民族|名族|中央人名|中华民主' + \
|
||||
'|中共共产党|中国共产党党章' + \
|
||||
'|伟大复习|建档伟业|建档百年' + \
|
||||
'|二十大大|二十精神|二十大开幕式|中国共产党第二十次代表大会|党二十大|第二十次全国人民代表大会' + \
|
||||
'|建党七十三周年|共产党成立七十三周年' + \
|
||||
'|大人代表|大人常委会|人大常委主任' + \
|
||||
'|爱爱服务|抗议英雄|反炸中心'
|
||||
|
||||
paths = [
|
||||
'D:/Projects/POM/DATA/2022年10月/9月报告/全文/',
|
||||
'D:/Projects/POM/DATA/2022年9月/8月报告/全文/',
|
||||
'D:/Projects/POM/DATA/2022年8月/7月报告/全文/',
|
||||
'D:/Projects/POM/DATA/2022年7月/6月报告/全文/',
|
||||
'D:/Projects/POM/DATA/2022年6月/5月报告/全文/',
|
||||
'D:/Projects/POM/DATA/2022年5月/4月报告/全文/',
|
||||
]
|
||||
"""
|
||||
'''
|
||||
#'D:/Projects/POM/DATA/2022年11月/10月报告/全文/',
|
||||
#'D:/Projects/POM/DATA/2022年12月/11月报告/全文/',
|
||||
#'D:/Projects/POM/DATA/2023年1月/12月报告/全文/',
|
||||
#'D:/Projects/POM/DATA/2023年2月/1月报告/全文/',
|
||||
#'D:/Projects/POM/DATA/2023年3月/2月报告/全文/',
|
||||
'''
|
||||
'''
|
||||
'D:/Projects/POM/DATA/2023年3月/两会/全文数据3月6日',
|
||||
'D:/Projects/POM/DATA/2023年3月/两会/全文数据3月7日',
|
||||
'D:/Projects/POM/DATA/2023年3月/两会/全文数据3月8日',
|
||||
'D:/Projects/POM/DATA/2023年3月/两会/全文数据3月9日',
|
||||
'D:/Projects/POM/DATA/2023年3月/两会/全文数据3月9日',
|
||||
'D:/Projects/POM/DATA/2023年3月/两会/全文数据3月10日',
|
||||
'D:/Projects/POM/DATA/2023年3月/两会/全文数据3月11日',
|
||||
'D:/Projects/POM/DATA/2023年3月/两会/全文数据3月12日',
|
||||
'D:/Projects/POM/DATA/2023年3月/两会/全文数据3月13日',
|
||||
''' """
|
||||
pathO = 'D:/Projects/POM/DATA/search/'
|
||||
doWX = True
|
||||
doWB = True
|
||||
doTT = True
|
||||
|
||||
splitByCity = True
|
||||
|
||||
#监测已发现的敏感词
|
||||
fFound = [
|
||||
#'D:/Projects/POM/DATA/2023年3月/2月报告/汇总/mgc.xlsx',
|
||||
#'D:/Projects/POM/DATA/2023年2月/1月报告/汇总/mgc.xlsx',
|
||||
#'D:/Projects/POM/DATA/2023年1月/12月报告/汇总/mgc.xlsx',
|
||||
#'D:/Projects/POM/DATA/2022年12月/11月报告/汇总/mgc.xlsx',
|
||||
#'D:/Projects/POM/DATA/2022年11月/10月报告/汇总/mgc.xlsx',
|
||||
#'D:/Projects/POM/DATA/2022年10月/9月报告/汇总/mgc.xlsx',
|
||||
#'D:/Projects/POM/DATA/2022年9月/8月报告/汇总/mgc.xlsx',
|
||||
#'D:/Projects/POM/DATA/2022年8月/7月报告/汇总/mgc.xlsx',
|
||||
#'D:/Projects/POM/DATA/2022年7月/6月报告/汇总/mgc.xlsx'
|
||||
'D:/Projects/POM/DATA/2023年3月/两会/汇总/敏感词2023.3.5.xlsx',
|
||||
'D:/Projects/POM/DATA/2023年3月/两会/汇总/敏感词2023.3.6.xlsx',
|
||||
'D:/Projects/POM/DATA/2023年3月/两会/汇总/敏感词2023.3.7.xlsx',
|
||||
'D:/Projects/POM/DATA/2023年3月/两会/汇总/敏感词2023.3.9.xlsx',
|
||||
'D:/Projects/POM/DATA/2023年3月/两会/汇总/敏感词扫描结果.xlsx' ]
|
||||
|
||||
#通报结果
|
||||
fInformed = ['D:/Projects/POM/DATA/国办通报/20230303错敏词.xlsx',
|
||||
'D:/Projects/POM/DATA/国办通报/20230304错敏词.xlsx']
|
||||
|
||||
|
||||
######################################################################################
|
||||
def getWBData(path, hasBody=False):
    """Read the Weibo CSV exports below one city directory.

    Expected layout: ``path/<period dir>/<account dir>/<weibo id>.csv``.
    First-level directories whose name contains ``weixin`` or ``tt`` are
    skipped. The first row of every CSV (its header) is dropped.

    Args:
        path: City directory; its name is used to infer the city.
        hasBody: Unused; kept for call compatibility.

    Returns:
        DataFrame with the export columns plus ``weiboID``, ``账号名称``
        and ``市州``; empty (with those columns) when nothing is found.
    """
    dictC = {'LZ': '兰州市', 'LX': '临夏回族自治州', 'JC': '金昌市', 'ZY': '张掖市', 'LN': '陇南市', 'JYG': '嘉峪关市', 'TS': '天水市',
             'GN': '甘南藏族自治州', 'BY': '白银市', 'JQ': '酒泉市', 'QY': '庆阳市', 'PL': '平凉市', 'DX': '定西市', 'WW': '武威市', 'SZ': '省直部门',
             'lz': '兰州市', 'lx': '临夏回族自治州', 'jc': '金昌市', 'zy': '张掖市', 'ln': '陇南市', 'jyg': '嘉峪关市', 'ts': '天水市',
             'gn': '甘南藏族自治州', 'by': '白银市', 'jq': '酒泉市', 'qy': '庆阳市', 'pl': '平凉市', 'dx': '定西市', 'ww': '武威市', 'sz': '省直部门'
             }
    # Match the city code against the last path component first: matching the
    # whole path lets unrelated parents win (e.g. "ts" inside "Projects").
    # The whole path is only a last resort when the directory name has no code.
    strC = ''
    for candidate in (os.path.basename(os.path.normpath(path)), path):
        strC = next((v for k, v in dictC.items() if k in candidate), '')
        if strC:
            break
    print('-----------------------------------')
    print('CITY =', strC)

    # Column layout of the Weibo download tool
    cols = ['微博id', '微博正文', '头条文章url', '原始图片url', '被转发微博原始图片url', '是否为原创微博', '微博视频url', '发布位置', '发布时间', '发布工具', '点赞数',
            '转发数', '评论数']
    cs = cols + ['weiboID', '账号名称', '市州']

    period_frames = []
    for dirC in os.listdir(path):
        sc = os.path.join(path, dirC)
        # Only directories that are not WeChat / Toutiao data are scanned
        if not os.path.isdir(sc):
            continue
        if 'weixin' in dirC.lower() or 'tt' in dirC.lower():
            continue

        account_frames = []
        for dirCT in os.listdir(sc):
            # The directory name is the account name
            sct = os.path.join(sc, dirCT)
            if not os.path.isdir(sct):
                continue
            for dirA in os.listdir(sct):
                if os.path.splitext(dirA)[1] != '.csv':
                    continue
                posts = pd.read_csv(os.path.join(sct, dirA), sep=',', header=None, names=cols,
                                    index_col=None)
                posts = posts.iloc[1:].copy()  # row 0 is the header of the export
                posts["weiboID"] = dirA[:-4]  # file name without ".csv"
                posts["账号名称"] = dirCT
                account_frames.append(posts)
                print('.', end='')

        # DataFrame.append was removed in pandas 2.0; concatenate instead
        if account_frames:
            dfWBC = pd.concat(account_frames)
        else:
            dfWBC = pd.DataFrame(columns=cols + ['weiboID', '账号名称'])
        dfWBC['市州'] = strC
        period_frames.append(dfWBC)
        print(' ')
        print('-', dirC, dfWBC.shape[0])

    filled = [frame for frame in period_frames if not frame.empty]
    dfWB = pd.concat(filled).reindex(columns=cs) if filled else pd.DataFrame(columns=cs)
    print('', dfWB.shape[0])
    return dfWB
|
||||
|
||||
def getWBData_Province(path, hasBody=False):
    """Read the Weibo exports of every city directory below ``path``.

    Each sub-directory of ``path`` is handed to ``getWBData``. The result
    gets an empty ``标题`` column and the columns ``微博正文`` / ``发布时间``
    are renamed to ``内容`` / ``日期``.

    Args:
        path: Directory containing one sub-directory per city.
        hasBody: Passed through to ``getWBData``.

    Returns:
        The combined DataFrame (empty, with the expected columns, when no
        data is found).
    """
    cs = ['微博id', '微博正文', '头条文章url', '原始图片url', '被转发微博原始图片url', '是否为原创微博', '微博视频url', '发布位置', '发布时间', '发布工具', '点赞数',
          '转发数', '评论数', 'weiboID', '账号名称', '市州']
    frames = []
    for dirC in os.listdir(path):
        sc = os.path.join(path, dirC)
        if os.path.isdir(sc):
            frames.append(getWBData(sc, hasBody))

    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    frames = [frame for frame in frames if not frame.empty]
    dfWB = pd.concat(frames) if frames else pd.DataFrame(columns=cs)
    dfWB['标题'] = ''
    dfWB = dfWB.rename(columns={"微博正文": "内容", "发布时间": "日期"})
    return dfWB
|
||||
|
||||
def getWXData_Province(path, hasBody=False):
    """Read the WeChat Excel exports below ``path``.

    Entries of ``path`` are either city directories holding ``.xlsx`` /
    ``.xls`` files or loose Excel files. Every workbook read gets a
    ``市州`` column with the city inferred from the entry name. Hidden
    entries (leading dot) and nested sub-directories are skipped.

    Args:
        path: Directory to scan.
        hasBody: Unused; kept for call compatibility.

    Returns:
        The combined DataFrame; empty (without a ``市州`` column) when no
        workbook was found.
    """
    cs = ['公众号', '链接', '日期', '标题', '内容', '阅读数', '在看数', '点赞数', 'get_time', '头条', ]

    dictC = {'LZ': '兰州市', 'LX': '临夏回族自治州', 'JC': '金昌市', 'ZY': '张掖市', 'LN': '陇南市', 'JYG': '嘉峪关市', 'TS': '天水市',
             'GN': '甘南藏族自治州', 'BY': '白银市', 'JQ': '酒泉市', 'QY': '庆阳市', 'PL': '平凉市', 'DX': '定西市', 'WW': '武威市', 'SZ': '省直部门',
             'lz': '兰州市', 'lx': '临夏回族自治州', 'jc': '金昌市', 'zy': '张掖市', 'ln': '陇南市', 'jyg': '嘉峪关市', 'ts': '天水市',
             'gn': '甘南藏族自治州', 'by': '白银市', 'jq': '酒泉市', 'qy': '庆阳市', 'pl': '平凉市', 'dx': '定西市', 'ww': '武威市', 'sz': '省直部门'
             }

    def _is_excel(name):
        return name[-5:] == '.xlsx' or name[-4:] == '.xls'

    frames = []
    total = 0
    for dirC in os.listdir(path):
        if dirC[:1] == '.':
            continue
        sc = os.path.join(path, dirC)

        # Infer the city from the entry name first: matching the whole path
        # lets unrelated parents win (e.g. "ts" inside "Projects"). The whole
        # path is only a last resort when the entry name carries no code.
        strC = ''
        for candidate in (os.path.splitext(dirC)[0], sc):
            strC = next((v for k, v in dictC.items() if k in candidate), '')
            if strC:
                break
        print('-', strC)

        if os.path.isdir(sc):
            # City directory: read every Excel file directly inside it
            for dirCC in os.listdir(sc):
                scc = os.path.join(sc, dirCC)
                if dirCC[:1] == '.' or os.path.isdir(scc):
                    continue
                if _is_excel(dirCC):
                    dfcc = pd.read_excel(scc)
                    dfcc['市州'] = strC
                    frames.append(dfcc)
                    total += dfcc.shape[0]
                    print(' ', dirCC, dfcc.shape[0])
                else:
                    print('something error 01: ', dirCC)
        elif _is_excel(dirC):
            # Loose workbook next to the city directories
            dfc = pd.read_excel(sc)
            dfc['市州'] = strC  # tag the frame that was just read
            frames.append(dfc)
            total += dfc.shape[0]
            print(' ', dirC, dfc.shape[0])
        else:
            print('something error 02')
        print(' ', total)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    if frames:
        dfWX = pd.concat(frames)
        # Keep the standard columns first, then whatever else the files carry
        dfWX = dfWX.reindex(columns=cs + [c for c in dfWX.columns if c not in cs])
    else:
        dfWX = pd.DataFrame(columns=cs)
    print('ALL WX data', dfWX.shape[0])
    return dfWX
|
||||
|
||||
|
||||
# 从数据目录中读取xlsx文件,拼接到一起
|
||||
def getTTData(path, cities, hasBody=False):
|
||||
# cityShorten
|
||||
cityShorten = {'LZ':'兰州市', 'LX':'临夏回族自治州', 'JC':'金昌市', 'ZY':'张掖市', 'LN':'陇南市', 'JYG':'嘉峪关市',
|
||||
'TS':'天水市', 'GN':'甘南藏族自治州', 'BY':'白银市', 'JQ':'酒泉市', 'QY':'庆阳市', 'PL':'平凉市',
|
||||
'DX':'定西市', 'WW':'武威市', 'SZ':'省直部门', 'XQ': '兰州新区', 'LZXQ': '兰州新区',
|
||||
|
||||
'lz': '兰州市', 'lx': '临夏回族自治州', 'jc': '金昌市', 'zy': '张掖市', 'ln': '陇南市', 'jyg': '嘉峪关市',
|
||||
'ts': '天水市', 'gn': '甘南藏族自治州', 'by': '白银市', 'jq': '酒泉市', 'qy': '庆阳市', 'pl': '平凉市',
|
||||
'dx': '定西市', 'ww': '武威市', 'sz': '省直部门', 'xq': '兰州新区', 'lzxq': '兰州新区',
|
||||
|
||||
'白银': '白银市', '定西': '定西市', '酒泉': '酒泉市', '嘉峪关': '嘉峪关市', '陇南': '陇南市',
|
||||
'临夏': '临夏回族自治州', '平凉': '平凉市', '庆阳': '庆阳市', '天水': '天水市', '武威': '武威市', '新区': '兰州新区',
|
||||
'兰州': '兰州市', '张掖': '张掖市', '甘南': '甘南藏族自治州', '省直': '省直部门', '金昌': '金昌市',
|
||||
}
|
||||
dirCs = os.listdir(path)
|
||||
#account date title nread ncomment content url origin
|
||||
cs = ['account', 'date', 'title', 'nread', 'ncomment', 'content', 'url', 'origin', 'city']
|
||||
|
||||
dfTT = pd.DataFrame(columns=cs)
|
||||
cityCount = 0
|
||||
for dirC in dirCs:
|
||||
if dirC[:1] == '.' \
|
||||
or not os.path.isdir(os.path.join(path, dirC)) \
|
||||
or 'weixin' in dirC.lower() \
|
||||
or 'weibo' in dirC.lower() \
|
||||
or not cityShorten[dirC] in cities:
|
||||
continue
|
||||
cityCount += 1
|
||||
# City LN
|
||||
dfTTC = pd.DataFrame(columns=cs)
|
||||
dirCTs = os.listdir(os.path.join(path, dirC))
|
||||
for dirCT in dirCTs:
|
||||
if dirCT[:1] == '.' \
|
||||
or not os.path.isdir(os.path.join(path, dirC, dirCT)) \
|
||||
or 'weixin' in dirCT.lower() \
|
||||
or 'weibo' in dirCT.lower():
|
||||
continue
|
||||
if 'tt' in dirCT.lower() or dirC.lower() in dirCT.lower():
|
||||
fns = os.listdir(os.path.join(path, dirC, dirCT))
|
||||
numc = 0
|
||||
accounts = set()
|
||||
for fn in fns:
|
||||
if os.path.isdir(os.path.join(path, dirC, dirCT, fn)):
|
||||
print(' >>',fn)
|
||||
sds = os.listdir(os.path.join(path, dirC, dirCT, fn))
|
||||
for sd in sds:
|
||||
if sd[:1] == '.' or not sd[-5:] == '.xlsx' or sd.count('_') < 2:
|
||||
continue
|
||||
|
||||
ttName = sd[sd.index('_')+1:]
|
||||
ttName = ttName[:ttName.index('_')]
|
||||
fileAs = os.path.join(path, dirC, dirCT, fn, sd)
|
||||
#print(' ', ttName, fileAs)
|
||||
if len(fileAs) > 0:
|
||||
dfdftt = pd.read_excel(fileAs)
|
||||
dfTTC = dfTTC.append(dfdftt, ignore_index=True)
|
||||
numc = numc+1
|
||||
accounts.add(ttName)
|
||||
print('.', end='')
|
||||
print(' ')
|
||||
|
||||
#
|
||||
if fn[:1] == '.' or not fn[-5:] == '.xlsx' or fn.count('_') < 2:
|
||||
continue
|
||||
#print('---',fn)
|
||||
# 账号名称
|
||||
ttName = fn[fn.index('_')+1:]
|
||||
ttName = ttName[:ttName.index('_')]
|
||||
fileAs = os.path.join(path, dirC, dirCT, fn)
|
||||
#print(' ', ttName, fileAs)
|
||||
if len(fileAs) > 0:
|
||||
try:
|
||||
dfdftt = pd.read_excel(fileAs)
|
||||
except:
|
||||
print('')
|
||||
print("!!!!!!! 读取头条文件出错: ", fileAs)
|
||||
|
||||
if not dfdftt.empty:
|
||||
dfTTC = dfTTC.append(dfdftt, ignore_index=True)
|
||||
numc = numc+1
|
||||
accounts.add(ttName)
|
||||
print('.', end='')
|
||||
print(' ')
|
||||
print(' +', cityShorten[dirC], 'dir:', dirC, '/', dirCT, '账号数', len(accounts),'文件数', numc, '文章数', dfTTC.shape[0])
|
||||
dfTTC['city'] = cityShorten[dirC]
|
||||
dfTT = dfTT.append(dfTTC)
|
||||
|
||||
print('Read TT DIR finished. cities', cityCount, '; lines', dfTT.shape)
|
||||
#dfTT.to_excel("D:/Projects/POM/2021年7月/2021年上半年/WB_ALL.xlsx")
|
||||
return dfTT
|
||||
|
||||
|
||||
#######################################################
|
||||
#######################################################
|
||||
|
||||
|
||||
t0 = datetime.datetime.now()

# Column layout of the scan result; Weibo rows carry no '链接' value
_RESULT_COLUMNS = ['关键词', '上下文', '日期', '市州', '类型', '账号名称', '链接', '标题', '阅读数/评论数', '内容']


def _stack(frames, **kwargs):
    """Concatenate the non-empty frames; return an empty frame when there are none."""
    frames = [frame for frame in frames if not frame.empty]
    return pd.concat(frames, **kwargs) if frames else pd.DataFrame()


def _keyword_hits(text, margin=5):
    """Return (keywords, contexts) for ``text``, each joined with ';'.

    A context is the match plus up to ``margin`` characters on each side.
    """
    keywords = []
    contexts = []
    for hit in re.finditer(regSTR, text):
        lo = max(0, hit.start() - margin)
        hi = min(len(text), hit.end() + margin)
        keywords.append(hit.group())
        contexts.append(text[lo:hi])
    return ';'.join(keywords), ';'.join(contexts)


def _scan(frame, source_cols, content_col, kind, renames):
    """Select the rows whose content matches ``regSTR`` and annotate them.

    Keywords and contexts are assigned by position, so rows that share an
    index label (the inputs are concatenated files) do not overwrite each
    other.
    """
    if frame.empty:
        return pd.DataFrame(columns=_RESULT_COLUMNS)
    mask = frame[content_col].str.contains(regSTR, regex=True, na=False)
    hits = frame.loc[mask.to_numpy(), source_cols].copy()
    hits['类型'] = kind
    pairs = [_keyword_hits(str(text)) for text in hits[content_col]]
    hits['关键词'] = [keywords for keywords, _ in pairs]
    hits['上下文'] = [context for _, context in pairs]
    hits = hits.rename(columns=renames)
    return hits[[c for c in _RESULT_COLUMNS if c in hits.columns]]


def _add_identifier(frame, name_col, date_col):
    """Add 'date' and an '<account>_<YYYYMMDD>' 'identifier' column for cross-source matching."""
    if frame.empty:
        frame['date'] = pd.Series(dtype='datetime64[ns]')
        frame['identifier'] = pd.Series(dtype=object)
        return frame
    frame['date'] = pd.to_datetime(frame[date_col])
    frame['identifier'] = frame[name_col].map(str) + '_' + frame['date'].map(
        lambda x: str(x.year) + str(x.month).rjust(2, '0') + str(x.day).rjust(2, '0'))
    return frame


found_parts = []

# WX
if doWX:
    wx_frames = []
    for path in paths:
        ddff = getWXData_Province(path)
        print(' read WX data', ddff.shape)
        wx_frames.append(ddff)
    dfWX = _stack(wx_frames)
    print('WX data ', dfWX.shape)

    # Find the keywords and extract their context
    dfwxd = _scan(dfWX, ['市州', '公众号', '日期', '标题', '链接', '内容', '阅读数'], '内容', '微信',
                  {"阅读数": "阅读数/评论数", "公众号": "账号名称"})
    print("Found ", dfwxd.shape)
    found_parts.append(dfwxd)

# WB
if doWB:
    wb_frames = []
    for path in paths:
        dfWBff = getWBData_Province(path)
        print('read WB data', dfWBff.shape)
        wb_frames.append(dfWBff)
    dfWB = _stack(wb_frames)
    print("WB Data ", dfWB.shape)

    # Find the keywords and extract their context
    dfwbd = _scan(dfWB, ['市州', '账号名称', '标题', '日期', '评论数', '内容'], '内容', '微博',
                  {"评论数": "阅读数/评论数"})
    print("WB Found ", dfwbd.shape)
    found_parts.append(dfwbd)

#######################################################
# TT
if doTT:
    cities = [
        '临夏回族自治州',
        '白银市',
        '定西市',
        '酒泉市',
        '嘉峪关市',
        '平凉市',
        '庆阳市',
        '天水市',
        '武威市',
        '兰州新区',
        '陇南市',
        '兰州市', '张掖市', '甘南藏族自治州', '金昌市',
        '省直部门',  # 12 cities, 2 prefectures, 1 new area
    ]

    tt_frames = []
    for strP in paths:
        print("read TT data ", strP)
        tt_frames.append(getTTData(strP, cities))
    dfTT = _stack(tt_frames)
    print("TT data", dfTT.shape)

    # Find the keywords and extract their context
    dfttd = _scan(dfTT, ['city', 'account', 'date', 'title', 'url', 'content', 'nread'], 'content', '头条',
                  {'city': "市州", 'account': "账号名称", 'date': "日期", 'title': "标题", 'url': '链接',
                   'content': "内容", "nread": "阅读数/评论数"})
    print("Found ", dfttd.shape)
    found_parts.append(dfttd)

#################################
# DataFrame.append was removed in pandas 2.0; concatenate once instead
df = _stack(found_parts, ignore_index=True).reindex(columns=_RESULT_COLUMNS)
print('扫描完成,发现敏感词', df.shape[0])
df = _add_identifier(df, '账号名称', '日期')

print('读取已发现的敏感词文件')
# Columns: 错误 建议 账号类型 账号名称 错误出现位置 发文时间 标题 市州
found_frames = []
for f in fFound:
    dff = pd.read_excel(f)
    print(' ', f, dff.shape)
    found_frames.append(dff)
dfFound = _add_identifier(_stack(found_frames, ignore_index=True), '账号名称', '发文时间')
print(' 共', dfFound.shape[0])

print('读取国办通报数据')
# Columns: 序号 所属省份/部委 市州 公众号名称 运营主体 文章标题 文章链接 文章发布时间 是否转办 问题摘要 整改情况 监测情况
informed_frames = []
for f in fInformed:
    dfff = pd.read_excel(f)
    print(' ', f, dfff.shape[0])
    informed_frames.append(dfff)
dfInformed = _add_identifier(_stack(informed_frames, ignore_index=True), '公众号名称', '文章发布时间')
print(' 共', dfInformed.shape[0])

print('标记被监测出的和被通报的')
# Flag scan hits that monitoring already found or that were officially reported
df['监测'] = df['identifier'].isin(dfFound['identifier'])
df['通报'] = df['identifier'].isin(dfInformed['identifier'])

print('去掉已标记内容')
dfO = df.loc[~df['监测'] & ~df['通报']].copy()
print(dfO.shape)

#################################
# Output: one time-stamped folder holding the full result
sss = datetime.datetime.now().strftime("%Y-%m-%d_%H.%M.%S")
pathO = pathO + sss + '/'
os.makedirs(pathO, exist_ok=True)
dfO = dfO.drop(['date', 'identifier', '监测', '通报'], axis=1)
dfO.to_excel(pathO + sss + ".xlsx", index=False)

if splitByCity:
    print('按市州输出')
    cities = dfO['市州'].unique()
    for city in cities:
        dft = dfO[dfO['市州'].isin([city])]
        # str() keeps a missing city (NaN) from breaking the file name
        dft.to_excel(pathO + str(city) + '.xlsx', index=False)

# Elapsed time as hours / minutes / seconds
t = datetime.datetime.now() - t0
_hours, _rest = divmod(int(t.total_seconds()), 3600)
_minutes, _seconds = divmod(_rest, 60)
print('用时{}时{}分{}秒'.format(_hours, _minutes, _seconds))
|
|
@ -0,0 +1,238 @@
|
|||
import http.client
|
||||
from urllib import parse
|
||||
import json
|
||||
|
||||
def tpl_send_sms(apikey, tpl_id, tpl_value, mobile):
    """Send one SMS through the template interface.

    Args:
        apikey: API key of the SMS service.
        tpl_id: Template id.
        tpl_value: Template placeholders; URL-encoded before sending (a
            mapping, or an empty string when the template has none).
        mobile: Recipient phone number.

    Returns:
        The raw response body (bytes).
    """
    params = parse.urlencode({
        'apikey': apikey,
        'tpl_id': tpl_id,
        'tpl_value': parse.urlencode(tpl_value),
        'mobile': mobile
    })
    headers = {
        "Content-type": "application/x-www-form-urlencoded",
        "Accept": "text/plain"
    }
    conn = http.client.HTTPSConnection(sms_host, port=port, timeout=30)
    try:
        conn.request("POST", sms_tpl_send_uri, params, headers)
        return conn.getresponse().read()
    finally:
        # Close the connection even when the request or the read fails
        conn.close()
|
||||
|
||||
def send_sms(apikey, text, mobile):
    """Send one SMS with free text through the general interface.

    Args:
        apikey: API key of the SMS service.
        text: Message text.
        mobile: Recipient phone number.

    Returns:
        The raw response body (bytes).
    """
    params = parse.urlencode({'apikey': apikey, 'text': text, 'mobile': mobile})
    headers = {
        "Content-type": "application/x-www-form-urlencoded",
        "Accept": "text/plain"
    }
    conn = http.client.HTTPSConnection(sms_host, port=port, timeout=30)
    try:
        conn.request("POST", sms_send_uri, params, headers)
        return conn.getresponse().read()
    finally:
        # Close the connection even when the request or the read fails
        conn.close()
|
||||
|
||||
def sendMessage(apikey = "304eb08353f7ebf00596737acfc31f53"):
    """Send the fixed notice template (id 4621614) to every listed contact.

    The template has no placeholders: it announces that, from July 1, the
    warning cycle changes from 10 days to 7 days.

    NOTE(review): the API key default and the contact numbers are hard-coded
    in source; consider loading them from configuration.
    """
    tpl_id = 4621614

    # Company staff, notified as their own group
    dictGS = {
        'szq': '13359446622',
        'zyb': '13609346975'
    }
    # Contacts per city / county: name -> phone number
    dictCities = {
        '天水市': {'王慧': '18706936366', '王肖肖': '17793816150'},
        '白银市': {'高雅丽': '15393391905', '范小强': '13639306533', '张静静': '13830021006'},
        '定西市': {'党辉': '18893219695', '高刚': '18993265998'},
        '酒泉市': {'白苍松白秘书长': '13909371177', '吴建平': '13389370534'},
        '临夏州': {'周世泽': '13830103221', '马清明': '13993012391', '马静': '13993096392'},
        '平凉市': {'雷勇': '13809330195', '梁文芬': '13993366938'},
        '嘉峪关市': {'彭松涛':'18893605128'},
        '庆阳市': {'孙德勋': '13909342931', '闫红': '18993490882'},
        '华池县': {'李银粉': '13884192323'},
        '宁县': {'张虎帅': '13993434900'},
        '镇原县': {'刘主任': '13994327967', '苟罗文': '15268989815'},
    }
    dictCities['大未来'] = dictGS

    # Send the notice to every number, group by group
    for group, contacts in dictCities.items():
        print('----', group)
        for name, phone in contacts.items():
            reply = tpl_send_sms(apikey, tpl_id, '', phone)
            print(' ', name, phone, reply.decode('utf-8'))
|
||||
|
||||
def sendReportMonthly(apikey, sYear, sMon):
    """Tell every city's contacts that the monthly report has been sent.

    Uses template 4272748 with the placeholders ``#city#``, ``#year#`` and
    ``#month#``. The company staff numbers are added to every city's
    recipient list.

    Args:
        apikey: API key of the SMS service.
        sYear: Year inserted into the message.
        sMon: Month inserted into the message.
    """
    tpl_id = 4272748

    # Company staff, copied on every city's message
    dictGS = {
        'szq': '13359446622',
        'zyb': '13609346975'
    }
    # Contacts per city: name -> phone number
    dictCities = {
        '天水市': {'王慧': '18706936366', '王肖肖': '17793816150'},
        '白银市': {'高雅丽': '15393391905', '张静静': '13830021006'},
        '定西市': {'张勇':'13993200605', '高刚': '18993265998'},
        '酒泉市': {'吴建平': '13389370534'},
        '临夏州': {'周世泽': '13830103221', '马清明': '13993012391', '任琴霞': '13909300361'},
        '平凉市': {'雷勇': '13809330195', '万朵': '15193383961'},
        '庆阳市': {'孙德勋': '13909342931', '闫红': '18993490882'},
        '嘉峪关市': {'彭松涛': '18893605128'},
        '武威市': {'马巨龙': '15379291530'},
        '兰州新区': {'高天晓副主任':'13993685885', '刘玉明科长':'17726983336', '闫鹏':'15117091122', },
        '陇南市': {'王军主任':'18093988558', '杨帅兵':'13830941310'},
        '张掖市': {'李伟璟副主任':'13909365376', '张炜':'18993628432'}
    }

    # Send city by city
    for city, contacts in dictCities.items():
        print('----', city)
        recipients = dict(contacts)
        recipients.update(dictGS)  # city contacts first, then company staff
        for name, phone in recipients.items():
            tpl_value = {'#city#': city, '#year#': sYear, '#month#': sMon}
            reply = tpl_send_sms(apikey, tpl_id, tpl_value, phone)
            print(' ', name, phone, reply.decode('utf-8'))
|
||||
|
||||
def sendForewarning(apikey):
    """Print the weekly early-warning SMS text for each monitored city.

    This is a dry run: the ``tpl_send_sms`` calls are disabled, so ``apikey``
    is not used and the composed message is only printed once per recipient.

    Yunpian templates the disabled send calls refer to:
      * 4058906 - warning text; placeholders ``#dateStart#``, ``#dateEnd#``,
        ``#city#``, ``#count#``, ``#uq#``, ``#problem#``.
      * 4348890 - "everything normal" text; placeholders ``#dateStart#``,
        ``#dateEnd#``, ``#city#``, ``#amount#``.

    Args:
        apikey: Accepted for interface compatibility; unused while sending
            is disabled.
    """
    # Company-internal recipients, added to every city's list.
    company_contacts = {
        'szq': '13359446622',
        'zyb': '13609346975',
    }

    # Recipients per city (name -> mobile number).
    city_contacts = {
        '天水市': {'王慧': '18706936366', '王肖肖': '17793816150'},
        '白银市': {'高雅丽': '15393391905', '范小强': '13639306533', '张静静': '13830021006'},
        '定西市': {'张勇': '13993200605', '高刚': '18993265998'},
        '酒泉市': {'新领导': '13909371177', '吴建平': '13389370534'},
        '临夏州': {'周世泽': '13830103221', '马清明': '13993012391', '马静': '13993096392'},
        '平凉市': {'雷勇': '13809330195', '梁文芬': '13993366938'},
        '嘉峪关市': {'彭松涛': '18893605128'},
        '庆阳市': {'孙德勋': '13909342931', '闫红': '18993490882'},
        '华池县': {},
        '宁县': {'张虎帅': '13993434900'},
        '镇原县': {'刘主任': '13994327967', '苟罗文': '15268989815'},
    }

    # Monitoring period and per-city results (edit before each run).
    period = {
        'dateStart': '6月24日',
        'dateEnd': '30日',
    }
    results = {
        '白银市': {'账号数量': '361', '预警原因': '无更新', '预警数量': '49'},
        '定西市': {'账号数量': '406', '预警原因': '无更新', '预警数量': '23'},
        '酒泉市': {'账号数量': '376', '预警原因': '无更新', '预警数量': '22'},
        '临夏州': {'账号数量': '295', '预警原因': '无更新', '预警数量': '13'},
        '平凉市': {'账号数量': '312', '预警原因': '无更新', '预警数量': '16'},
        '庆阳市': {'账号数量': '303', '预警原因': '无更新', '预警数量': '10'},
        '天水市': {'账号数量': '225', '预警原因': '无更新', '预警数量': '19'},
        # 嘉峪关市 is currently disabled.
        '华池县': {'账号数量': '38', '预警原因': '无更新', '预警数量': '2'},
        '宁县': {'账号数量': '36', '预警原因': '无更新', '预警数量': '1'},
        '镇原县': {'账号数量': '33', '预警原因': '', '预警数量': '0'},
    }

    warning_fmt = ('【甘肃大未来科技】政务新媒体监测预警:{}至{},{}被监测的{}个政务新媒体账号中,有'
                   '{}个账号{},具体名单将发至相关工作人员,请予以关注提醒。')
    normal_fmt = '【甘肃大未来科技】{}至{},监测{}政务新媒体账号{}个,更新频次和发布内容正常。'

    # Process city by city.
    for city, stats in results.items():
        print('----', city)

        if city not in city_contacts:
            print('!!!!! ERROR !!!!!')
            continue

        recipients = city_contacts[city]
        recipients.update(company_contacts)

        flagged = int(stats['预警数量'])
        if flagged > 0:
            text = warning_fmt.format(period['dateStart'], period['dateEnd'], city,
                                      stats['账号数量'], stats['预警数量'], stats['预警原因'])
        elif flagged == 0:
            text = normal_fmt.format(period['dateStart'], period['dateEnd'], city,
                                     stats['账号数量'])
        else:
            continue

        for person, mobile in recipients.items():
            print(' ', person, mobile, text)
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # ---- SMS platform hosts, port and endpoints ----
    sms_host = "sms.yunpian.com"
    voice_host = "voice.yunpian.com"
    port = 443
    version = "v2"

    # Account-information query.
    user_get_uri = "/{}/user/get.json".format(version)
    # Smart-matching template SMS.
    sms_send_uri = "/{}/sms/single_send.json".format(version)
    # Template SMS.
    sms_tpl_send_uri = "/{}/sms/tpl_single_send.json".format(version)
    # Voice SMS.
    sms_voice_send_uri = "/{}/voice/send.json".format(version)
    # Voice verification code.
    voiceCode = 1234

    # ---- Credentials ----
    # NOTE(review): the API key is committed in plain text; consider loading
    # it from the environment or a config file outside version control.
    apikey = "304eb08353f7ebf00596737acfc31f53"

    # Exactly one of the three jobs below is enabled at a time.

    # Send a notice to all members.
    # sendMessage(apikey)

    # Send the monthly-report notification, city by city.
    sendReportMonthly(apikey, '2023', '1')

    # Send the early-warning message, city by city.
    # sendForewarning(apikey)
|
||||
|
||||
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,284 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
import glob, os, re, time
|
||||
from datetime import datetime
|
||||
from docx import Document
|
||||
from docx.oxml.ns import qn
|
||||
from docx.shared import Pt,RGBColor
|
||||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||||
import http.client
|
||||
from urllib import parse
|
||||
################
|
||||
################
|
||||
# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
# True: test mode, no SMS is sent.  False: live mode, SMS messages are sent.
TEST = False

# Monitoring period shown in the generated texts.
dDate = {
    'dateStart': '3月23日',
    'dateEnd': '29日',
}

# Output directory and the weekly-warning workbook located inside it.
outPath = 'D:/Projects/POM/DATA/2023年3月/3月31日预警/'
fn = outPath + '周预警_2023.3.29.xlsx'

# Cities (and separately reported counties) to process.
cities = {
    '白银市',
    '武威市',
    '庆阳市',
    '酒泉市',
    '天水市',
    '临夏回族自治州',
    '平凉市',
    '定西市',
    '嘉峪关市',
    '兰州新区',
    '陇南市',
    '张掖市',
    '庆阳市宁县',
    '庆阳市镇原县',
}

# cities = {'酒泉市'}

# Company-internal recipients, added to every city's list.
contactsDWL = {
    'szq': '13359446622',
    'zyb': '13609346975',
}

# Recipients per city (name -> mobile number).
contacts = {
    '天水市': {'王慧': '18706936366', '王肖肖': '17793816150'},
    '白银市': {'高雅丽': '15393391905', '张静静': '13830021006'},
    '定西市': {'张勇': '13993200605', '高刚': '18993265998'},
    '酒泉市': {'吴建平': '13389370534'},
    '临夏回族自治州': {'周世泽': '13830103221', '马清明': '13993012391', '马静': '13993096392'},
    '平凉市': {'雷勇': '13809330195', '万朵': '15193383961'},
    '武威市': {'马巨龙': '15379291530'},
    '嘉峪关市': {'彭松涛': '18893605128'},
    '庆阳市': {'孙德勋': '13909342931', '闫红': '18993490882'},
    # 庆阳市华池县 is currently disabled.
    '庆阳市宁县': {'zyb': '13609346975'},
    '庆阳市镇原县': {'zyb': '13609346975'},
    '兰州新区': {'高天晓副主任': '13993685885', '刘玉明科长': '17726983336', '闫鹏': '15117091122'},
    '陇南市': {'王军主任': '18093988558', '杨帅兵': '13830941310'},
    '张掖市': {'张炜': '18993628432', '李伟璟': '13909365376'},
}
|
||||
|
||||
# Load the weekly monitoring workbook and normalise its text cells.
df = pd.read_excel(fn)

# Strip all whitespace inside string cells (raw string: '\s' is a regex
# class, not a Python escape).
df.replace(r'\s+', '', regex=True, inplace=True)

# Both WeChat account flavours are reported under the single type '微信'.
df.loc[df['账号类型'].isin(['微信服务号', '微信订阅号']), '账号类型'] = '微信'

# Drop a leading '其他+' prefix.  regex=True must be explicit: newer pandas
# treats the pattern as a literal string by default, which would never match.
df['账号类型'] = df['账号类型'].str.replace(r'^其他\+', '', regex=True)

# SMS gateway settings.
sms_host = "sms.yunpian.com"
port = 443
sms_tpl_send_uri = "/v2/sms/tpl_single_send.json"
# NOTE(review): the API key is committed in plain text; consider loading it
# from the environment or a config file outside version control.
apikey = "304eb08353f7ebf00596737acfc31f53"
|
||||
def tpl_send_sms(sms_host, port, sms_tpl_send_uri, apikey, tpl_id, tpl_value, mobile):
    """Send one templated SMS through the HTTPS template endpoint.

    Args:
        sms_host: Host name of the SMS service.
        port: TCP port of the service.
        sms_tpl_send_uri: Request path of the template-send endpoint.
        apikey: API key sent as the ``apikey`` form field.
        tpl_id: Template id sent as the ``tpl_id`` form field.
        tpl_value: Mapping of template placeholders to values; it is
            URL-encoded on its own and sent as the ``tpl_value`` form field.
        mobile: Recipient number sent as the ``mobile`` form field.

    Returns:
        The raw response body as ``bytes``.

    Raises:
        OSError / http.client.HTTPException: propagated from the connection
            if the request fails; the connection is closed in either case.
    """
    params = parse.urlencode({
        'apikey': apikey,
        'tpl_id': tpl_id,
        'tpl_value': parse.urlencode(tpl_value),
        'mobile': mobile,
    })
    headers = {
        "Content-type": "application/x-www-form-urlencoded",
        "Accept": "text/plain",
    }
    conn = http.client.HTTPSConnection(sms_host, port=port, timeout=30)
    try:
        conn.request("POST", sms_tpl_send_uri, params, headers)
        return conn.getresponse().read()
    finally:
        # Always release the socket, even when the request or read fails.
        conn.close()
|
||||
# Per-city processing: build the warning text, archive it as a .docx, send
# (or preview) the SMS, and for selected cities write an Excel summary.

aa = 0  # running total of accounts whose result is not '合格'
bb = 0  # running total of monitored accounts

# Counties of 庆阳市 that are reported on their own: pseudo-city -> county.
QINGYANG_COUNTIES = {
    '庆阳市宁县': '宁县',
    '庆阳市华池县': '华池县',
    '庆阳市镇原县': '镇原县',
}

# Account types that get their own column in the Excel summary.
SUMMARY_TYPES = ['微信', '新浪微博', '今日头条', '抖音短视频']

for city in cities:
    print('~~~~~~~~~~~~~~~~~~~~~~~~')

    # ---- select this city's rows ----
    dfC = df.loc[df['市/省局'] == city].copy()
    cc = dfC.shape[0]
    if city == '庆阳市':
        # The separately reported counties are excluded from the city's rows.
        # NOTE(review): cc is not recomputed here, so the account count still
        # includes those counties - confirm this is intended.
        dfC = df.loc[(df['市/省局'] == city)
                     & ~df['区县/地方部门'].isin(['华池县', '宁县', '镇原县'])].copy()
    county = QINGYANG_COUNTIES.get(city)
    if county is not None:
        dfC = df.loc[(df['市/省局'] == '庆阳市')
                     & (df['区县/地方部门'] == county)].copy()
        cc = dfC.shape[0]

    # Accounts whose monitoring result is anything other than '合格'.
    dfCU = dfC.loc[dfC['监测结果'] != '合格'].copy()
    n_flagged = dfCU.shape[0]

    # ---- warning text and per-type account lists ----
    warningLists = []
    if n_flagged > 0:
        warningText = '【甘肃大未来科技】政务新媒体监测预警:{}至{},{}被监测的{}个政务新媒体账号中,有{}个账号无更新,具体名单附后,请予以关注提醒。'.format(
            dDate['dateStart'], dDate['dateEnd'], city, cc, n_flagged)
    else:
        warningText = '【甘肃大未来科技】政务新媒体监测预警:{}至{},监测{}政务新媒体账号{}个,更新频次和发布内容正常。'.format(
            dDate['dateStart'], dDate['dateEnd'], city, cc)
    print(warningText)

    if n_flagged > 0:
        for acc_type, dfa in dfCU.groupby('账号类型'):
            s = '{}({}个): {}'.format(acc_type, dfa.shape[0], ', '.join(dfa['账号名称']))
            warningLists.append(s)
            print(s)
    print(' ')

    # ---- archive the city report as a Word document ----
    doc = Document()
    normal_style = doc.styles['Normal']
    normal_style.font.name = u'宋体'
    # The East-Asian font must be set on the XML element as well.
    normal_style._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')
    normal_style.font.size = Pt(16)
    normal_style.font.color.rgb = RGBColor(0, 0, 0)

    p1 = doc.add_heading(city + '政务新媒体监测预警', 0)
    p1.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
    p2 = doc.add_paragraph(warningText)
    p2.paragraph_format.first_line_indent = 406400
    for s in warningLists:
        doc.add_paragraph(s)
    doc.save(outPath + city + '.docx')

    # ---- send (or, in TEST mode, only preview) the SMS ----
    recipients = contacts[city]
    recipients.update(contactsDWL)  # add the company's own numbers

    if n_flagged > 0:
        # Template 4058906: warning text with placeholders #dateStart#,
        # #dateEnd#, #city#, #count#, #uq#, #problem#.
        tpl_id = 4058906
        tpl_value = {'#dateStart#': dDate['dateStart'], '#dateEnd#': dDate['dateEnd'],
                     '#city#': city, '#count#': cc,
                     '#uq#': n_flagged, '#problem#': '未更新'}
        preview = ('【甘肃大未来科技】政务新媒体监测预警:' + dDate['dateStart'] + '至' + dDate['dateEnd']
                   + ',' + city + '被监测的' + str(cc) + '个政务新媒体账号中,有'
                   + str(n_flagged) + '个账号' + '未更新'
                   + ',未发现涉及敏感的错误内容。具体名单发至相关工作人员,请予以关注。')
    else:
        # Template 4348890: "everything normal" text with placeholders
        # #dateStart#, #dateEnd#, #city#, #amount#.
        tpl_id = 4348890
        tpl_value = {'#dateStart#': dDate['dateStart'], '#dateEnd#': dDate['dateEnd'],
                     '#city#': city, '#amount#': cc}
        preview = ('【甘肃大未来科技】政务新媒体监测预警:' + dDate['dateStart'] + '至' + dDate['dateEnd']
                   + ',监测' + city + '政务新媒体账号' + str(cc) + '个,更新频次和发布内容正常。')

    sss = ''
    for contact, mobile in recipients.items():
        sss = preview
        if not TEST:
            sss = tpl_send_sms(sms_host, port, sms_tpl_send_uri, apikey,
                               tpl_id, tpl_value, mobile).decode('utf-8')
    # Only the last recipient's result (or the preview text) is shown.
    print("sendSMS:", sss)
    print(" ", ', '.join(name + mobile for name, mobile in recipients.items()))

    # ---- Excel summary table (only for selected cities) ----
    if city in ['陇南市']:
        # Rows without a county belong to the municipal departments.
        dfCU.loc[dfCU['区县/地方部门'] == '', '区县/地方部门'] = '市直单位'
        dfCU['区县/地方部门'] = dfCU['区县/地方部门'].fillna('市直单位')

        # Number of flagged accounts per county.
        dfLN = dfCU.groupby('区县/地方部门').agg({"账号名称": "count"})
        # Account names joined per (county, account type).
        dfLNR = dfCU.groupby(['区县/地方部门', '账号类型'])['账号名称'].apply(
            lambda x: x.str.cat(sep=', ')).reset_index()
        # Number of flagged accounts per account type.
        dfLNS = dfCU.groupby(['账号类型']).agg({"账号名称": "count"})

        print('-=-=-=')
        print(dfLNS)
        print('-=-=-=')

        types = SUMMARY_TYPES
        dfw = pd.DataFrame([], columns=['区县', '未更新数'] + types)

        # Staging area: county -> [count, names per type in `types` order].
        d = dict()
        for i, r in dfLN.iterrows():
            d[i] = [r.iloc[0], '', '', '', '']

        for i, r in dfLNR.iterrows():
            sCounty = r['区县/地方部门']
            sType = r['账号类型']
            sAccount = r['账号名称']
            print(' ', sCounty, sType, sAccount)
            # Raises ValueError for an account type not listed in `types`.
            d[sCounty][1 + types.index(sType)] = sAccount

        print(d)
        print('--')
        for k, row in d.items():
            print(k, row[0], row[1], row[2], row[3], row[4])
            dfw.loc[len(dfw)] = {'区县': k, '未更新数': row[0],
                                 types[0]: row[1], types[1]: row[2],
                                 types[2]: row[3], types[3]: row[4]}

        print('====')

        # Totals row: per-type counts, 0 where a type has no flagged account.
        totals = [dfLNS.loc[t, '账号名称'] if t in dfLNS.index else 0 for t in types]
        total_row = {'区县': '总 计', '未更新数': dfw['未更新数'].sum()}
        total_row.update(zip(types, totals))
        dfw.loc[len(dfw)] = total_row

        print('==-==', *totals)

        # Write the table below a title placed in the first row.
        sFn = outPath + city + '周预警' + datetime.now().strftime('_%Y.%m.%d') + '.xlsx'
        print('======== write to ', sFn)
        title = '政务新媒体周预警未更新账号统计表({}-{})'.format(dDate['dateStart'], dDate['dateEnd'])
        # The context manager saves and closes the workbook on exit.
        with pd.ExcelWriter(sFn) as writer:
            dfw.to_excel(writer, index=False, startrow=1)
            # NOTE(review): write_string is an xlsxwriter worksheet method, so
            # this presumably relies on the xlsxwriter engine - confirm.
            writer.sheets['Sheet1'].write_string(0, 0, title)

    aa += n_flagged
    bb += cc
    print('----{}----({}/{})'.format(city, n_flagged, cc))

print('----{}----({}/{})'.format('ALL', aa, bb))
|
Loading…
Reference in New Issue