611 lines
25 KiB
Python
611 lines
25 KiB
Python
# 1. 打开监测任务表格
|
||
import pandas as pd
|
||
import numpy as np
|
||
import os, glob, re
|
||
import matplotlib.pyplot as plt
|
||
from matplotlib.ticker import FuncFormatter
|
||
import datetime
|
||
|
||
from docxtpl import DocxTemplate
|
||
from docxtpl import InlineImage
|
||
from docx.shared import Mm
|
||
|
||
def toDate(strDT):
    """Format a date-like value as ``MM-DD``.

    The value is parsed with ``pd.to_datetime(..., errors='coerce')``, so
    anything that cannot be parsed produces an empty string instead of an
    exception.

    Args:
        strDT: Value to parse (callers in this file pass ``str(cell)``).

    Returns:
        The month and day as ``'%m-%d'``, or ``''`` when parsing fails.
    """
    parsed = pd.to_datetime(strDT, errors='coerce')
    if pd.isna(parsed):
        return ''
    return parsed.strftime('%m-%d')
|
||
|
||
# word模板替换
|
||
def temp_word(tmep_path, word_apth, dContext, pathImage, city):
    """Render the Word template with the report context plus the chart images.

    Args:
        tmep_path: Path of the docxtpl template to load.
        word_apth: Path the rendered document is saved to.
        dContext: Template context; it is updated in place with one
            ``InlineImage`` per chart before rendering.
        pathImage: Directory holding the chart PNG files.
        city: Prefix of the chart file names (``<city><chart>.png``).
    """
    tpl = DocxTemplate(tmep_path)

    # Context key == chart file stem; every chart is embedded 120 mm wide.
    chart_names = (
        'annulusMediaCount',
        'annulusCountyCount',
        'annulusCountyArticle',
        'annulusResult',
        'barCountyRatio',
    )
    charts = {}
    for chart in chart_names:
        png_path = os.path.join(pathImage, city + chart + '.png')
        charts[chart] = InlineImage(tpl, png_path, width=Mm(120))

    dContext.update(charts)
    tpl.render(dContext)
    tpl.save(word_apth)
|
||
|
||
|
||
# 画柱状图
|
||
def drawBar(data, recipe, title='', fn=''):
    """Draw a bar chart of ratios and optionally save it to an image file.

    The y axis is fixed to the range 0..1 and its ticks are rendered as
    percentages, so ``data`` is expected to hold fractions.

    Args:
        data: Bar heights, one per entry of ``recipe``.
        recipe: Category labels shown on the x axis (rotated by 35 degrees).
        title: Chart title.
        fn: Output image path. When empty, nothing is written -- the same
            convention ``drawAnnulus`` uses for its default ``fn=''``.
    """
    plt.figure(figsize=(6, 4))
    try:
        # SimHei is selected so the Chinese labels passed by the callers can
        # be rendered; 'axes.unicode_minus' avoids a missing minus glyph.
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.rcParams['axes.unicode_minus'] = False

        plt.bar(recipe, data, width=0.5)
        plt.xticks(recipe, recipe, rotation=35)
        plt.ylim((0, 1))

        def to_percent(temp, position):
            # Tick formatter: 0.25 -> '25%'.
            return '%2.0f' % (100 * temp) + '%'

        plt.gca().yaxis.set_major_formatter(FuncFormatter(to_percent))
        plt.title(title, fontsize=16)
        plt.tight_layout()
        if fn:
            plt.savefig(fn)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close()
|
||
|
||
|
||
# 画环状图
|
||
def drawAnnulus(data, recipe, title='', fn=''):
    """Draw a ring (donut) chart with a legend and optionally save it.

    Args:
        data: Wedge sizes.
        recipe: Legend labels, one per wedge.
        title: Chart title; omitted when empty.
        fn: Output image path; nothing is saved when empty.
    """
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    # Canvas width, legend column count and legend font size grow with the
    # number of labels so that long legends still fit.
    label_total = len(recipe)
    canvas_height = 4
    if label_total > 40:
        canvas_width, legend_columns, legend_font = 16, 4, 'x-small'
    elif label_total > 20:
        canvas_width, legend_columns, legend_font = 16, 2, 'x-small'
    else:
        canvas_width, legend_columns, legend_font = 8, 1, 'medium'

    fig, ax = plt.subplots(figsize=(canvas_width, canvas_height),
                           subplot_kw=dict(aspect="equal"))

    # wedgeprops sets the ring width; wedges are drawn counter-clockwise
    # starting from the positive x axis (startangle=0).
    ax.pie(data, radius=1.1, wedgeprops=dict(width=0.4), startangle=0)

    # The monitoring-result chart places its legend slightly closer to the ring.
    legend_x = 1.0 if title == '政务新媒体监测结果' else 1.2
    plt.legend(labels=recipe, loc="center left", bbox_to_anchor=(legend_x, 0.5),
               borderaxespad=0., ncol=legend_columns, fontsize=legend_font)

    if len(title) > 0:
        ax.set_title(title, fontsize=16, fontweight='heavy')

    plt.tight_layout()
    if len(fn) > 0:
        plt.savefig(fn)

    plt.cla()
    plt.clf()
    plt.close()
|
||
|
||
# summaryCity(city, dfc, dfcw, dfcs, context, strfnTemplate, os.path.join(strPathVerified,'Reports', city+'.docx'), strPathVerified )
|
||
|
||
|
||
# 汇总市州数据,
|
||
# Arguments: report info dict, city name, monitoring data, typo (cbz) data, sensitive-word (mgc) data, Word template file name, output Word file name, temp-file directory
|
||
# 需要传入模板文件,数据、错别字、敏感词,单位名称等
|
||
# Report recipient ("client") for every supported city key.  The position of a
# key in this dict is also used as the two-digit prefix of the report id, so
# the order must not be changed.
# NOTE(review): the original literal listed "陇南市" twice ("陇南市人民政府办公室"
# and "陇南市政务服务中心").  Python keeps the first position and the last value,
# which is what is reproduced here -- confirm which name is intended.
_CITY_CLIENTS = {
    '甘肃省': "甘肃省人民政府办公厅",
    '省直部门': "甘肃省人民政府办公厅",
    '白银市': "白银市人民政府办公室",
    '定西市': "定西市人民政府办公室",
    '临夏回族自治州': "临夏回族自治州人民政府办公室",
    '平凉市': "中共平凉市委网络安全和信息化委员会办公室",
    "庆阳市": "庆阳市电子政务与信息资源管理办公室",
    '庆阳市华池县': "华池县人民政府办公室",
    '庆阳市宁县': "宁县人民政府办公室",
    "庆阳市镇原县": "镇原县人民政府办公室",
    "酒泉市": "酒泉市人民政府办公室",
    "天水市": "天水市人民政府办公室",
    "武威市": "武威市人民政府办公室",
    "金昌市": "金昌市人民政府办公室",
    "嘉峪关市": "嘉峪关市人民政府办公室",
    "兰州新区": "兰州新区管委会办公室",
    "陇南市": "陇南市政务服务中心",
    "张掖市": "张掖市政务服务中心",
    "甘南藏族自治州": "甘南藏族自治州政务服务中心",
    "兰州市": "兰州市政务服务中心",
}

# Characters removed from typo sentences because they interfere with the
# table output of the Word template.
_TEMPLATE_UNSAFE_CHARS = re.compile(r"[——,$%^,。?、~@#¥%……&*《》<>「」{}【】()/\\\[\]'\"]")


def _select_city_rows(city, df, dfW, dfS):
    """Return (dfc, dfcw, dfcs, subordinate, subordinateName) for one report."""
    subordinate = '区县/地方部门'
    subordinateName = '县区'

    if "庆阳市" in city:
        in_qingyang = df['市/省局'] == '庆阳市'
        # "庆阳市<county>" keys produce a report for that single county.
        county = next((c for c in ('华池县', '宁县', '镇原县') if c in city), None)
        if county is None:
            dfc = df.loc[in_qingyang].copy()
        else:
            dfc = df.loc[in_qingyang & (df['区县/地方部门'] == county)].copy()
        # The typo / sensitive-word sheets only carry a city column, so county
        # reports receive the findings of the whole city.
        dfcw = dfW.loc[dfW['市州'] == '庆阳市'].copy()
        dfcs = dfS.loc[dfS['市州'] == '庆阳市'].copy()
    elif "甘肃" in city:
        # Province-wide report: all rows, grouped by city instead of county.
        dfc = df.copy()
        dfcw = dfW.copy()
        dfcs = dfS.copy()
        subordinate = '市/省局'
        subordinateName = '市州'
    else:
        # Also covers '省直部门': the original looked the name up in an
        # undefined ``dictSC`` mapping (NameError); mergeCMC stores such rows
        # under the very same '省直部门' label, so ``city`` is used directly.
        dfc = df.loc[df['市/省局'] == city].copy()
        dfcw = dfW.loc[dfW['市州'] == city].copy()
        dfcs = dfS.loc[dfS['市州'] == city].copy()

    return dfc, dfcw, dfcs, subordinate, subordinateName


def _blank_if_na(value):
    """Return '' for missing cells so they do not render as 'nan'."""
    return '' if pd.isna(value) else value


def _int_or_blank(row, column):
    """Return the pivot cell as int, or '' when the column/cell is empty."""
    if column not in row.index:
        return ''
    value = row[column]
    if isinstance(value, str) or pd.isna(value):
        return ''
    return int(value)


def _account_rows(frame, subordinate):
    """Build the per-account table rows (tt4 / tt5) of the template."""
    rows = []
    for _, row in frame.iterrows():
        rows.append({
            'name': row['账号名称'],
            'type': row['账号类型'],
            'unit': row['开设主体'],
            'county': row[subordinate],
            'result': row['监测结果'],
            # Falsy cells (0 or '') are rendered as blanks.
            'num': "%d" % row['更新次数'] if row['更新次数'] else '',
            'days': "%d" % row['最大静默日数'] if row['最大静默日数'] else '',
            'start': toDate(str(row['静默开始日期'])) if row['静默开始日期'] else '',
            'end': toDate(str(row['静默结束日期'])) if row['静默结束日期'] else '',
        })
    return rows


def _finding_rows(frame, strip_punctuation):
    """Build the typo / sensitive-word table rows of the template."""
    has_title = '标题' in frame.columns
    rows = []
    for _, row in frame.iterrows():
        sentence = _blank_if_na(row['错误出现位置'])
        if strip_punctuation:
            sentence = _TEMPLATE_UNSAFE_CHARS.sub('', str(sentence))
        rows.append({
            'error': _blank_if_na(row['错误']),
            'tips': _blank_if_na(row['建议']),
            'sentence': sentence,
            'type': _blank_if_na(row['账号类型']),
            'name': _blank_if_na(row['账号名称']),
            'date': toDate(str(row['发文时间'])),
            'title': _blank_if_na(row['标题']) if has_title else '',
        })
    return rows


def summaryCity(info, city, df, dfW, dfS, fnTemplate, fnReport, dirTemp):
    """Aggregate one city's monitoring data, draw its charts and render its report.

    Args:
        info: Report-wide template values; ``info['num']`` is appended to the
            city index to form the report id and all entries are copied into
            the template context.
        city: Key of ``_CITY_CLIENTS`` selecting the report to build.
        df: Monitoring data. Columns read here: '市/省局', '区县/地方部门',
            '监测结果', '账号名称', '账号类型', '开设主体', '更新次数',
            '最大静默日数', '静默开始日期', '静默结束日期'.
        dfW: Typo findings with columns '市州', '发文时间', '错误', '建议',
            '错误出现位置', '账号类型', '账号名称' and optionally '标题'.
        dfS: Sensitive-word findings, same columns as ``dfW``.
        fnTemplate: Path of the Word template.
        fnReport: Path of the report to write.
        dirTemp: Directory the chart images are written to.

    Raises:
        ValueError: If ``city`` is not a key of ``_CITY_CLIENTS``.
    """
    if city not in _CITY_CLIENTS:
        raise ValueError("Unknown city %r: it has no entry in _CITY_CLIENTS" % (city,))

    print("----------------" + city + "----------------")

    # Report id = two-digit position of the city + the issue number.
    strID = "%02d" % list(_CITY_CLIENTS).index(city)
    context = {
        "city": city,
        "client": _CITY_CLIENTS[city],
        "reportid": strID + info['num'],
    }
    context.update(info)

    dfc, dfcw, dfcs, subordinate, subordinateName = _select_city_rows(city, df, dfW, dfS)
    dCity = {}

    # ----- accounts per subordinate unit and monitoring result -----
    dfCountyAccount = pd.pivot_table(dfc, index=[subordinate], columns=['监测结果'], values=['账号名称'],
                                     aggfunc='count', fill_value='', margins=True)
    dfCountyAccount.columns = dfCountyAccount.columns.droplevel(0)
    context['tt3_contents'] = [
        {
            'county': '总 计' if index == 'All' else index,
            'hg': _int_or_blank(row, '合格'),
            'u2w': _int_or_blank(row, '超过两周未更新'),
            'un': _int_or_blank(row, '监测期间未更新'),
            'count': _int_or_blank(row, 'All'),
        }
        for index, row in dfCountyAccount.iterrows()
    ]

    # ----- accounts per media type -----
    dfMedia = pd.pivot_table(dfc, index=['账号类型'], values=['账号名称'], aggfunc='count',
                             fill_value='', margins=True)
    dCity['nmCount'] = dfMedia.loc['All', '账号名称']
    print(' 监测账号数:', dCity['nmCount'])
    dfMedia = dfMedia.sort_values(by='账号名称', ascending=False)
    # Drop the total by label instead of assuming it sorts first (ties).
    mediaCounts = dfMedia['账号名称'].drop('All')
    dCity['sMediaCount'] = ','.join(m + str(n) + '个' for m, n in mediaCounts.items())
    context['tt1_contents'] = [{'type': m, 'count': str(n)} for m, n in mediaCounts.items()]

    # ----- update counts per subordinate unit -----
    dfCountyArticle = pd.pivot_table(dfc, index=[subordinate], values=['更新次数'], aggfunc='sum',
                                     fill_value='', margins=True)
    dfCountyArticle = dfCountyArticle.sort_values(by='更新次数', ascending=False)
    countyArticles = dfCountyArticle['更新次数'].drop('All')
    dCity['cityArticleCount'] = "%d" % dfCountyArticle.loc['All', '更新次数']
    dCity['countyMostArticle'] = countyArticles.index[0]
    dCity['countyMostArticleCount'] = "%d" % countyArticles.iloc[0]
    dCity['sCountyArticles'] = ','.join(c + "%d" % n + "次" for c, n in countyArticles.items())

    # ----- qualification rate per subordinate unit -----
    # The pivot is filled with '' for empty cells, so convert to numbers
    # before doing arithmetic; a unit without qualified accounts counts as 0.
    if '合格' not in dfCountyAccount.columns:
        dfCountyAccount['合格'] = 0
    for column in ('合格', 'All'):
        dfCountyAccount[column] = pd.to_numeric(dfCountyAccount[column], errors='coerce').fillna(0).astype(int)
    dfCountyAccount = dfCountyAccount.sort_values(by='All', ascending=False)
    dfCountyAccount['rate'] = dfCountyAccount['合格'] / dfCountyAccount['All']
    dfResult = dfCountyAccount.copy()
    dCity['cityRatio'] = "{:.1%}".format(dfCountyAccount.loc['All', 'rate'])
    print(' 合格率:', dCity['cityRatio'])

    dfCountyAccount = dfCountyAccount.drop(['All'])  # keep only the units
    counties = dfCountyAccount.index.tolist()
    countyCounts = dfCountyAccount['All'].values.tolist()
    dCity['countyCount'] = "%d" % len(counties)
    dCity['sCounties'] = ','.join(counties)
    dCity['sCountyCount'] = ','.join(c + str(n) + '个' for c, n in zip(counties, countyCounts))

    dfCountyAccount = dfCountyAccount.sort_values(by='rate', ascending=False)
    countieshege = dfCountyAccount.index.tolist()
    countyRates = dfCountyAccount['rate'].tolist()
    tt2_list = []
    for c, rate in zip(countieshege, countyRates):
        strRatio = "%.1f" % (100.0 * rate)
        tt2_list.append({'county': c, 'ratio': strRatio + '%'})
    dCity['sCountyRatio'] = ','.join(row['county'] + row['ratio'] for row in tt2_list)
    dCity['tt2_contents'] = tt2_list

    # ----- overall monitoring result -----
    dfResult = dfResult.drop(columns=['All', 'rate'])
    numQualified = dfResult.loc['All', '合格']
    dCity['resultQualified'] = "%d" % numQualified
    dCity['resultQualifiedRatio'] = "%.1f%%" % (numQualified / dCity['nmCount'] * 100.0)
    dCity['resultUnqualified'] = "%d" % (dCity['nmCount'] - numQualified)

    if '监测期间未更新' in dfResult.columns:
        numNoupdated = dfResult.loc['All', '监测期间未更新']
        dCity['stringResultNoUpdated'] = ";%d个政务新媒体监测期间未更新,占监测总数的%.1f%%" % (
            numNoupdated, numNoupdated / dCity['nmCount'] * 100.0)
        dCity['stringNoUpdated'] = "%d个政务新媒体监测期间未更新。" % (numNoupdated)
    else:
        dCity['stringResultNoUpdated'] = ''
        dCity['stringNoUpdated'] = ""

    if '超过两周未更新' in dfResult.columns:
        numNoupdated2W = dfResult.loc['All', '超过两周未更新']
        dCity['stringResultNoUpdated2W'] = ";%d个政务新媒体连续未更新时间超过两周,占监测总数的%.1f%%" % (
            numNoupdated2W, numNoupdated2W / dCity['nmCount'] * 100.0)
        dCity['stringNoUpdated2W'] = "%d个政务新媒体连续未更新时间超过两周。" % (numNoupdated2W)
    else:
        dCity['stringResultNoUpdated2W'] = ''
        dCity['stringNoUpdated2W'] = ''

    # ----- charts -----
    print(' 生成图片...')
    drawAnnulus(mediaCounts.tolist(), mediaCounts.index.tolist(),
                '政务新媒体账号类型', os.path.join(dirTemp, city + 'annulusMediaCount.png'))
    drawAnnulus(countyCounts, counties,
                subordinateName + '政务新媒体账号数量', os.path.join(dirTemp, city + 'annulusCountyCount.png'))
    drawAnnulus(countyArticles.tolist(), countyArticles.index.tolist(),
                subordinateName + '政务新媒体累计更新次数', os.path.join(dirTemp, city + 'annulusCountyArticle.png'))
    drawAnnulus(dfResult.loc['All'].values.tolist(), dfResult.columns.values.tolist(),
                '政务新媒体监测结果', os.path.join(dirTemp, city + 'annulusResult.png'))
    drawBar(countyRates, countieshege,
            '政务新媒体管理矩阵发布时效性合格率榜单', os.path.join(dirTemp, city + 'barCountyRatio.png'))

    # ----- report tables -----
    print(' 生成报告...')
    dfCityUnqulified = dfc[dfc['监测结果'] != '合格'].sort_values(by="监测结果", ascending=True)
    dfCityQulified = dfc[dfc['监测结果'] == '合格'].sort_values(by=subordinate, ascending=True)
    context['tt4_contents'] = _account_rows(dfCityUnqulified, subordinate)
    context['tt5_contents'] = _account_rows(dfCityQulified, subordinate)

    # Typo table: sentences are stripped of template-breaking punctuation.
    context['tCbz_contents'] = _finding_rows(dfcw, strip_punctuation=True)
    if dfcw.shape[0] > 0:
        dCity['stringCbzCount'] = '本次检测发现错别字%d处,详细情况见附表政务新媒体发布内容错别字统计表。' % (dfcw.shape[0])
    else:
        dCity['stringCbzCount'] = '本次检测未发现错别字。'

    # Sensitive-word table: sentences are passed through unchanged.
    context['tMgc_contents'] = _finding_rows(dfcs, strip_punctuation=False)
    if dfcs.shape[0] > 0:
        dCity['stringMgcCount'] = '本次检测发现敏感信息%d处,详细情况见附表政务新媒体发布内容敏感信息统计表。' % (dfcs.shape[0])
    else:
        dCity['stringMgcCount'] = '本次检测未发现涉敏内容。'

    context.update(dCity)

    # ----- render the report from the template -----
    temp_word(fnTemplate, fnReport, context, dirTemp, city)
|
||
|
||
def createDir(dirP, dirS):
    """Make sure the sub-directory ``dirS`` exists below ``dirP``.

    Missing parent directories are created as well, and an already existing
    directory is accepted.

    Args:
        dirP: Parent directory.
        dirS: Name of the sub-directory to create inside ``dirP``.

    Returns:
        The path of the sub-directory. If it cannot be created (for example
        because a regular file is in the way), the failure is printed and
        ``dirP`` is returned as a fallback.
    """
    dirN = os.path.join(dirP, dirS)
    try:
        os.makedirs(dirN, exist_ok=True)
    except OSError as err:
        # Report the directory that actually failed, not the fallback.
        print('Directory ' + dirN + ' cannot be created: ' + str(err))
        return dirP
    return dirN
|
||
# def createDir(dirP, dirS):
|
||
|
||
def summary(info, strFnData, strFnW, strFnS, strfnTemplate, strPathOutput, cities=None):
    """Load the monitoring workbooks, normalise them and build the city reports.

    Args:
        info: Report-wide template values, forwarded to ``summaryCity``.
        strFnData: Excel file with the monitoring data.
        strFnW: Excel file with the merged typo findings.
        strFnS: Excel file with the merged sensitive-word findings.
        strfnTemplate: Path of the Word template.
        strPathOutput: Output location; the ``Reports`` and ``Intermediate``
            directories are created under ``os.path.dirname(strPathOutput)``,
            so a directory path should end with a path separator.
        cities: Optional iterable of city keys to report on. Defaults to
            ``{'张掖市'}``, the selection that was hard-coded here before.
    """
    # Load monitoring data, typos and sensitive words.
    df = pd.read_excel(strFnData)
    dfW = pd.read_excel(strFnW)
    dfS = pd.read_excel(strFnS)

    # Strip spaces, line breaks and tabs from all text cells first, so that
    # the exact-match normalisations below are not defeated by stray
    # whitespace, then drop a leading "其他+" prefix.
    df = df.replace(r'\s+', '', regex=True)
    df = df.replace(r'^其他\+', '', regex=True)

    # Use a single wording for the monitoring result.
    df.loc[df['监测结果'] == '连续两周未更新', '监测结果'] = '超过两周未更新'

    # Replace very long county names by short forms so they fit the charts.
    df.loc[df['区县/地方部门'] == '积石山保安族东乡族撒拉族自治县', '区县/地方部门'] = '积石山县'
    df.loc[df['区县/地方部门'] == '阿克塞哈萨克族自治县', '区县/地方部门'] = '阿克塞自治县'

    # Rows without a city belong to provincial departments, rows without a
    # county to city-level (or prefecture-level) departments.
    df['市/省局'] = df['市/省局'].fillna('省直部门')
    df['区县/地方部门'] = df['区县/地方部门'].fillna('市直部门')
    df.loc[(df['市/省局'] == '临夏回族自治州') & (df['区县/地方部门'] == '市直部门'), '区县/地方部门'] = '州直部门'

    df['更新次数'] = df['更新次数'].fillna(0)
    df = df.fillna(value='')

    # City keys that have been used with this script:
    #   '甘肃省', '白银市', '武威市', '庆阳市', '酒泉市', '天水市', '临夏回族自治州',
    #   '平凉市', '定西市', '兰州新区', '嘉峪关市', '庆阳市华池县', '庆阳市镇原县',
    #   '庆阳市宁县', '甘南藏族自治州', '金昌市', '兰州市', '陇南市', '张掖市', '省直部门'
    if cities is None:
        cities = {'张掖市'}
    elif isinstance(cities, str):
        cities = [cities]

    # Create the report directory and the temp-file directory
    # (Reports and Intermediate) below the output directory.
    dirP = os.path.abspath(os.path.dirname(strPathOutput))
    os.makedirs(dirP, exist_ok=True)
    dirReports = createDir(dirP, 'Reports')
    dirIntermediate = createDir(dirP, 'Intermediate')

    # Sorted so that the processing order is the same on every run.
    for city in sorted(cities):
        summaryCity(info, city, df, dfW, dfS, strfnTemplate,
                    os.path.join(dirReports, city + '.docx'), dirIntermediate)
|
||
|
||
# Merge the per-city typo / sensitive-word workbooks (selected by keyword) into one file
|
||
def mergeCMC(keyword, strPathCBZ, strFnCbz):
    """Merge the per-city finding workbooks into a single Excel file.

    Every ``*<keyword>*.xlsx`` file in ``strPathCBZ`` is read, tagged with the
    city recognised from its file name (column '市州') and appended to the
    merged sheet written to ``strFnCbz``.

    Args:
        keyword: Text the file names must contain.
        strPathCBZ: Directory holding the per-city workbooks.
        strFnCbz: Path of the merged workbook to write.
    """
    # File-name fragment -> full city name.  The first fragment found in the
    # file name wins, so the insertion order matters ('新区' / 'XQ' are listed
    # before '兰州' / 'LZ' so that 兰州新区 is not mistaken for 兰州市).
    cityShorten = {'白银': '白银市', '定西': '定西市', '酒泉': '酒泉市', '嘉峪关': '嘉峪关市', '陇南': '陇南市',
                   '临夏': '临夏回族自治州', '平凉': '平凉市', '庆阳': '庆阳市', '天水': '天水市', '武威': '武威市',
                   '新区': '兰州新区',
                   '兰州新区': '兰州新区', '兰州': '兰州市', '张掖': '张掖市', '甘南': '甘南藏族自治州',
                   '省直': '省直部门', '金昌': '金昌市',
                   'BY': '白银市', 'DX': '定西市', 'JQ': '酒泉市', 'JYG': '嘉峪关市', 'LN': '陇南市',
                   'LX': '临夏回族自治州', 'PL': '平凉市', 'QY': '庆阳市', 'TS': '天水市', 'WW': '武威市',
                   'XQ': '兰州新区',
                   'LZXQ': '兰州新区', 'LZ': '兰州市', 'ZY': '张掖市', 'GN': '甘南藏族自治州', 'SZ': '省直部门',
                   'JC': '金昌市', }

    frames = []
    total = 0
    # Sorted so the merged row order does not depend on the file system.
    for fn in sorted(glob.glob(os.path.join(strPathCBZ, '*' + keyword + '*.xlsx'))):
        f = os.path.basename(fn)
        if f.startswith('~$'):
            # Lock file of a workbook that is currently open in Excel.
            continue
        city = next((full for short, full in cityShorten.items() if short in f), '')
        if not city:
            print("!!!!! City Name not matched ( ", f, " )")
        dfn = pd.read_excel(fn)
        dfn['市州'] = city
        frames.append(dfn)
        total += dfn.shape[0]
        print(city, f, dfn.shape[0], '/', total)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    df.to_excel(strFnCbz)
|
||
#def mergeCMC
|
||
|
||
if __name__ == "__main__":
    # NOTE: convert the date columns of the Excel files before running this.

    # Root directory of the data.
    strPath = 'D:/Projects/POM/DATA/2023年S1/'

    # Report-wide values handed to the template.
    info = dict(
        year="2023",
        quarter="一",
        dateCN="二〇二三年四月",
        dateStart="2023年1月1日",
        dateEnd="2023年3月20日",
        days="79",
        num="4",
    )

    strFnMonitoring = strPath + '汇总/第一季度汇总数据_2023.3.xlsx'  # monitoring data
    strPathTemplate = strPath + 'POM_ReportTemplate0.docx'  # Word template
    strFnCbz = strPath + '汇总/CBZ.xlsx'  # merged typo findings
    strFnMgc = strPath + '汇总/MGC.xlsx'  # merged sensitive-word findings

    # Build the merged workbooks from the per-city files when they are missing.
    for keyword, merged in (("错别", strFnCbz), ("敏感", strFnMgc)):
        if not os.path.exists(merged):
            mergeCMC(keyword, strPath + '监测/', merged)

    # Output directory.
    strPathOutput = strPath + '统计/'

    summary(info, strFnMonitoring, strFnCbz, strFnMgc, strPathTemplate, strPathOutput)
|