删除旧月份和季度脚本
This commit is contained in:
parent
95575b137a
commit
2c3cc5207e
|
@ -1,768 +0,0 @@
|
|||
# 1. 打开监测任务表格
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os, glob, re
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.ticker import FuncFormatter
|
||||
import datetime
|
||||
#word toc
|
||||
import win32com
|
||||
import win32com.client as win32
|
||||
from win32com.client import constants
|
||||
#pdf
|
||||
from pikepdf import Pdf,Page,Rectangle
|
||||
#word
|
||||
from docxtpl import DocxTemplate
|
||||
from docxtpl import InlineImage
|
||||
from docx.shared import Mm
|
||||
|
||||
def addStamp(target_pdf_path, watermark_pdf_path, output_pdf_path, sy=140):
    """Overlay the official seal and the signature onto a report PDF.

    Page 0 of the stamp PDF (the seal) is drawn on page 0 of the report and
    page 1 of the stamp PDF (the signature) on page 2 of the report. The
    stamped document is written to a separate output file.

    Args:
        target_pdf_path: PDF report that receives the overlays.
        watermark_pdf_path: PDF whose first two pages hold seal and signature.
        output_pdf_path: Path the stamped PDF is saved to.
        sy: Lower y coordinate of the seal rectangle on the first page.
    """
    # Open both files as context managers so the handles are released even if
    # an overlay fails; the original left them open.
    with Pdf.open(target_pdf_path) as target_pdf, \
            Pdf.open(watermark_pdf_path) as watermark_pdf:
        seal_page = watermark_pdf.pages[0]
        signature_page = watermark_pdf.pages[1]

        # Official seal: 115 x 115 box whose lower-left corner is (240, sy).
        x, y, w, h = 240, sy, 115, 115
        target_pdf.pages[0].add_overlay(seal_page, Rectangle(x, y, x + w, y + h))

        # Signature: 85 x 50 box whose lower-left corner is (163, 573).
        x, y, w, h = 163, 573, 85, 50
        target_pdf.pages[2].add_overlay(signature_page, Rectangle(x, y, x + w, y + h))

        target_pdf.save(output_pdf_path)
|
||||
|
||||
|
||||
def update_toc(docx_file):  # path of the Word document
    """Refresh the table-of-contents page numbers and export the file as PDF.

    The document is opened in a hidden Word instance. When it contains exactly
    one table of contents, only its page numbers are updated. The PDF is saved
    next to the document with ``.docx`` replaced by ``.pdf``.

    Args:
        docx_file: Path of the ``.docx`` file to process.
    """
    word = win32com.client.DispatchEx("Word.Application")
    try:
        word.Visible = 0  # keep the Word window hidden
        word.DisplayAlerts = 0
        doc = word.Documents.Open(docx_file)
        save_changes = False
        try:
            toc_count = doc.TablesOfContents.Count
            if toc_count == 0:
                print("无目录")
            elif toc_count == 1:
                # Only the page numbers are refreshed, not the whole table.
                doc.TablesOfContents(1).UpdatePageNumbers()

            # 17 is Word's file-format code for PDF.
            doc.SaveAs(docx_file.replace('.docx', '.pdf'), FileFormat=17)
            save_changes = True
        finally:
            # Persist the document only when every step above succeeded.
            doc.Close(SaveChanges=save_changes)
    finally:
        # Always shut Word down, otherwise a failure leaves an orphan process.
        word.Quit()
|
||||
|
||||
def toDate(strDT):
    """Format a date-like value as ``MM-DD``; return '' when it cannot be parsed."""
    parsed = pd.to_datetime(strDT, errors='coerce')
    if pd.isna(parsed):
        return ''
    return parsed.strftime('%m-%d')
|
||||
|
||||
# word模板替换
|
||||
def temp_word(tmep_path, word_apth, dContext, pathImage, city):
    """Render the Word template with the report context plus the city charts.

    Args:
        tmep_path: Path of the docx template.
        word_apth: Path the rendered document is saved to.
        dContext: Template context; the chart images are added to it in place.
        pathImage: Directory holding the chart PNG files.
        city: Name used as the prefix of every chart file name.
    """
    template = DocxTemplate(tmep_path)
    chart_names = (
        'annulusMediaCount',
        'annulusCountyCount',
        'annulusCountyArticle',
        'annulusResult',
        'barCountyRatio',
    )
    # Each chart is stored as "<city><chart name>.png" and embedded 120 mm wide.
    images = {
        chart: InlineImage(template,
                           os.path.join(pathImage, city + chart + '.png'),
                           width=Mm(120))
        for chart in chart_names
    }

    dContext.update(images)
    template.render(dContext)
    template.save(word_apth)
|
||||
|
||||
|
||||
# 画柱状图
|
||||
def drawBar(data, recipe, title='', fn=''):
    """Draw a percentage bar chart and optionally save it to a file.

    Args:
        data: Bar heights as fractions; the y axis is fixed to 0..1.
        recipe: Category labels, one per bar.
        title: Chart title.
        fn: Output image path; nothing is saved when it is empty.
    """
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    def to_percent(temp, position):
        # Tick formatter: show the fraction as a whole-number percentage.
        return '%2.0f' % (100 * temp) + '%'

    plt.figure(figsize=(6, 4))
    try:
        plt.bar(recipe, data, width=0.5)
        plt.xticks(recipe, recipe, rotation=35)
        plt.ylim((0, 1))
        plt.gca().yaxis.set_major_formatter(FuncFormatter(to_percent))
        plt.title(title, fontsize=16)
        plt.tight_layout()
        # fn defaults to '' — skip saving then, as drawAnnulus does.
        if fn:
            plt.savefig(fn)
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close()
|
||||
|
||||
|
||||
# 画环状图
|
||||
def drawAnnulus(data, recipe, title='', fn=''):
    """Draw a ring (donut) chart with a legend to its right.

    Args:
        data: Wedge sizes.
        recipe: Legend labels, one per wedge.
        title: Chart title; omitted when empty.
        fn: Output image path; nothing is saved when it is empty.
    """
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    # Canvas width, legend column count and legend font size grow with the
    # number of labels.
    label_count = len(recipe)
    if label_count > 40:
        canvas_width, legend_columns, legend_font = 16, 4, 'small'
    elif label_count > 20:
        canvas_width, legend_columns, legend_font = 16, 2, 'small'
    else:
        canvas_width, legend_columns, legend_font = 8, 1, 'medium'
    canvas_height = 4

    fig, ax = plt.subplots(figsize=(canvas_width, canvas_height),
                           subplot_kw={'aspect': "equal"})

    # Ring of width 0.4 on a pie of radius 1.1, starting at angle 0.
    ax.pie(data, radius=1.1, wedgeprops={'width': 0.4}, startangle=0)

    # The monitoring-result chart places its legend closer to the ring.
    legend_x = 1.0 if title == '政务新媒体监测结果' else 1.2
    plt.legend(labels=recipe, loc="center left", bbox_to_anchor=(legend_x, 0.5),
               borderaxespad=0., ncol=legend_columns, fontsize=legend_font)
    if len(title):
        ax.set_title(title, fontsize=16, fontweight='heavy')

    plt.tight_layout()
    if len(fn):
        plt.savefig(fn)
    plt.cla()
    plt.clf()
    plt.close()
|
||||
|
||||
# summaryCity(city, dfc, dfcw, dfcs, context, strfnTemplate, os.path.join(strPathVerified,'Reports', city+'.docx'), strPathVerified )
|
||||
|
||||
|
||||
# 汇总市州数据,
|
||||
# 市州名称, 监测数据, cbz数据, mgc数据, context(编号、名称), word模板文件名称, 输出word文件名称, 临时文件目录
|
||||
# 需要传入模板文件,数据、错别字、敏感词,单位名称等
|
||||
# Characters removed from quoted sentences because they disturb the table
# output of the Word template (quotes, brackets and similar punctuation).
_TEMPLATE_UNSAFE = re.compile(r"[——,$%^,。?、~@#¥%……&*《》<>「」{}【】()/\\[\]'\"]")


def _select_city_frames(city, df, dfW, dfS):
    """Pick the monitoring, typo and sensitive-word rows for one report.

    Returns the three filtered frames plus the column used to group accounts
    and the display name of that grouping level.
    """
    subordinate = '区县/地方部门'
    subordinateName = '县区'
    if "庆阳市" in city:
        mask = df['市/省局'] == '庆阳市'
        # County-level reports of Qingyang narrow the rows to that county.
        for county in ('华池县', '宁县', '镇原县'):
            if county in city:
                mask = mask & (df['区县/地方部门'] == county)
                break
        dfc = df.loc[mask].copy()
        # NOTE(review): county reports still get the typo / sensitive-word
        # rows of the whole city, exactly as before — confirm this is wanted.
        dfcw = dfW.loc[dfW['市州'] == '庆阳市'].copy()
        dfcs = dfS.loc[dfS['市州'] == '庆阳市'].copy()
    elif "甘肃" in city:
        # The province report aggregates a fixed list of cities, grouped by city.
        cities = {'白银市', '武威市', '庆阳市', '酒泉市', '天水市', '临夏回族自治州', '平凉市', '定西市', '兰州新区',
                  '嘉峪关市', '陇南市', '张掖市'}
        dfc = df.loc[df['市/省局'].isin(cities)].copy()
        dfcw = dfW.loc[dfW['市州'].isin(cities)].copy()
        dfcs = dfS.loc[dfS['市州'].isin(cities)].copy()
        subordinate = '市/省局'
        subordinateName = '市州'
    else:
        # Provincial departments and ordinary cities are matched by name.
        dfc = df.loc[df['市/省局'] == city].copy()
        dfcw = dfW.loc[dfW['市州'] == city].copy()
        dfcs = dfS.loc[dfS['市州'] == city].copy()
    return dfc, dfcw, dfcs, subordinate, subordinateName


def _pivot_cell(row, column):
    """Return a pivot cell as int, or '' when the column is absent or blank."""
    if column not in row.index:
        return ''
    value = row[column]
    return '' if isinstance(value, str) else int(value)


def _account_rows(frame, subordinate):
    """Build the template rows of an account list (one dict per account)."""
    rows = []
    for _, row in frame.iterrows():
        rows.append({
            'name': row['账号名称'], 'type': row['账号类型'], 'unit': row['开设主体'],
            'county': row[subordinate], 'result': row['监测结果'],
            # Falsy values (0 or '') are shown as blank cells.
            'num': "%d" % row['更新次数'] if row['更新次数'] else '',
            'days': "%d" % row['最大静默日数'] if row['最大静默日数'] else '',
            'start': toDate(str(row['静默开始日期'])) if row['静默开始日期'] else '',
            'end': toDate(str(row['静默结束日期'])) if row['静默结束日期'] else '',
        })
    return rows


def _issue_rows(frame, strip_punctuation):
    """Build the template rows of a typo or sensitive-word table."""
    frame = frame.fillna('')  # blank cells instead of NaN in the report
    has_title = '标题' in frame.columns
    rows = []
    for _, row in frame.iterrows():
        sentence = row['错误出现位置']
        if strip_punctuation:
            sentence = _TEMPLATE_UNSAFE.sub('', str(sentence))
        rows.append({'error': row['错误'], 'tips': row['建议'], 'sentence': sentence,
                     'type': row['账号类型'], 'name': row['账号名称'],
                     'date': toDate(str(row['发文时间'])),
                     'title': row['标题'] if has_title else ''})
    return rows


def summaryCity(info, city, df, dfW, dfS, fnTemplate, fnReport, dirTemp):
    """Produce the monitoring report (docx, PDF, stamped PDF) for one city.

    Args:
        info: Report metadata merged into the template context; its
            'serialNum' entry is appended to the report id.
        city: Name of the city, county or province the report covers.
        df: Monitoring data of all accounts.
        dfW: Typo findings, with a '市州' column.
        dfS: Sensitive-word findings, with a '市州' column.
        fnTemplate: Path of the Word template.
        fnReport: Path of the ``.docx`` report to write.
        dirTemp: Directory for the intermediate chart images.

    Raises:
        ValueError: If the city is unknown or has no monitoring rows.
    """
    # The position of a city in this mapping is its two-digit report prefix,
    # so the order of the entries must not change.
    dCityClient = {
        '甘肃省': "甘肃省人民政府办公厅",
        '省直部门': "甘肃省人民政府办公厅",
        '白银市': "白银市人民政府办公室",
        '定西市': "定西市人民政府办公室",
        '临夏回族自治州': "临夏回族自治州人民政府办公室",
        '平凉市': "中共平凉市委网络安全和信息化委员会办公室",
        "庆阳市": "庆阳市电子政务与信息资源管理办公室",
        '庆阳市华池县': "华池县人民政府办公室",
        '庆阳市宁县': "宁县人民政府办公室",
        "庆阳市镇原县": "镇原县人民政府办公室",
        "酒泉市": "酒泉市人民政府办公室",
        "天水市": "天水市人民政府办公室",
        "武威市": "武威市人民政府办公室",
        "金昌市": "金昌市人民政府办公室",
        "嘉峪关市": "嘉峪关市人民政府办公室",
        "兰州新区": "兰州新区管委会办公室",
        "陇南市": "陇南市政务服务中心",
        "张掖市": "张掖市政务服务中心",
        "甘南藏族自治州": "甘南藏族自治州政务服务中心",
        "兰州市": "兰州市政务服务中心",
    }
    dHavingSubordinateUnits = {'甘肃省': True, '白银市': True, '定西市': True,
                               '临夏回族自治州': True, '平凉市': True, "庆阳市": True, "酒泉市": True, "天水市": True,
                               "陇南市": True, "张掖市": True, "甘南藏族自治州": True, "兰州市": True,
                               "武威市": True, "金昌市": True,
                               '省直部门': False, "兰州新区": False, '庆阳市华池县': False,
                               '庆阳市宁县': False, "庆阳市镇原县": False, "嘉峪关市": False}
    print("----------------" + city + "----------------")
    if city not in dCityClient:
        raise ValueError('Unknown city: ' + city)

    # Report id and commissioning organisation.
    strID = "%02d" % (list(dCityClient).index(city))
    context = {
        "city": city,
        "client": dCityClient[city],
        "reportid": strID + info['serialNum'],
        'havingSubordinateUnits': dHavingSubordinateUnits[city],
        'havingBelowStandard': True,
        'havingUpStandard': True,
        'havingCbz': True,
        'havingMgc': True
    }
    context.update(info)

    dfc, dfcw, dfcs, subordinate, subordinateName = _select_city_frames(city, df, dfW, dfS)
    if dfc.empty:
        raise ValueError('No monitoring rows found for ' + city)

    dCity = {}

    # -----------------------
    # Accounts per unit and monitoring result (table tt3).
    dfCountyAccount = pd.pivot_table(dfc, index=[subordinate], columns=['监测结果'], values=['账号名称'],
                                     aggfunc='count', fill_value='', margins=True)
    dfCountyAccount.columns = dfCountyAccount.columns.droplevel(0)
    context['tt3_contents'] = [
        {'county': '总 计' if unit == 'All' else unit,
         'hg': _pivot_cell(row, '合格'),
         'u2w': _pivot_cell(row, '超过两周未更新'),
         'un': _pivot_cell(row, '监测期间未更新'),
         'count': _pivot_cell(row, 'All')}
        for unit, row in dfCountyAccount.iterrows()
    ]

    # -----------------------
    # Accounts per media type (table tt1).
    dfMedia = pd.pivot_table(dfc, index=['账号类型'], values=['账号名称'], aggfunc='count', fill_value='',
                             margins=True)
    dCity['nmCount'] = dfMedia.loc['All', '账号名称']
    print(' 监测账号数:', dCity['nmCount'])
    # Remove the 'All' margin row by label: when only one type exists it ties
    # with that type and is not guaranteed to sort first.
    dfMedia = dfMedia.drop('All').sort_values(by='账号名称', ascending=False)
    mediaTypes = dfMedia.index.tolist()
    mediaCounts = dfMedia['账号名称'].tolist()
    dCity['sMediaCount'] = ','.join(m + str(n) + '个' for m, n in zip(mediaTypes, mediaCounts))
    context['tt1_contents'] = [{'type': m, 'count': str(n)} for m, n in zip(mediaTypes, mediaCounts)]

    # -----------------------
    # Update counts per unit.
    dfCountyArticle = pd.pivot_table(dfc, index=[subordinate], values=['更新次数'], aggfunc='sum', fill_value='',
                                     margins=True)
    dCity['cityArticleCount'] = "%d" % dfCountyArticle.loc['All', '更新次数']
    dfCountyArticle = dfCountyArticle.drop('All').sort_values(by='更新次数', ascending=False)
    articleUnits = dfCountyArticle.index.tolist()
    articleCounts = dfCountyArticle['更新次数'].tolist()
    dCity['countyMostArticle'] = articleUnits[0]
    dCity['countyMostArticleCount'] = "%d" % articleCounts[0]
    # The per-unit breakdown is only meaningful with more than one unit.
    if len(articleUnits) > 1:
        dCity['sCountyArticles'] = ',按管理矩阵统计,' + ','.join(
            "%s%d次" % (u, n) for u, n in zip(articleUnits, articleCounts))
    else:
        dCity['sCountyArticles'] = ''

    # -----------------------
    # Pass rate per unit. Blank cells (units without a qualified account) and
    # a missing '合格' column count as zero instead of crashing the int cast.
    if '合格' not in dfCountyAccount.columns:
        dfCountyAccount['合格'] = 0
    dfCountyAccount['合格'] = pd.to_numeric(dfCountyAccount['合格'], errors='coerce').fillna(0).astype(int)
    dfCountyAccount['rate'] = dfCountyAccount['合格'] / dfCountyAccount['All'].astype(float)
    dCity['cityRatio'] = "{:.1%}".format(dfCountyAccount.loc['All', 'rate'])
    print(' 合格率:', dCity['cityRatio'])

    # City-wide totals per monitoring result (the 'All' row without helpers).
    resultTotals = dfCountyAccount.loc['All'].drop(['All', 'rate'])

    # Units ordered by their number of accounts.
    dfUnits = dfCountyAccount.drop('All').sort_values(by='All', ascending=False)
    counties = dfUnits.index.tolist()
    countyCounts = dfUnits['All'].tolist()
    dCity['countyCount'] = "%d" % len(counties)
    dCity['sCounties'] = ','.join(counties)
    dCity['sCountyCount'] = ','.join(c + str(n) + '个' for c, n in zip(counties, countyCounts))

    # Units ordered by pass rate (table tt2).
    dfRanked = dfUnits.sort_values(by='rate', ascending=False)
    countieshege = dfRanked.index.tolist()
    countyRates = dfRanked['rate'].tolist()
    tt2_list = [{'county': c, 'ratio': "%.1f" % (100.0 * r) + '%'}
                for c, r in zip(countieshege, countyRates)]
    dCity['sCountyRatio'] = ','.join(t['county'] + t['ratio'] for t in tt2_list)
    dCity['tt2_contents'] = tt2_list

    # -----------------------
    # Charts.
    print(' 生成图片...')
    drawAnnulus(mediaCounts, mediaTypes,
                '政务新媒体账号类型', os.path.join(dirTemp, city + 'annulusMediaCount.png'))
    drawAnnulus(countyCounts, counties,
                subordinateName + '政务新媒体账号数量', os.path.join(dirTemp, city + 'annulusCountyCount.png'))
    drawAnnulus(articleCounts, articleUnits,
                subordinateName + '政务新媒体累计更新次数', os.path.join(dirTemp, city + 'annulusCountyArticle.png'))

    # Qualified count, pass rate and unqualified count.
    numQualified = resultTotals['合格']
    dCity['resultQualified'] = "%d" % numQualified
    dCity['resultQualifiedRatio'] = "%.1f%%" % (numQualified / dCity['nmCount'] * 100.0)
    dCity['resultUnqualified'] = "%d" % (dCity['nmCount'] - numQualified)

    if '监测期间未更新' in resultTotals.index:
        numNoupdated = resultTotals['监测期间未更新']
        dCity['stringResultNoUpdated'] = ";%d个政务新媒体监测期间未更新,占监测总数的%.1f%%" % (
            numNoupdated, numNoupdated / dCity['nmCount'] * 100.0)
        dCity['stringNoUpdated'] = "%d个政务新媒体监测期间未更新。" % (numNoupdated)
    else:
        dCity['stringResultNoUpdated'] = ''
        dCity['stringNoUpdated'] = ""

    if '超过两周未更新' in resultTotals.index:
        numNoupdated2W = resultTotals['超过两周未更新']
        dCity['stringResultNoUpdated2W'] = ";%d个政务新媒体连续未更新时间超过两周,占监测总数的%.1f%%" % (
            numNoupdated2W, numNoupdated2W / dCity['nmCount'] * 100.0)
        dCity['stringNoUpdated2W'] = "%d个政务新媒体连续未更新时间超过两周。" % (numNoupdated2W)
    else:
        dCity['stringResultNoUpdated2W'] = ''
        dCity['stringNoUpdated2W'] = ''

    drawAnnulus(resultTotals.tolist(), resultTotals.index.tolist(),
                '政务新媒体监测结果', os.path.join(dirTemp, city + 'annulusResult.png'))
    drawBar(countyRates, countieshege,
            '政务新媒体管理矩阵发布时效性合格率榜单', os.path.join(dirTemp, city + 'barCountyRatio.png'))

    # -----------------------
    # Report tables.
    print(' 生成报告...')
    dfCityUnqulified = dfc[dfc['监测结果'] != '合格'].sort_values(by="监测结果", ascending=True)
    dfCityQulified = dfc[dfc['监测结果'] == '合格'].sort_values(by=subordinate, ascending=True)

    # Unqualified accounts (table tt4).
    if len(dfCityUnqulified) < 1:
        context['havingBelowStandard'] = False
    else:
        context['tt4_contents'] = _account_rows(dfCityUnqulified, subordinate)

    # Qualified accounts (table tt5).
    if len(dfCityQulified) < 1:
        context['havingUpStandard'] = False
    else:
        context['tt5_contents'] = _account_rows(dfCityQulified, subordinate)

    # Typos: the summary sentence is set in both cases, so the "none found"
    # wording is reachable.
    if dfcw.shape[0] < 1:
        context['havingCbz'] = False
        dCity['stringCbzCount'] = '本次检测未发现错别字。'
    else:
        context['tCbz_contents'] = _issue_rows(dfcw, True)
        dCity['stringCbzCount'] = '本次检测发现错别字%d处,详细情况见附表政务新媒体发布内容错别字统计表。' % (dfcw.shape[0])

    # Sensitive words.
    if dfcs.shape[0] < 1:
        context['havingMgc'] = False
        dCity['stringMgcCount'] = '本次检测未发现涉敏内容。'
    else:
        context['tMgc_contents'] = _issue_rows(dfcs, False)
        dCity['stringMgcCount'] = '本次检测发现敏感信息%d处,详细情况见附表政务新媒体发布内容敏感信息统计表。' % (dfcs.shape[0])

    context.update(dCity)

    # -----------------------
    # Render the report from the template.
    temp_word(fnTemplate, fnReport, context, dirTemp, city)

    # Refresh the table of contents and export to PDF.
    print(' 更新目录,转换为PDF...')
    update_toc(fnReport)

    # Stamp the PDF; three cities place the seal lower on the first page.
    print(' 签章...')
    fnPdf = fnReport.replace('.docx', '.pdf')
    fnStamped = fnReport.replace('.docx', '_Stamp.pdf')
    fnStamps = 'D:/Projects/POM/DEV/SCRIPTS/stamps_dwl.pdf'
    if city in {'庆阳市', '平凉市', '临夏回族自治州'}:
        addStamp(fnPdf, fnStamps, fnStamped, 115)
    else:
        addStamp(fnPdf, fnStamps, fnStamped)
|
||||
|
||||
|
||||
|
||||
def createDir(dirP, dirS):
    """Ensure sub-directory ``dirS`` exists inside ``dirP`` and return its path.

    Falls back to returning ``dirP`` unchanged when ``dirP`` is not an
    existing directory or when the sub-directory cannot be created.

    Args:
        dirP: Parent directory.
        dirS: Name of the sub-directory to create.

    Returns:
        Path of the sub-directory, or ``dirP`` on failure.
    """
    if not os.path.isdir(dirP):
        return dirP

    dirN = os.path.join(dirP, dirS)
    try:
        os.makedirs(dirN, exist_ok=True)
    except OSError:
        # Reported below through the isdir check (e.g. a file blocks the name).
        pass

    if os.path.isdir(dirN):
        return dirN
    # Name the directory that failed, not the parent we fall back to.
    print('Directory ' + dirN + ' cannot be created.')
    return dirP
|
||||
# def createDir(dirP, dirS):
|
||||
|
||||
def summary(info, strFnData, strFnW, strFnS, strfnTemplate, strPathOutput):
    """Load and clean the monitoring data, then build the selected reports.

    Args:
        info: Report metadata passed on to every city report.
        strFnData: Excel file with the monitoring data.
        strFnW: Excel file with the typo findings.
        strFnS: Excel file with the sensitive-word findings.
        strfnTemplate: Path of the Word template.
        strPathOutput: Path whose directory receives the 'Reports' and
            'Intermediate' folders.
    """
    monitoring = pd.read_excel(strFnData)
    typos = pd.read_excel(strFnW)
    sensitive = pd.read_excel(strFnS)

    # Use one wording for the "not updated for two weeks" result.
    monitoring.loc[monitoring['监测结果'] == '连续两周未更新', '监测结果'] = '超过两周未更新'

    # Replace over-long county names with short forms so the charts stay readable.
    short_names = (
        ('积石山保安族东乡族撒拉族自治县', '积石山县'),
        ('阿克塞哈萨克族自治县', '阿克塞自治县'),
    )
    for long_name, short_name in short_names:
        monitoring.loc[monitoring['区县/地方部门'] == long_name, '区县/地方部门'] = short_name

    # Rows without a city belong to provincial departments; rows without a
    # county belong to the city-level (or prefecture-level) departments.
    monitoring['市/省局'] = monitoring['市/省局'].fillna('省直部门')
    monitoring['区县/地方部门'] = monitoring['区县/地方部门'].fillna('市直部门')
    is_linxia_direct = (monitoring['市/省局'] == '临夏回族自治州') & (monitoring['区县/地方部门'] == '市直部门')
    monitoring.loc[is_linxia_direct, '区县/地方部门'] = '州直部门'

    # Clean-up: strip all whitespace, drop a leading "其他+" and fill blanks.
    monitoring.replace(r'\s+', '', regex=True, inplace=True)
    monitoring.replace(r'^其他\+', '', regex=True, inplace=True)
    monitoring['更新次数'] = monitoring['更新次数'].fillna(0)
    monitoring = monitoring.fillna(value='')

    # Only these reports are generated.
    cities = {'庆阳市', '庆阳市宁县', '甘肃省'}

    # Reports and intermediate files go below the directory of strPathOutput.
    base_dir = os.path.abspath(os.path.dirname(strPathOutput))
    reports_dir = createDir(base_dir, 'Reports')
    intermediate_dir = createDir(base_dir, 'Intermediate')
    for city in cities:
        report_path = os.path.join(reports_dir, city + '.docx')
        summaryCity(info, city, monitoring, typos, sensitive, strfnTemplate, report_path, intermediate_dir)
|
||||
|
||||
# 合并错别字文件
|
||||
def mergeCMC(keyword, strPathCBZ, strFnCbz):
    """Merge the per-city Excel files of one finding type into a single file.

    Every ``*.xlsx`` file in ``strPathCBZ`` whose name contains ``keyword`` is
    read, tagged with the city recognised from its file name (column '市州')
    and appended to the merged table, which is written to ``strFnCbz``.

    Args:
        keyword: Text the file names must contain.
        strPathCBZ: Directory holding the per-city files.
        strFnCbz: Path of the merged Excel file to write.
    """
    # File-name fragment -> full city name. The first fragment found in the
    # file name wins, so the order of the entries matters ('新区' comes
    # before '兰州').
    cityShorten = {'白银': '白银市', '定西': '定西市', '酒泉': '酒泉市', '嘉峪关': '嘉峪关市', '陇南': '陇南市',
                   '临夏': '临夏回族自治州', '平凉': '平凉市', '庆阳': '庆阳市', '天水': '天水市', '武威': '武威市', '新区': '兰州新区',
                   '兰州新区': '兰州新区', '兰州': '兰州市', '张掖': '张掖市', '甘南': '甘南藏族自治州', '省直': '省直部门', '金昌': '金昌市',
                   'BY': '白银市', 'DX': '定西市', 'JQ': '酒泉市', 'JYG': '嘉峪关市', 'LN': '陇南市',
                   'LX': '临夏回族自治州', 'PL': '平凉市', 'QY': '庆阳市', 'TS': '天水市', 'WW': '武威市', 'XQ': '兰州新区',
                   'LZXQ': '兰州新区', 'LZ': '兰州市', 'ZY': '张掖市', 'GN': '甘南藏族自治州', 'SZ': '省直部门', 'JC': '金昌市', }
    frames = []
    total = 0
    for fn in glob.glob(os.path.join(strPathCBZ, '*' + keyword + '*.xlsx')):
        f = os.path.basename(fn)
        city = next((full for short, full in cityShorten.items() if short in f), '')
        if not city:
            print("!!!!! City Name not matched ( ", f, " )")
        dfn = pd.read_excel(fn)
        dfn['市州'] = city
        frames.append(dfn)
        total += dfn.shape[0]
        print(city, f, dfn.shape[0], '/', total)

    # DataFrame.append no longer exists in pandas 2.x; concatenate once instead.
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    df.to_excel(strFnCbz)
|
||||
#def mergeCMC
|
||||
|
||||
if __name__ == "__main__":

    # Convert the date columns of the Excel files before running this script.

    info = {
        "year": "2023",
        "month": "3",
        "datePub": "二〇二三年四月",
        "dateStart": "2023年3月1日",
        "dateEnd": "2023年3月31日",
        "days": "31",
        "serialNum": "4",
    }

    # Data root directory and its working sub-directories.
    strPath = 'D:/Projects/POM/DATA/2023年4月/3月报告/'
    for subDir in ('全文', '转发', '报告', '汇总', '监测'):
        createDir(strPath, subDir)

    # Monitoring data and Word template.
    strFnMonitoring = strPath + '汇总/3月汇总数据_2023.3.xlsx'
    strPathTemplate = strPath + 'POM_ReportTemplate.docx'

    # Typos: merge the per-city files unless the merged file already exists.
    strFnCbz = strPath + '汇总/CBZ.xlsx'
    if not os.path.exists(strFnCbz):
        strPathCBZ = strPath + '监测/'
        mergeCMC("错别", strPathCBZ, strFnCbz)

    # Sensitive words: same procedure.
    strFnMgc = strPath + '汇总/MGC.xlsx'
    if not os.path.exists(strFnMgc):
        strPathMGC = strPath + '监测/'
        mergeCMC("敏感", strPathMGC, strFnMgc)

    # Output directory.
    strPathOutput = strPath

    # Load the monitoring data, typos and sensitive words.
    df = pd.read_excel(strFnMonitoring)
    dfW = pd.read_excel(strFnCbz)
    dfS = pd.read_excel(strFnMgc)

    # Use one wording for the "not updated for two weeks" result.
    df.loc[df['监测结果'] == '连续两周未更新', '监测结果'] = '超过两周未更新'

    # Replace over-long county names with short forms so the charts stay readable.
    for longName, shortName in (('积石山保安族东乡族撒拉族自治县', '积石山县'),
                                ('阿克塞哈萨克族自治县', '阿克塞自治县')):
        df.loc[df['区县/地方部门'] == longName, '区县/地方部门'] = shortName

    # Provincial, city-level and prefecture-level departments.
    df['市/省局'] = df['市/省局'].fillna('省直部门')
    df['区县/地方部门'] = df['区县/地方部门'].fillna('市直部门')
    isLinxiaDirect = (df['市/省局'] == '临夏回族自治州') & (df['区县/地方部门'] == '市直部门')
    df.loc[isLinxiaDirect, '区县/地方部门'] = '州直部门'

    # Clean-up: strip all whitespace, drop a leading "其他+" and fill blanks.
    df.replace(r'\s+', '', regex=True, inplace=True)
    df.replace(r'^其他\+', '', regex=True, inplace=True)
    df['更新次数'] = df['更新次数'].fillna(0)
    df = df.fillna(value='')

    # Cities, counties and the province for which reports are generated.
    cities = {'白银市', '武威市', '庆阳市', '酒泉市', '天水市', '临夏回族自治州', '平凉市', '定西市', '兰州新区',
              '嘉峪关市', '庆阳市华池县', '庆阳市镇原县', '庆阳市宁县', '陇南市', '张掖市', '甘肃省'}

    # Reports and intermediate files go below the directory of strPathOutput.
    dirP = os.path.abspath(os.path.dirname(strPathOutput))
    dirReports = createDir(dirP, 'Reports')
    dirIntermediate = createDir(dirP, 'Intermediate')
    for city in cities:
        fnReport = os.path.join(dirReports, city + '.docx')
        summaryCity(info, city, df, dfW, dfS, strPathTemplate, fnReport, dirIntermediate)
|
610
StatSeasonly3.py
610
StatSeasonly3.py
|
@ -1,610 +0,0 @@
|
|||
# 1. 打开监测任务表格
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os, glob, re
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.ticker import FuncFormatter
|
||||
import datetime
|
||||
|
||||
from docxtpl import DocxTemplate
|
||||
from docxtpl import InlineImage
|
||||
from docx.shared import Mm
|
||||
|
||||
def toDate(strDT):
    """Return the month and day of a date-like value as ``MM-DD``.

    An empty string is returned when the value cannot be parsed as a date.
    """
    stamp = pd.to_datetime(strDT, errors='coerce')
    return '' if pd.isna(stamp) else stamp.strftime('%m-%d')
|
||||
|
||||
# word模板替换
|
||||
def temp_word(tmep_path, word_apth, dContext, pathImage, city):
    """Fill the Word template with the context and the five city charts.

    Args:
        tmep_path: Path of the docx template.
        word_apth: Path the rendered document is saved to.
        dContext: Template context; the chart images are added to it in place.
        pathImage: Directory holding the chart PNG files.
        city: Name used as the prefix of every chart file name.
    """
    doc_template = DocxTemplate(tmep_path)

    def chart(name):
        # Charts are stored as "<city><name>.png" and embedded 120 mm wide.
        return InlineImage(doc_template, os.path.join(pathImage, city + name + '.png'), width=Mm(120))

    charts = {
        'annulusMediaCount': chart('annulusMediaCount'),
        'annulusCountyCount': chart('annulusCountyCount'),
        'annulusCountyArticle': chart('annulusCountyArticle'),
        'annulusResult': chart('annulusResult'),
        'barCountyRatio': chart('barCountyRatio'),
    }

    dContext.update(charts)
    doc_template.render(dContext)
    doc_template.save(word_apth)
|
||||
|
||||
|
||||
# 画柱状图
|
||||
def drawBar(data, recipe, title='', fn=''):
    """Draw a percentage bar chart and optionally save it to a file.

    Args:
        data: Bar heights as fractions; the y axis is fixed to 0..1.
        recipe: Category labels, one per bar.
        title: Chart title.
        fn: Output image path; nothing is saved when it is empty.
    """
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    def to_percent(temp, position):
        # Tick formatter: show the fraction as a whole-number percentage.
        return '%2.0f' % (100 * temp) + '%'

    plt.figure(figsize=(6, 4))
    try:
        plt.bar(recipe, data, width=0.5)
        plt.xticks(recipe, recipe, rotation=35)
        plt.ylim((0, 1))
        plt.gca().yaxis.set_major_formatter(FuncFormatter(to_percent))
        plt.title(title, fontsize=16)
        plt.tight_layout()
        # fn defaults to '' — skip saving then, as drawAnnulus does.
        if fn:
            plt.savefig(fn)
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close()
|
||||
|
||||
|
||||
# 画环状图
|
||||
def drawAnnulus(data, recipe, title='', fn=''):
    """Draw a ring (donut) chart with a legend to its right.

    Args:
        data: Wedge sizes.
        recipe: Legend labels, one per wedge.
        title: Chart title; omitted when empty.
        fn: Output image path; nothing is saved when it is empty.
    """
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    # Canvas width, legend column count and legend font size grow with the
    # number of labels.
    label_count = len(recipe)
    if label_count > 40:
        canvas_width, legend_columns, legend_font = 16, 4, 'x-small'
    elif label_count > 20:
        canvas_width, legend_columns, legend_font = 16, 2, 'x-small'
    else:
        canvas_width, legend_columns, legend_font = 8, 1, 'medium'
    canvas_height = 4

    fig, ax = plt.subplots(figsize=(canvas_width, canvas_height),
                           subplot_kw={'aspect': "equal"})

    # Ring of width 0.4 on a pie of radius 1.1, starting at angle 0.
    ax.pie(data, radius=1.1, wedgeprops={'width': 0.4}, startangle=0)

    # The monitoring-result chart places its legend closer to the ring.
    legend_x = 1.0 if title == '政务新媒体监测结果' else 1.2
    plt.legend(labels=recipe, loc="center left", bbox_to_anchor=(legend_x, 0.5),
               borderaxespad=0., ncol=legend_columns, fontsize=legend_font)
    if len(title):
        ax.set_title(title, fontsize=16, fontweight='heavy')

    plt.tight_layout()
    if len(fn):
        plt.savefig(fn)
    plt.cla()
    plt.clf()
    plt.close()
|
||||
|
||||
# summaryCity(city, dfc, dfcw, dfcs, context, strfnTemplate, os.path.join(strPathVerified,'Reports', city+'.docx'), strPathVerified )
|
||||
|
||||
|
||||
# 汇总市州数据,
|
||||
# 市州名称, 监测数据, cbz数据, mgc数据, context(编号、名称), word模板文件名称, 输出word文件名称, 临时文件目录
|
||||
# 需要传入模板文件,数据、错别字、敏感词,单位名称等
|
||||
def _int_or_blank(value):
    """Return a pivot-table cell as ``int``, or ``''`` when the cell is blank."""
    if isinstance(value, str) or pd.isna(value):
        return ''
    return int(value)


def _account_rows(frame, subordinate):
    """Build the per-account rows of the report's account tables."""
    rows = []
    for _, row in frame.iterrows():
        rows.append({
            'name': row['账号名称'],
            'type': row['账号类型'],
            'unit': row['开设主体'],
            'county': row[subordinate],
            'result': row['监测结果'],
            # Falsy cells (0 or '') are rendered as blanks.
            'num': "%d" % row['更新次数'] if row['更新次数'] else '',
            'days': "%d" % row['最大静默日数'] if row['最大静默日数'] else '',
            'start': toDate(str(row['静默开始日期'])) if row['静默开始日期'] else '',
            'end': toDate(str(row['静默结束日期'])) if row['静默结束日期'] else '',
        })
    return rows


def _finding_rows(frame, scrub=None):
    """Build the rows of a findings table (typos or sensitive words).

    When *scrub* (a compiled regex) is given, every match is removed from the
    '错误出现位置' text before it is placed in the row.
    """
    hasTitle = '标题' in frame.columns
    rows = []
    for _, row in frame.iterrows():
        sentence = row['错误出现位置']
        if scrub is not None:
            sentence = '' if pd.isna(sentence) else scrub.sub('', str(sentence))
        rows.append({
            'error': row['错误'],
            'tips': row['建议'],
            'sentence': sentence,
            'type': row['账号类型'],
            'name': row['账号名称'],
            'date': toDate(str(row['发文时间'])),
            'title': row['标题'] if hasTitle else '',
        })
    return rows


def summaryCity(info, city, df, dfW, dfS, fnTemplate, fnReport, dirTemp):
    """Aggregate one city's monitoring data, draw its charts and render its report.

    Args:
        info: Report-wide template values; must contain ``'num'`` (used in the
            report id). All entries are merged into the template context.
        city: Name of the reporting unit; must be a key of ``dCityClient``.
        df: Monitoring data for all accounts.
        dfW: Typo findings; filtered by its '市州' column.
        dfS: Sensitive-word findings; filtered by its '市州' column.
        fnTemplate: Path of the Word template.
        fnReport: Path of the Word report to write.
        dirTemp: Directory that receives the generated chart images.

    Raises:
        ValueError: If *city* is not a key of ``dCityClient``.
    """
    # Reporting unit -> client named on the report. The position of a key in
    # this dict is also the numeric prefix of the report id, so keep the order.
    dCityClient = {
        '甘肃省': "甘肃省人民政府办公厅",
        '省直部门': "甘肃省人民政府办公厅",
        '白银市': "白银市人民政府办公室",
        '定西市': "定西市人民政府办公室",
        '临夏回族自治州': "临夏回族自治州人民政府办公室",
        '平凉市': "中共平凉市委网络安全和信息化委员会办公室",
        "庆阳市": "庆阳市电子政务与信息资源管理办公室",
        '庆阳市华池县': "华池县人民政府办公室",
        '庆阳市宁县': "宁县人民政府办公室",
        "庆阳市镇原县": "镇原县人民政府办公室",
        "酒泉市": "酒泉市人民政府办公室",
        "天水市": "天水市人民政府办公室",
        "武威市": "武威市人民政府办公室",
        "金昌市": "金昌市人民政府办公室",
        "嘉峪关市": "嘉峪关市人民政府办公室",
        "兰州新区": "兰州新区管委会办公室",
        # The original literal listed this key twice; the later value won.
        "陇南市": "陇南市政务服务中心",
        "张掖市": "张掖市政务服务中心",
        "甘南藏族自治州": "甘南藏族自治州政务服务中心",
        "兰州市": "兰州市政务服务中心",
    }
    print("----------------" + city + "----------------")

    # Report id and client.
    strID = "%02d" % (list(dCityClient).index(city))
    context = {
        "city": city,
        "client": dCityClient[city],
        "reportid": strID + info['num'],
    }
    context.update(info)

    # ---- select the rows belonging to this reporting unit ----
    colCity = '市/省局'
    colCounty = '区县/地方部门'
    subordinate = colCounty      # column used to break the unit down
    subordinateName = '县区'     # wording used in chart titles
    if "庆阳市" in city:
        # Qingyang itself, or one of its counties that gets its own report.
        mask = df[colCity] == '庆阳市'
        for countyName in ('华池县', '宁县', '镇原县'):
            if countyName in city:
                mask = mask & (df[colCounty] == countyName)
                break
        dfc = df.loc[mask].copy()
        # Findings are not narrowed to the county: the whole city's are used.
        dfcw = dfW.loc[dfW['市州'] == '庆阳市'].copy()
        dfcs = dfS.loc[dfS['市州'] == '庆阳市'].copy()
    elif "甘肃" in city:
        # Province-wide report: everything, broken down by city.
        dfc = df.copy()
        dfcw = dfW.copy()
        dfcs = dfS.copy()
        subordinate = colCity
        subordinateName = '市州'
    else:
        # Ordinary cities and '省直部门'. The latter used to look the name up
        # in an undefined ``dictSC`` and raised NameError.
        dfc = df.loc[df[colCity] == city].copy()
        dfcw = dfW.loc[dfW['市州'] == city].copy()
        dfcs = dfS.loc[dfS['市州'] == city].copy()

    dCity = {}

    # ---- accounts per county and monitoring result ----
    dfCountyAccount = pd.pivot_table(dfc, index=[subordinate], columns=['监测结果'], values=['账号名称'],
                                     aggfunc='count', fill_value='', margins=True)
    dfCountyAccount.columns = dfCountyAccount.columns.droplevel(0)
    resultColumns = set(dfCountyAccount.columns)

    def cell(row, column):
        # A result that never occurs has no column at all.
        return _int_or_blank(row[column]) if column in resultColumns else ''

    context['tt3_contents'] = [
        {'county': '总 计' if index == 'All' else index,
         'hg': cell(row, '合格'),
         'u2w': cell(row, '超过两周未更新'),
         'un': cell(row, '监测期间未更新'),
         'count': cell(row, 'All')}
        for index, row in dfCountyAccount.iterrows()
    ]

    # ---- accounts per media type ----
    dfMedia = pd.pivot_table(dfc, index=['账号类型'], values=['账号名称'], aggfunc='count',
                             fill_value='', margins=True)
    dCity['nmCount'] = dfMedia.loc['All', '账号名称']
    print(' 监测账号数:', dCity['nmCount'])
    # Remove the margin row by label; relying on it sorting first breaks when
    # there is a single media type (tie with the total).
    dfMediaBody = dfMedia.drop('All').sort_values(by='账号名称', ascending=False)
    mediaTypes = dfMediaBody.index.tolist()
    mediaCounts = dfMediaBody.iloc[:, 0].tolist()
    tt1_list = [{'type': m, 'count': str(n)} for m, n in zip(mediaTypes, mediaCounts)]
    dCity['sMediaCount'] = ','.join(r['type'] + r['count'] + '个' for r in tt1_list).rstrip(',')
    context.update({'tt1_contents': tt1_list})

    # ---- update counts per county ----
    dfCountyArticle = pd.pivot_table(dfc, index=[subordinate], values=['更新次数'], aggfunc='sum',
                                     fill_value='', margins=True)
    articleTotal = dfCountyArticle.loc['All', '更新次数']
    dfArticleBody = dfCountyArticle.drop('All').sort_values(by='更新次数', ascending=False)
    articleCounties = dfArticleBody.index.tolist()
    articleCounts = dfArticleBody.iloc[:, 0].tolist()
    dCity['cityArticleCount'] = "%d" % articleTotal
    dCity['countyMostArticle'] = articleCounties[0]
    dCity['countyMostArticleCount'] = "%d" % articleCounts[0]
    dCity['sCountyArticles'] = ','.join(
        name + "%d" % n + "次" for name, n in zip(articleCounties, articleCounts)).rstrip(',')

    # ---- qualification rate per county ----
    # Blank cells (county without any qualified account) count as 0; a missing
    # column means nobody qualified at all.
    if '合格' in dfCountyAccount.columns:
        dfCountyAccount['合格'] = pd.to_numeric(dfCountyAccount['合格'], errors='coerce').fillna(0).astype(int)
    else:
        dfCountyAccount['合格'] = 0
    dfCountyAccount = dfCountyAccount.sort_values(by='All', ascending=False).copy()
    dfCountyAccount['rate'] = dfCountyAccount['合格'] / pd.to_numeric(dfCountyAccount['All'])
    dfResult = dfCountyAccount.copy()
    dCity['cityRatio'] = "{:.1%}".format(dfCountyAccount.loc['All', 'rate'])
    print(' 合格率:', dCity['cityRatio'])

    # Counties ordered by number of accounts.
    dfCountyAccount = dfCountyAccount.drop(['All'])
    counties = dfCountyAccount.index.tolist()
    countyCounts = dfCountyAccount['All'].values.tolist()
    print(countyCounts)
    print(counties)
    dCity['countyCount'] = "%d" % len(counties)
    dCity['sCounties'] = ','.join(counties).rstrip(',')
    dCity['sCountyCount'] = ','.join(
        c + str(n) + '个' for c, n in zip(counties, countyCounts)).rstrip(',')

    # Counties ordered by qualification rate.
    dfCountyAccount = dfCountyAccount.sort_values(by='rate', ascending=False)
    countieshege = dfCountyAccount.index.tolist()
    countyRates = dfCountyAccount['rate']
    tt2_list = [{'county': c, 'ratio': "%.1f" % (100.0 * rate) + '%'}
                for c, rate in zip(countieshege, countyRates.tolist())]
    dCity['sCountyRatio'] = ','.join(r['county'] + r['ratio'] for r in tt2_list).rstrip(',')
    dCity['tt2_contents'] = tt2_list

    # ---- charts ----
    print(' 生成图片...')
    drawAnnulus(mediaCounts, mediaTypes,
                '政务新媒体账号类型', os.path.join(dirTemp, city + 'annulusMediaCount.png'))
    drawAnnulus(countyCounts, counties,
                subordinateName + '政务新媒体账号数量', os.path.join(dirTemp, city + 'annulusCountyCount.png'))
    drawAnnulus(articleCounts, articleCounties,
                subordinateName + '政务新媒体累计更新次数', os.path.join(dirTemp, city + 'annulusCountyArticle.png'))

    # ---- overall monitoring result ----
    dfResult = dfResult.drop('All', axis=1)
    dfResult = dfResult.drop('rate', axis=1)
    qualifiedTotal = dfResult.loc['All', '合格']
    dCity['resultQualified'] = "%d" % qualifiedTotal
    dCity['resultQualifiedRatio'] = "%.1f%%" % (qualifiedTotal / dCity['nmCount'] * 100.0)
    dCity['resultUnqualified'] = "%d" % (dCity['nmCount'] - qualifiedTotal)

    if '监测期间未更新' in dfResult.columns:
        numNoupdated = dfResult.loc['All', '监测期间未更新']
        dCity['stringResultNoUpdated'] = ";%d个政务新媒体监测期间未更新,占监测总数的%.1f%%" % (
            numNoupdated, numNoupdated / dCity['nmCount'] * 100.0)
        dCity['stringNoUpdated'] = "%d个政务新媒体监测期间未更新。" % (numNoupdated)
    else:
        dCity['stringResultNoUpdated'] = ''
        dCity['stringNoUpdated'] = ""

    if '超过两周未更新' in dfResult.columns:
        numNoupdated2W = dfResult.loc['All', '超过两周未更新']
        dCity['stringResultNoUpdated2W'] = ";%d个政务新媒体连续未更新时间超过两周,占监测总数的%.1f%%" % (
            numNoupdated2W, numNoupdated2W / dCity['nmCount'] * 100.0)
        dCity['stringNoUpdated2W'] = "%d个政务新媒体连续未更新时间超过两周。" % (numNoupdated2W)
    else:
        dCity['stringResultNoUpdated2W'] = ''
        dCity['stringNoUpdated2W'] = ''

    resultLabels = dfResult.columns.values.tolist()
    resultCounts = dfResult.loc['All'].values.tolist()
    drawAnnulus(resultCounts, resultLabels,
                '政务新媒体监测结果', os.path.join(dirTemp, city + 'annulusResult.png'))
    drawBar(countyRates, countieshege,
            '政务新媒体管理矩阵发布时效性合格率榜单', os.path.join(dirTemp, city + 'barCountyRatio.png'))

    # ---- tables for the report ----
    print(' 生成报告...')
    dfCityUnqulified = dfc[dfc['监测结果'] != '合格'].sort_values(by="监测结果", ascending=True)
    dfCityQulified = dfc[dfc['监测结果'] == '合格'].sort_values(by=subordinate, ascending=True)
    context.update({'tt4_contents': _account_rows(dfCityUnqulified, subordinate)})
    context.update({'tt5_contents': _account_rows(dfCityQulified, subordinate)})

    # Typos: strip quotes, brackets and similar characters from the quoted
    # sentence because they disturb the table output of the template.
    scrub = re.compile(r'''[——,$%^,。?、~@#¥%……&*《》<>「」{}【】()/\\[\]'"]''')
    dfcw = dfcw.fillna('')   # the result of fillna() used to be discarded
    if dfcw.shape[0] > 0:
        dCity['stringCbzCount'] = '本次检测发现错别字%d处,详细情况见附表政务新媒体发布内容错别字统计表。' % (dfcw.shape[0])
    else:
        dCity['stringCbzCount'] = '本次检测未发现错别字。'
    context.update({'tCbz_contents': _finding_rows(dfcw, scrub)})

    # Sensitive words: the sentence is passed through unmodified.
    dfcs = dfcs.fillna('')
    if dfcs.shape[0] > 0:
        dCity['stringMgcCount'] = '本次检测发现敏感信息%d处,详细情况见附表政务新媒体发布内容敏感信息统计表。' % (dfcs.shape[0])
    else:
        dCity['stringMgcCount'] = '本次检测未发现涉敏内容。'
    context.update({'tMgc_contents': _finding_rows(dfcs)})

    context.update(dCity)

    # ---- render the Word report from the template ----
    temp_word(fnTemplate,
              fnReport,
              context, dirTemp, city)
|
||||
|
||||
def createDir(dirP, dirS):
    """Ensure sub-directory *dirS* exists inside *dirP* and return its path.

    Args:
        dirP: Parent directory.
        dirS: Name of the sub-directory to create.

    Returns:
        The path of the sub-directory. *dirP* is returned unchanged when it is
        not a directory itself or when the sub-directory cannot be created
        (for example because a file of that name is in the way).
    """
    if not os.path.isdir(dirP):
        return dirP

    dirN = os.path.join(dirP, dirS)
    if not os.path.exists(dirN):
        try:
            os.mkdir(dirN)
        except OSError:
            # Fall through to the isdir() check below, which either accepts a
            # directory created concurrently or reports the failure.
            pass

    if os.path.isdir(dirN):
        return dirN

    # Report the path that failed, not the parent we fall back to.
    print('Directory ' + dirN + ' cannot be created.')
    return dirP
|
||||
# def createDir(dirP, dirS):
|
||||
|
||||
def summary(info, strFnData, strFnW, strFnS, strfnTemplate, strPathOutput):
    """Load the monitoring workbooks, clean the data and build the city reports.

    Args:
        info: Report-wide template values, forwarded to ``summaryCity``.
        strFnData: Excel file with the monitoring data.
        strFnW: Excel file with the merged typo findings.
        strFnS: Excel file with the merged sensitive-word findings.
        strfnTemplate: Path of the Word template.
        strPathOutput: Output path; the 'Reports' and 'Intermediate'
            directories are created next to its directory part.
    """
    df, dfW, dfS = (pd.read_excel(path) for path in (strFnData, strFnW, strFnS))

    colResult = '监测结果'
    colCity = '市/省局'
    colCounty = '区县/地方部门'

    # Use one wording for the "not updated for two weeks" result.
    df.loc[df[colResult] == '连续两周未更新', colResult] = '超过两周未更新'

    # Shorten over-long county names so they fit on the charts.
    for longName, shortName in (('积石山保安族东乡族撒拉族自治县', '积石山县'),
                                ('阿克塞哈萨克族自治县', '阿克塞自治县')):
        df.loc[df[colCounty] == longName, colCounty] = shortName

    # Rows without a city are provincial departments; rows without a county are
    # city-level departments (prefecture-level for Linxia).
    df[colCity] = df[colCity].fillna('省直部门')
    df[colCounty] = df[colCounty].fillna('市直部门')
    isLinxiaDirect = (df[colCity] == '临夏回族自治州') & (df[colCounty] == '市直部门')
    df.loc[isLinxiaDirect, colCounty] = '州直部门'

    # Clean-up: drop all whitespace inside cells and a leading "其他+" prefix.
    df = df.replace(r'\s+', '', regex=True)
    df = df.replace(r'^其他\+', '', regex=True)
    df['更新次数'] = df['更新次数'].fillna(0)
    df = df.fillna(value='')

    # Reporting units to generate. Earlier runs used larger sets (the full
    # province list, or 甘肃省/庆阳市/武威市/临夏回族自治州/酒泉市); only this
    # final selection is in effect.
    cities = {'张掖市'}

    # Reports and intermediate files live beside the output directory.
    dirP = os.path.abspath(os.path.dirname(strPathOutput))
    dirReports = createDir(dirP, 'Reports')
    dirIntermediate = createDir(dirP, 'Intermediate')
    for city in cities:
        fnReport = os.path.join(dirReports, city + '.docx')
        summaryCity(info, city, df, dfW, dfS, strfnTemplate, fnReport, dirIntermediate)
|
||||
|
||||
# 合并错别字文件
|
||||
def mergeCMC(keyword, strPathCBZ, strFnCbz):
    """Merge the per-city findings workbooks into one Excel file.

    Every ``*<keyword>*.xlsx`` file in *strPathCBZ* is read, tagged with the
    city recognised from its file name (column '市州') and appended to the
    merged table, which is then written to *strFnCbz*.

    Args:
        keyword: Substring the file names must contain.
        strPathCBZ: Directory holding the per-city workbooks.
        strFnCbz: Path of the merged workbook to write.
    """
    # File-name fragment -> official city name. Order matters: the first
    # fragment found in the file name wins, so '新区'/'XQ' must stay ahead of
    # '兰州'/'LZ'.
    cityShorten = {'白银': '白银市', '定西': '定西市', '酒泉': '酒泉市', '嘉峪关': '嘉峪关市', '陇南': '陇南市',
                   '临夏': '临夏回族自治州', '平凉': '平凉市', '庆阳': '庆阳市', '天水': '天水市', '武威': '武威市', '新区': '兰州新区',
                   '兰州新区': '兰州新区', '兰州': '兰州市', '张掖': '张掖市', '甘南': '甘南藏族自治州', '省直': '省直部门', '金昌': '金昌市',
                   'BY': '白银市', 'DX': '定西市', 'JQ': '酒泉市', 'JYG': '嘉峪关市', 'LN': '陇南市',
                   'LX': '临夏回族自治州', 'PL': '平凉市', 'QY': '庆阳市', 'TS': '天水市', 'WW': '武威市', 'XQ': '兰州新区',
                   'LZXQ': '兰州新区', 'LZ': '兰州市', 'ZY': '张掖市', 'GN': '甘南藏族自治州', 'SZ': '省直部门', 'JC': '金昌市', }

    frames = []
    total = 0
    for fn in glob.glob(os.path.join(strPathCBZ, '*' + keyword + '*.xlsx')):
        f = os.path.basename(fn)
        city = next((full for short, full in cityShorten.items() if short in f), '')
        if not city:
            # The file is still merged, with an empty city tag.
            print("!!!!! City Name not matched ( ", f, " )")
        dfn = pd.read_excel(fn)
        dfn['市州'] = city
        frames.append(dfn)
        total += dfn.shape[0]
        print(city, f, dfn.shape[0], '/', total)

    # DataFrame.append() no longer exists in pandas 2.x; concatenate once.
    # pd.concat() rejects an empty list, so keep the empty-frame result.
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    df.to_excel(strFnCbz)
|
||||
#def mergeCMC
|
||||
|
||||
if __name__ == "__main__":
    # NOTE: convert the date columns of the Excel files before running.

    # Values substituted into the report template.
    info = dict(
        year="2023",
        quarter="一",
        dateCN="二〇二三年四月",
        dateStart="2023年1月1日",
        dateEnd="2023年3月20日",
        days="79",
        num="4",
    )

    # Root directory of the data.
    strPath = 'D:/Projects/POM/DATA/2023年S1/'
    # Monitoring data.
    strFnMonitoring = strPath + '汇总/第一季度汇总数据_2023.3.xlsx'
    # Word template.
    strPathTemplate = strPath + 'POM_ReportTemplate0.docx'
    # Merged typo / sensitive-word workbooks.
    strFnCbz = strPath + '汇总/CBZ.xlsx'
    strFnMgc = strPath + '汇总/MGC.xlsx'

    # Build each merged workbook from the per-city files unless it exists.
    for keyword, strFnMerged in (("错别", strFnCbz), ("敏感", strFnMgc)):
        if not os.path.exists(strFnMerged):
            mergeCMC(keyword, strPath + '监测/', strFnMerged)

    # Output directory.
    strPathOutput = strPath + '统计/'

    summary(info, strFnMonitoring, strFnCbz, strFnMgc, strPathTemplate, strPathOutput)
|
618
StatSeasonly4.py
618
StatSeasonly4.py
|
@ -1,618 +0,0 @@
|
|||
# 1. 打开监测任务表格
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os, glob, re
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.ticker import FuncFormatter
|
||||
import datetime
|
||||
|
||||
from docxtpl import DocxTemplate
|
||||
from docxtpl import InlineImage
|
||||
from docx.shared import Mm
|
||||
|
||||
|
||||
def fetch_chinese(s):
    """Return *s* reduced to its characters in the range U+4E00..U+9FA5."""
    return ''.join(ch for ch in s if '\u4e00' <= ch <= '\u9fa5')
|
||||
|
||||
def toDate(strDT):
    """Format a date-like value as ``'MM-DD'``; return ``''`` if it cannot be parsed."""
    parsed = pd.to_datetime(strDT, errors='coerce')
    if pd.isna(parsed):
        return ''
    return parsed.strftime('%m-%d')
|
||||
|
||||
# word模板替换
|
||||
def temp_word(tmep_path, word_apth, dContext, pathImage, city):
    """Render the Word template with the context plus the city's chart images.

    Args:
        tmep_path: Path of the Word template.
        word_apth: Path of the document to write.
        dContext: Template context; the five chart images are added to it
            in place under the chart names below.
        pathImage: Directory containing the chart PNG files.
        city: Prefix of the chart file names.
    """
    tpl = DocxTemplate(tmep_path)

    # Each chart is stored as <pathImage>/<city><name>.png and exposed to the
    # template under <name>, 120 mm wide.
    chartNames = ('annulusMediaCount', 'annulusCountyCount', 'annulusCountyArticle',
                  'annulusResult', 'barCountyRatio')
    charts = {}
    for name in chartNames:
        charts[name] = InlineImage(tpl, os.path.join(pathImage, city + name + '.png'), width=Mm(120))
    dContext.update(charts)

    tpl.render(dContext)
    tpl.save(word_apth)
|
||||
|
||||
|
||||
# 画柱状图
|
||||
def drawBar(data, recipe, title='', fn=''):
    """Draw a bar chart of rates (0..1) shown as percentages and optionally save it.

    Args:
        data: Bar heights; the y axis is fixed to the range 0..1.
        recipe: Bar labels, also used as x tick labels.
        title: Chart title.
        fn: Output image path; nothing is saved when it is empty, matching
            ``drawAnnulus`` (the default used to be passed to ``savefig``).
    """
    plt.figure(figsize=(6, 4))
    plt.rcParams['font.sans-serif'] = ['SimHei']   # font able to render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False

    plt.bar(recipe, data, width=0.5)
    plt.xticks(recipe, recipe, rotation=35)
    plt.ylim((0, 1))

    def to_percent(temp, position):
        # Tick formatter: 0.25 -> '25%'.
        return '%2.0f' % (100 * temp) + '%'

    plt.gca().yaxis.set_major_formatter(FuncFormatter(to_percent))
    plt.title(title, fontsize=16)
    plt.tight_layout()
    if len(fn) > 0:
        plt.savefig(fn)

    # Release the figure so repeated calls do not accumulate open figures.
    plt.cla()
    plt.clf()
    plt.close()
|
||||
|
||||
|
||||
# 画环状图
|
||||
def drawAnnulus(data, recipe, title='', fn=''):
    """Draw a ring (donut) chart with a legend and optionally save it.

    Args:
        data: Wedge sizes, passed straight to ``ax.pie``.
        recipe: Legend labels, one per wedge.
        title: Chart title; no title is set when it is empty.
        fn: Output image path; nothing is saved when it is empty.
    """
    plt.rcParams['font.sans-serif'] = ['SimHei']   # font able to render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False

    # Canvas width, legend column count and legend font size grow with the
    # number of legend entries. Valid named sizes are xx-small, x-small, small,
    # medium, large, x-large, xx-large; the 21..40 case used the misspelt
    # 'xmall', which Matplotlib rejects.
    entryCount = len(recipe)
    if entryCount > 40:
        figWidth, legendCols, legendFont = 16, 4, 'x-small'
    elif entryCount > 20:
        figWidth, legendCols, legendFont = 16, 2, 'x-small'
    else:
        figWidth, legendCols, legendFont = 8, 1, 'medium'
    figHeight = 4

    fig, ax = plt.subplots(figsize=(figWidth, figHeight), subplot_kw=dict(aspect="equal"))

    # wedgeprops' width turns the pie into a ring; drawing starts on the
    # positive x axis (startangle=0) and proceeds counter-clockwise.
    ax.pie(data, radius=1.1, wedgeprops=dict(width=0.4), startangle=0)

    # The monitoring-result chart places its legend slightly closer to the ring.
    anchorX = 1.0 if title == '政务新媒体监测结果' else 1.2
    plt.legend(labels=recipe, loc="center left", bbox_to_anchor=(anchorX, 0.5),
               borderaxespad=0., ncol=legendCols, fontsize=legendFont)
    if len(title) > 0:
        ax.set_title(title, fontsize=16, fontweight='heavy')

    plt.tight_layout()
    if len(fn) > 0:
        plt.savefig(fn)

    # Release the figure so repeated calls do not accumulate open figures.
    plt.cla()
    plt.clf()
    plt.close()
|
||||
|
||||
# summaryCity(city, dfc, dfcw, dfcs, context, strfnTemplate, os.path.join(strPathVerified,'Reports', city+'.docx'), strPathVerified )
|
||||
|
||||
|
||||
# 汇总市州数据,
|
||||
# 市州名称, 监测数据, cbz数据, mgc数据, context(编号、名称), word模板文件名称, 输出word文件名称, 临时文件目录
|
||||
# 需要传入模板文件,数据、错别字、敏感词,单位名称等
|
||||
def summaryCity(info, city, df, dfW, dfS, fnTemplate, fnReport, dirTemp):
|
||||
dCityClient = {
|
||||
'甘肃省': "甘肃省人民政府办公厅",
|
||||
'省直部门': "甘肃省人民政府办公厅",
|
||||
'白银市': "白银市人民政府办公室",
|
||||
'定西市': "定西市人民政府办公室",
|
||||
'临夏回族自治州': "临夏回族自治州人民政府办公室",
|
||||
'平凉市': "中共平凉市委网络安全和信息化委员会办公室",
|
||||
"庆阳市": "庆阳市电子政务与信息资源管理办公室",
|
||||
'庆阳市华池县': "华池县人民政府办公室",
|
||||
'庆阳市宁县': "宁县人民政府办公室",
|
||||
"庆阳市镇原县": "镇原县人民政府办公室",
|
||||
"酒泉市": "酒泉市人民政府办公室",
|
||||
"天水市": "天水市人民政府办公室",
|
||||
"武威市": "武威市人民政府办公室",
|
||||
"金昌市": "金昌市人民政府办公室",
|
||||
"嘉峪关市": "嘉峪关市人民政府办公室",
|
||||
"兰州新区": "兰州新区管委会办公室",
|
||||
"陇南市": "陇南市政务服务中心",
|
||||
"张掖市": "张掖市政务服务中心",
|
||||
"甘南藏族自治州": "甘南藏族自治州政务服务中心",
|
||||
"兰州市": "兰州市政务服务中心",
|
||||
"陇南市": "陇南市政务服务中心",
|
||||
}
|
||||
print("----------------" + city + "----------------")
|
||||
# 报告编号、委托单位
|
||||
strID = "%02d" % (list(dCityClient).index(city))
|
||||
# print(strID)
|
||||
context = {
|
||||
"city": city,
|
||||
"client": dCityClient[city],
|
||||
"reportid": strID + info['num'],
|
||||
}
|
||||
context.update(info)
|
||||
|
||||
subordinate = '区县/地方部门'
|
||||
subordinateName = '县区'
|
||||
# 区县数据筛选
|
||||
if "庆阳市" in city:
|
||||
if "华池县" in city:
|
||||
dfc = df.loc[(df['市/省局'] == '庆阳市')
|
||||
& (df['区县/地方部门'] == '华池县')].copy()
|
||||
|
||||
elif "宁县" in city:
|
||||
dfc = df.loc[(df['市/省局'] == '庆阳市')
|
||||
& (df['区县/地方部门'] == '宁县')].copy()
|
||||
elif "镇原县" in city:
|
||||
dfc = df.loc[(df['市/省局'] == '庆阳市')
|
||||
& (df['区县/地方部门'] == '镇原县')].copy()
|
||||
else:
|
||||
dfc = df.loc[(df['市/省局'] == '庆阳市')].copy()
|
||||
# & (df['区县/地方部门']!='华池县')
|
||||
# & (df['区县/地方部门']!='宁县')
|
||||
# & (df['区县/地方部门']!='镇原县') ].copy()
|
||||
dfcw = dfW.loc[dfW['市州'] == '庆阳市'].copy()
|
||||
dfcs = dfS.loc[dfS['市州'] == '庆阳市'].copy()
|
||||
elif "甘肃" in city :
|
||||
dfc = df.copy()
|
||||
dfcw = dfW.copy()
|
||||
dfcs = dfS.copy()
|
||||
subordinate = '市/省局'
|
||||
subordinateName = '市州'
|
||||
|
||||
elif "省直部门" in city :
|
||||
dfc = df.loc[df['市/省局'] == city].copy()
|
||||
#dfcw = dfW.loc[dfW['市州'] == dictSC[city]].copy()
|
||||
#dfcs = dfS.loc[dfS['市州'] == dictSC[city]].copy()
|
||||
dfcw = dfW.loc[dfW['市州'] == city].copy()
|
||||
dfcs = dfS.loc[dfS['市州'] == city].copy()
|
||||
|
||||
else:
|
||||
dfc = df.loc[(df['市/省局'] == city)].copy()
|
||||
dfcw = dfW.loc[dfW['市州'] == city].copy()
|
||||
dfcs = dfS.loc[dfS['市州'] == city].copy()
|
||||
|
||||
# -----------------------
|
||||
# 统计结果分析
|
||||
|
||||
dCity = {'1': '2'}
|
||||
#
|
||||
# 县区-监测结果 统计
|
||||
#
|
||||
|
||||
# 透视表, 按县区统计各个监测结果账号数量
|
||||
dfCountyAccount = pd.pivot_table(dfc, index=[subordinate], columns=['监测结果'], values=['账号名称'], aggfunc='count',
|
||||
fill_value='', margins=True)
|
||||
dfCountyAccount.columns = dfCountyAccount.columns.droplevel(0)
|
||||
# 准备模板中的表格
|
||||
tt3_list = []
|
||||
for index, row in dfCountyAccount.iterrows():
|
||||
county = ''
|
||||
if index == 'All':
|
||||
county = '总 计'
|
||||
else:
|
||||
county = index
|
||||
hg = ''
|
||||
u2w = ''
|
||||
un = ''
|
||||
count = ''
|
||||
if '合格' in dfCountyAccount.columns.values.tolist():
|
||||
if not isinstance(row['合格'], str):
|
||||
hg = int(row['合格'])
|
||||
if '监测期间未更新' in dfCountyAccount.columns.values.tolist():
|
||||
if not isinstance(row['监测期间未更新'], str):
|
||||
un = int(row['监测期间未更新'])
|
||||
if '超过两周未更新' in dfCountyAccount.columns.values.tolist():
|
||||
if not isinstance(row['超过两周未更新'], str):
|
||||
u2w = int(row['超过两周未更新'])
|
||||
if 'All' in dfCountyAccount.columns.values.tolist():
|
||||
if not isinstance(row['All'], str):
|
||||
count = int(row['All'])
|
||||
|
||||
tt3_a = {'county': county, 'hg': hg, 'u2w': u2w, 'un': un, 'count': count}
|
||||
tt3_list.append(tt3_a)
|
||||
context['tt3_contents'] = tt3_list
|
||||
# dfCountyAccount.to_excel(dirTask+strPathCity+'县区监测结果.xlsx')
|
||||
|
||||
# -----------------------
|
||||
#
|
||||
# 按媒体类型统计
|
||||
#
|
||||
# 透视表, 按账号类型统计账号数量
|
||||
dfMedia = pd.pivot_table(dfc, index=['账号类型'], values=['账号名称'], aggfunc='count', fill_value='', margins=True)
|
||||
# 提取该市账号数量
|
||||
dCity['nmCount'] = dfMedia.loc['All', '账号名称']
|
||||
print(' 监测账号数:', dCity['nmCount'])
|
||||
# 提取 账号类型-数量 , 拼成文本串
|
||||
dfMedia = dfMedia.sort_values(by='账号名称', ascending=False)
|
||||
lTableCs1 = []
|
||||
strMedia = ''
|
||||
i = 0
|
||||
tt1_list = []
|
||||
for m in dfMedia.index.tolist()[1:]: # 第一个是总数,不用取
|
||||
strNum = str(dfMedia.iloc[:, 0].tolist()[1:][i])
|
||||
strMedia = strMedia + m + strNum + '个,'
|
||||
tt1_a = {'type': m, 'count': strNum}
|
||||
tt1_list.append(tt1_a)
|
||||
i = i + 1
|
||||
dCity['sMediaCount'] = strMedia[:-1].rstrip(',')
|
||||
context.update({'tt1_contents': tt1_list})
|
||||
|
||||
# -----------------------
|
||||
#
|
||||
# 按县区-更新次数 统计
|
||||
#
|
||||
dfCountyArticle = pd.pivot_table(dfc, index=[subordinate], values=['更新次数'], aggfunc='sum', fill_value='',
|
||||
margins=True)
|
||||
dfCountyArticle = dfCountyArticle.sort_values(by='更新次数', ascending=False).copy()
|
||||
dCity['cityArticleCount'] = "%d" % dfCountyArticle.iloc[0, 0]
|
||||
dCity['countyMostArticle'] = dfCountyArticle.index.tolist()[1]
|
||||
dCity['countyMostArticleCount'] = "%d" % dfCountyArticle.iloc[1, 0]
|
||||
strCountyArticle = ''
|
||||
iiii = 1
|
||||
for cccc in dfCountyArticle.index.tolist()[1:]:
|
||||
strCountyArticle = strCountyArticle + cccc + "%d" % dfCountyArticle.iloc[iiii, 0] + "次,"
|
||||
iiii = iiii + 1
|
||||
dCity['sCountyArticles'] = strCountyArticle.rstrip(',')
|
||||
|
||||
# 市各县区监测结果按总数排序,
|
||||
dfCountyAccount.loc[:, '合格'] = dfCountyAccount['合格'].astype('int')
|
||||
dfCountyAccount = dfCountyAccount.sort_values(by='All', ascending=False).copy()
|
||||
# 计算合格率
|
||||
dfCountyAccount.eval('rate = 合格 / All ', inplace=True)
|
||||
dfResult = dfCountyAccount.copy()
|
||||
# 提取city合格率
|
||||
dCity['cityRatio'] = "{:.1%}".format(dfCountyAccount.loc['All', 'rate'])
|
||||
print(' 合格率:', dCity['cityRatio'])
|
||||
|
||||
# 导出文件
|
||||
# dfCountyAccount.to_excel(dirIntermediate+sFileBase+'县区合格率.xlsx')
|
||||
|
||||
# dfMedia = dfMedia.drop(['All'])
|
||||
# 提取县区名称,县区账号数, 县区合格率,转成字符串
|
||||
dfCountyAccount = dfCountyAccount.drop(['All']) # 删除"All"行
|
||||
counties = dfCountyAccount.index.tolist()
|
||||
countyCounts = dfCountyAccount['All'].values.tolist()
|
||||
countyHeges = dfCountyAccount['合格'].values.tolist()
|
||||
print(countyCounts)
|
||||
print(counties)
|
||||
|
||||
# 按县区账号数量排序
|
||||
strCountyCount = ''
|
||||
strCounties = ''
|
||||
i = 0
|
||||
for c in counties:
|
||||
strCounties = strCounties + c + ','
|
||||
strCountyCount = strCountyCount + c + str(countyCounts[i]) + '个,'
|
||||
i = i + 1
|
||||
dCity['countyCount'] = "%d" % i
|
||||
dCity['sCounties'] = strCounties.rstrip(',')
|
||||
dCity['sCountyCount'] = strCountyCount.rstrip(',')
|
||||
|
||||
# 按合格率排序
|
||||
dfCountyAccount = dfCountyAccount.sort_values(by='rate', ascending=False)
|
||||
countieshege = dfCountyAccount.index.tolist()
|
||||
countyRates = dfCountyAccount['rate']
|
||||
strCountyRatio = ''
|
||||
i = 0
|
||||
tt2_list = []
|
||||
for c in countieshege:
|
||||
strRatio = "%.1f" % (100.0 * countyRates[i])
|
||||
strCountyRatio = strCountyRatio + c + strRatio + '%,'
|
||||
tt2_a = {'county': c, 'ratio': strRatio + '%'}
|
||||
tt2_list.append(tt2_a)
|
||||
i = i + 1
|
||||
dCity['sCountyRatio'] = strCountyRatio.rstrip(',')
|
||||
dCity['tt2_contents'] = tt2_list
|
||||
|
||||
# -----------------------
|
||||
#
|
||||
# 绘图
|
||||
#
|
||||
print(' 生成图片...')
|
||||
drawAnnulus(dfMedia.iloc[:, 0].tolist()[1:], dfMedia.index.tolist()[1:],
|
||||
'政务新媒体账号类型', os.path.join(dirTemp, city + 'annulusMediaCount.png'))
|
||||
print(countyCounts)
|
||||
print(counties)
|
||||
drawAnnulus(countyCounts, counties,
|
||||
subordinateName + '政务新媒体账号数量', os.path.join(dirTemp, city + 'annulusCountyCount.png'))
|
||||
|
||||
drawAnnulus(dfCountyArticle.iloc[:, 0].tolist()[1:], dfCountyArticle.index.tolist()[1:],
|
||||
subordinateName + '政务新媒体累计更新次数', os.path.join(dirTemp, city + 'annulusCountyArticle.png'))
|
||||
|
||||
# ;{{resultNoUpdated}}个政务新媒体监测期间未更新,占监测总数的{{resultNoUpdatedRatio}}
|
||||
# ;{{resultNoUpdated2W}}个政务新媒体连续未更新时间超过两周,占监测总数的{{resultNoUpdated2WRatio}}
|
||||
# 政务新媒体监测结果
|
||||
dfResult = dfResult.drop('All', axis=1)
|
||||
dfResult = dfResult.drop('rate', axis=1)
|
||||
# 合格数,合格率,不合格数
|
||||
dCity['resultQualified'] = "%d" % (dfResult.loc['All', '合格'])
|
||||
dCity['resultQualifiedRatio'] = "%.1f%%" % (dfResult.loc['All', '合格'] / dCity['nmCount'] * 100.0)
|
||||
dCity['resultUnqualified'] = "%d" % (dCity['nmCount'] - dfResult.loc['All', '合格'])
|
||||
#
|
||||
# numNoupdated = 0
|
||||
if '监测期间未更新' in dfResult.columns.values.tolist():
|
||||
numNoupdated = dfResult.loc['All', '监测期间未更新']
|
||||
dCity['stringResultNoUpdated'] = ";%d个政务新媒体监测期间未更新,占监测总数的%.1f%%" % (
|
||||
numNoupdated, numNoupdated / dCity['nmCount'] * 100.0)
|
||||
dCity['stringNoUpdated'] = "%d个政务新媒体监测期间未更新。" % (numNoupdated)
|
||||
else:
|
||||
dCity['stringResultNoUpdated'] = ''
|
||||
dCity['stringNoUpdated'] = ""
|
||||
# dCity['resultNoUpdated'] = "%d"%(numNoupdated)
|
||||
# dCity['resultNoUpdatedRatio'] = "%.1f%%"%(numNoupdated/dCity['nmCount']*100.0)
|
||||
# numNoupdated2W = 0
|
||||
if '超过两周未更新' in dfResult.columns.values.tolist():
|
||||
numNoupdated2W = dfResult.loc['All', '超过两周未更新']
|
||||
dCity['stringResultNoUpdated2W'] = ";%d个政务新媒体连续未更新时间超过两周,占监测总数的%.1f%%" % (
|
||||
numNoupdated2W, numNoupdated2W / dCity['nmCount'] * 100.0)
|
||||
dCity['stringNoUpdated2W'] = "%d个政务新媒体连续未更新时间超过两周。" % (numNoupdated2W)
|
||||
else:
|
||||
dCity['stringResultNoUpdated2W'] = ''
|
||||
dCity['stringNoUpdated2W'] = ''
|
||||
# dCity['resultNoUpdated2W'] = "%d"%(numNoupdated2W)
|
||||
# dCity['resultNoUpdated2WRatio'] = "%.1f%%"%(numNoupdated2W/dCity['nmCount']*100.0)
|
||||
resultLabels = dfResult.columns.values.tolist()
|
||||
resultCounts = dfResult.loc['All'].values.tolist()
|
||||
drawAnnulus(resultCounts, resultLabels,
|
||||
'政务新媒体监测结果', os.path.join(dirTemp, city + 'annulusResult.png'))
|
||||
|
||||
drawBar(countyRates, countieshege,
|
||||
'政务新媒体管理矩阵发布时效性合格率榜单', os.path.join(dirTemp, city + 'barCountyRatio.png'))
|
||||
|
||||
# -----------------------
|
||||
#
|
||||
# 准备报告需要的数据
|
||||
#
|
||||
print(' 生成报告...')
|
||||
|
||||
dfCityUnqulified = dfc[dfc['监测结果'] != '合格']
|
||||
dfCityUnqulified = dfCityUnqulified.sort_values(by="监测结果", ascending=True) # by指定按哪列排序。ascending表示是否升序=False
|
||||
|
||||
#################################################
|
||||
|
||||
dfCityQulified = dfc[dfc['监测结果'] == '合格']
|
||||
dfCityQulified = dfCityQulified.sort_values(by=subordinate, ascending=True) # by指定按哪列排序。ascending表示是否升序=False
|
||||
|
||||
# 方法一
|
||||
|
||||
tt4_list = []
|
||||
for index, row in dfCityUnqulified.iterrows():
|
||||
count = ''
|
||||
if row['更新次数']:
|
||||
count = "%d" % row['更新次数']
|
||||
days = ''
|
||||
if row['最大静默日数']:
|
||||
days = "%d" % row['最大静默日数']
|
||||
sD1 = ''
|
||||
sD2 = ''
|
||||
if row['静默开始日期']:
|
||||
sD1 = toDate(str(row['静默开始日期']))
|
||||
if row['静默结束日期']:
|
||||
sD2 = toDate(str(row['静默结束日期']))
|
||||
|
||||
tt4_a = {'name': row['账号名称'], 'type': row['账号类型'], 'unit': row['开设主体'],
|
||||
'county': row[subordinate], 'result': row['监测结果'], 'num': count,
|
||||
'days': days, 'start': sD1, 'end': sD2, }
|
||||
tt4_list.append(tt4_a)
|
||||
tt4_results = {'tt4_contents': tt4_list}
|
||||
context.update(tt4_results)
|
||||
|
||||
tt5_list = []
|
||||
for index, row in dfCityQulified.iterrows():
|
||||
count = ''
|
||||
if row['更新次数']:
|
||||
count = "%d" % row['更新次数']
|
||||
days = ''
|
||||
if row['最大静默日数']:
|
||||
days = "%d" % row['最大静默日数']
|
||||
sD1 = ''
|
||||
sD2 = ''
|
||||
if row['静默开始日期']:
|
||||
sD1 = toDate(str(row['静默开始日期']))
|
||||
if row['静默结束日期']:
|
||||
sD2 = toDate(str(row['静默结束日期']))
|
||||
|
||||
tt5_a = {'name': row['账号名称'], 'type': row['账号类型'], 'unit': row['开设主体'],
|
||||
'county': row[subordinate], 'result': row['监测结果'], 'num': count,
|
||||
'days': days, 'start': sD1, 'end': sD2, }
|
||||
tt5_list.append(tt5_a)
|
||||
tt5_results = {'tt5_contents': tt5_list}
|
||||
context.update(tt5_results)
|
||||
|
||||
# 读取添加错别字表格
|
||||
tCbz_list = []
|
||||
dfcw.fillna('')
|
||||
for index, row in dfcw.iterrows():
|
||||
sTitle = ''
|
||||
sDate = toDate(str(row['发文时间']))
|
||||
if '标题' in dfcw.columns:
|
||||
sTitle = row['标题']
|
||||
|
||||
# 去除引号等干扰表格模板输出的字符
|
||||
r = "[——,$%^,。?、~@#¥%……&*《》<>「」{}【】()/\\\[\]'\"]"
|
||||
if pd.isna(row['错误出现位置']):
|
||||
s = ''
|
||||
else:
|
||||
s = re.sub(r, '', row['错误出现位置'])
|
||||
a = {'error': row['错误'], 'tips': row['建议'], 'sentence': s, 'type': row['账号类型'], 'name': row['账号名称'],
|
||||
'date': sDate, 'title': sTitle, }
|
||||
tCbz_list.append(a)
|
||||
if dfcw.shape[0] > 0:
|
||||
dCity['stringCbzCount'] = '本次检测发现错别字%d处,详细情况见附表政务新媒体发布内容错别字统计表。' % (dfcw.shape[0])
|
||||
else:
|
||||
dCity['stringCbzCount'] = '本次检测未发现错别字。'
|
||||
tCbz_results = {'tCbz_contents': tCbz_list}
|
||||
context.update(tCbz_results)
|
||||
|
||||
# 读取添加敏感词表格
|
||||
tMgc_list = []
|
||||
dfcs.fillna('')
|
||||
for index, row in dfcs.iterrows():
|
||||
sTitle = ''
|
||||
sDate = toDate(str(row['发文时间']))
|
||||
if '标题' in dfcs.columns:
|
||||
sTitle = row['标题']
|
||||
|
||||
a = {'error': row['错误'], 'tips': row['建议'], 'sentence': fetch_chinese(str(row['错误出现位置'])), 'type': row['账号类型'], 'name': row['账号名称'],
|
||||
'date': sDate, 'title': fetch_chinese(str(sTitle)), }
|
||||
|
||||
tMgc_list.append(a)
|
||||
if dfcs.shape[0] > 0:
|
||||
dCity['stringMgcCount'] = '本次检测发现敏感信息%d处,详细情况见附表政务新媒体发布内容敏感信息统计表。' % (dfcs.shape[0])
|
||||
else:
|
||||
dCity['stringMgcCount'] = '本次检测未发现涉敏内容。'
|
||||
tMgc_results = {'tMgc_contents': tMgc_list}
|
||||
context.update(tMgc_results)
|
||||
|
||||
# table1
|
||||
context.update(dCity)
|
||||
|
||||
# -----------------------
|
||||
#
|
||||
# 按模板生成报告
|
||||
#
|
||||
temp_word(fnTemplate,
|
||||
fnReport,
|
||||
context, dirTemp, city)
|
||||
|
||||
def createDir(dirP, dirS):
    """Ensure sub-directory ``dirS`` exists under ``dirP`` and return its path.

    Args:
        dirP: Parent directory. When it is not an existing directory nothing
            is created and ``dirP`` itself is returned.
        dirS: Name of the sub-directory to create inside ``dirP``.

    Returns:
        The path of the sub-directory when it exists (or was created);
        otherwise ``dirP`` as a fallback so callers still get a usable value.
    """
    if not os.path.isdir(dirP):
        return dirP

    dirN = os.path.join(dirP, dirS)
    try:
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(dirN, exist_ok=True)
    except OSError:
        # Raised e.g. on permission problems or when a regular file already
        # occupies that name. Report the path that actually failed, then fall
        # back to the parent directory.
        print('Directory ' + dirN + ' cannot be created.')
        return dirP
    return dirN
|
||||
# def createDir(dirP, dirS):
|
||||
|
||||
def summary(info, strFnData, strFnW, strFnS, strfnTemplate, strPathOutput):
    """Clean the monitoring workbook and produce one report per selected city.

    Args:
        info: Report metadata; passed unchanged to ``summaryCity``.
        strFnData: Path of the Excel workbook with the monitoring data.
        strFnW: Path of the Excel workbook with the typo findings.
        strFnS: Path of the Excel workbook with the sensitive-word findings.
        strfnTemplate: Path of the Word template; passed to ``summaryCity``.
        strPathOutput: Output location; ``Reports`` and ``Intermediate``
            directories are created next to/under its directory name.
    """
    # Load monitoring data, typo findings and sensitive-word findings.
    monitoring = pd.read_excel(strFnData)
    typos = pd.read_excel(strFnW)
    sensitive = pd.read_excel(strFnS)

    result_col = '监测结果'
    city_col = '市/省局'
    county_col = '区县/地方部门'

    # Use a single wording for the "not updated for two weeks" result.
    monitoring.loc[monitoring[result_col] == '连续两周未更新', result_col] = '超过两周未更新'

    # Replace over-long county names with short forms so charts stay readable.
    county_short_names = (
        ('积石山保安族东乡族撒拉族自治县', '积石山县'),
        ('阿克塞哈萨克族自治县', '阿克塞自治县'),
    )
    for long_name, short_name in county_short_names:
        monitoring.loc[monitoring[county_col] == long_name, county_col] = short_name

    # Rows without a city / county belong to the provincial / municipal level;
    # for Linxia prefecture the municipal label is replaced by the prefecture one.
    monitoring[city_col] = monitoring[city_col].fillna('省直部门')
    monitoring[county_col] = monitoring[county_col].fillna('市直部门')
    linxia_direct = (monitoring[city_col] == '临夏回族自治州') & (monitoring[county_col] == '市直部门')
    monitoring.loc[linxia_direct, county_col] = '州直部门'

    # Data clean-up: drop all whitespace (spaces, newlines, tabs), then strip
    # a leading "其他+" prefix, then fill the remaining gaps.
    monitoring = monitoring.replace(r'\s+', '', regex=True)
    monitoring = monitoring.replace(r'^其他\+', '', regex=True)
    monitoring['更新次数'] = monitoring['更新次数'].fillna(0)
    monitoring = monitoring.fillna(value='')

    # Full reporting scope, kept for reference only; the active selection
    # below is what is actually processed.
    all_cities = {'甘肃省', '白银市', '武威市', '庆阳市', '酒泉市', '天水市', '临夏回族自治州', '平凉市', '定西市', '兰州新区',
                  '嘉峪关市', '庆阳市华池县', '庆阳市镇原县', '庆阳市宁县', '陇南市'}
    # Only these cities are reported.
    cities = {'甘肃省','庆阳市','武威市','临夏回族自治州'}

    # Create the report directory and the temporary-file directory.
    base_dir = os.path.abspath(os.path.dirname(strPathOutput))
    reports_dir = createDir(base_dir, 'Reports')
    intermediate_dir = createDir(base_dir, 'Intermediate')

    for city in cities:
        report_path = os.path.join(reports_dir, city + '.docx')
        summaryCity(info, city, monitoring, typos, sensitive, strfnTemplate, report_path, intermediate_dir)
|
||||
|
||||
# 合并错别字文件
|
||||
def mergeCMC(keyword, strPathCBZ, strFnCbz):
    """Merge per-city Excel result files into one workbook tagged by city.

    Every ``*.xlsx`` file in ``strPathCBZ`` whose file name contains
    ``keyword`` is read, given a ``市州`` column holding the city derived from
    the file name, and concatenated. The merged table is written to
    ``strFnCbz``.

    Args:
        keyword: Substring that selects the files to merge.
        strPathCBZ: Directory that holds the per-city workbooks.
        strFnCbz: Path of the merged Excel workbook to write.
    """
    # File-name fragment -> full city name. The first key found in the file
    # name wins, so insertion order matters: '新区'/'兰州新区' (and 'XQ'/'LZXQ')
    # come before '兰州' ('LZ') so that new-area files are not tagged as 兰州市.
    cityShorten = {'白银': '白银市', '定西': '定西市', '酒泉': '酒泉市', '嘉峪关': '嘉峪关市', '陇南': '陇南市',
                   '临夏': '临夏回族自治州', '平凉': '平凉市', '庆阳': '庆阳市', '天水': '天水市', '武威': '武威市', '新区': '兰州新区',
                   '兰州新区': '兰州新区', '兰州': '兰州市', '张掖': '张掖市', '甘南': '甘南藏族自治州', '省直': '省直部门', '金昌': '金昌市',
                   'BY': '白银市', 'DX': '定西市', 'JQ': '酒泉市', 'JYG': '嘉峪关市', 'LN': '陇南市',
                   'LX': '临夏回族自治州', 'PL': '平凉市', 'QY': '庆阳市', 'TS': '天水市', 'WW': '武威市', 'XQ': '兰州新区',
                   'LZXQ': '兰州新区', 'LZ': '兰州市', 'ZY': '张掖市', 'GN': '甘南藏族自治州', 'SZ': '省直部门', 'JC': '金昌市', }

    frames = []
    total_rows = 0
    for fn in glob.glob(os.path.join(strPathCBZ, '*'+keyword+'*.xlsx')):
        f = os.path.basename(fn)
        city = next((full for short, full in cityShorten.items() if short in f), '')
        if not city:
            # The file is still merged, with an empty city, so nothing is lost.
            print("!!!!! City Name not matched ( ", f, " )")
        dfn = pd.read_excel(fn)
        dfn['市州'] = city
        frames.append(dfn)
        total_rows += dfn.shape[0]
        print(city, f, dfn.shape[0], '/', total_rows)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # pd.concat raises on an empty list, so fall back to an empty frame.
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    df.to_excel(strFnCbz)
|
||||
#def mergeCMC
|
||||
|
||||
if __name__ == "__main__":

    # Before running, convert the date columns of the Excel files.

    # Report metadata handed to summary().
    # NOTE(review): "quarter" is "三" while the data file below is named
    # 第一季度 and the dates span January-March; confirm this is intended.
    info = dict(
        year="2023",
        quarter="三",
        dateCN="二〇二三年三月",
        dateStart="2023年1月1日",
        dateEnd="2023年3月20日",
        days="79",
        num="4",
    )

    # Data root directory.
    strPath = 'D:/Projects/POM/DATA/2023年S1/'
    # Monitoring data.
    strFnMonitoring = strPath + '汇总/第一季度汇总数据_2023.3.xlsx'
    # Word template file.
    strPathTemplate = strPath + 'POM_ReportTemplate.docx'
    # Merged typo workbook and merged sensitive-word workbook.
    strFnCbz = strPath + '汇总/CBZ.xlsx'
    strFnMgc = strPath + '汇总/MGC.xlsx'

    # Build each merged workbook from the per-city files only when it does
    # not exist yet; typos first, then sensitive words.
    for keyword, merged_path in (("错别", strFnCbz), ("敏感", strFnMgc)):
        if not os.path.exists(merged_path):
            mergeCMC(keyword, strPath + '监测/', merged_path)

    # Output directory.
    strPathOutput = strPath + '统计/'

    summary(info, strFnMonitoring, strFnCbz, strFnMgc, strPathTemplate, strPathOutput)
|
1092
statForward202303.py
1092
statForward202303.py
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,32 @@
|
|||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os, glob, re
|
||||
|
||||
# Workbook with the keyword-search hits to verify; it must contain a
# '链接' column holding the URL of each post.
fn = 'D:/Projects/POM/DATA/2023年6月/6.26关键词检测/2023年数据检索结果2023-06-26_23.43.45/武威市.xlsx'

# Only rows whose index is greater than this value are checked.
START_AFTER_ROW = 658
# Seconds to wait for each response, so one stalled server cannot hang the run.
REQUEST_TIMEOUT = 30
# Page fragments that mark a post as removed or unavailable.
DELETED_MARKERS = ('该内容已被发布者删除', '内容无法查看', '你访问的内容不存在')

df = pd.read_excel(fn)

df = df.reset_index()

print(df.shape)

try:
    for i, r in df.iterrows():
        url = str(r['链接'])
        if not (url.startswith('http') and i > START_AFTER_ROW):
            print(i)
            continue

        try:
            resp = requests.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Leave the status empty so the row can be retried later instead
            # of aborting the whole run on one bad link.
            print(i, '请求失败', exc)
            continue

        html = resp.text
        d = any(marker in html for marker in DELETED_MARKERS)
        sd = '删除' if d else '存在'
        df.loc[i, '状态'] = sd
        print(i, sd)
finally:
    # Always write what has been collected, even if the loop is interrupted,
    # so partial progress is not lost.
    df.to_excel('D:/Projects/POM/DATA/2023年6月/6.26关键词检测/武威市.xlsx')
|
Loading…
Reference in New Issue