# pomscripts/StatMonthly.py

# 1. 打开监测任务表格
import pandas as pd
import numpy as np
import os, glob, re
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import datetime
#word toc
import win32com
import win32com.client as win32
from win32com.client import constants
#pdf
from pikepdf import Pdf,Page,Rectangle
#word
from docxtpl import DocxTemplate
from docxtpl import InlineImage
from docx.shared import Mm

def addStamp(target_pdf_path, watermark_pdf_path, output_pdf_path, sy=140):
    """Overlay the official seal and the signature onto a report PDF.

    The first page of the stamp PDF (the seal) is drawn on the first page
    of the target, and the second page of the stamp PDF (the signature) is
    drawn on the third page of the target.  The result is written to
    ``output_pdf_path``.

    Args:
        target_pdf_path: Path of the PDF to be stamped.
        watermark_pdf_path: Path of the PDF holding the seal (page 1) and
            the signature (page 2).
        output_pdf_path: Path the stamped PDF is saved to.
        sy: Lower y coordinate of the seal rectangle on the first page.
    """
    # Both documents are opened as context managers so their file handles
    # are released deterministically, even when an overlay or the save
    # fails.  The caller deletes the target file right after this call,
    # which requires the handle to be closed.
    with Pdf.open(target_pdf_path) as target_pdf:
        with Pdf.open(watermark_pdf_path) as watermark_pdf:
            seal_page = watermark_pdf.pages[0]
            signature_page = watermark_pdf.pages[1]

            # Official seal on the first page.
            x, y, w, h = 240, sy, 115, 115
            target_pdf.pages[0].add_overlay(
                seal_page, Rectangle(x, y, x + w, y + h))

            # Signature on the third page.
            x, y, w, h = 163, 573, 85, 50
            target_pdf.pages[2].add_overlay(
                signature_page, Rectangle(x, y, x + w, y + h))

            # Save while the stamp PDF is still open: the overlays
            # reference its pages.
            target_pdf.save(output_pdf_path)
    

def update_toc(docx_file):
    """Refresh TOC page numbers in a Word document and export it as PDF.

    The document is opened in a hidden Word instance, the page numbers of
    every table of contents are updated, and a PDF copy is written next to
    it with the ``.docx`` suffix replaced by ``_.pdf``.  No table of
    contents is created when the document has none.

    Args:
        docx_file: Path of the ``.docx`` file to process.
    """
    word = win32com.client.DispatchEx("Word.Application")
    try:
        word.Visible = 0  # keep the Word window hidden
        word.DisplayAlerts = 0
        doc = word.Documents.Open(docx_file)
        # Only persist the document when every step succeeded.
        save_changes = False
        try:
            toc_count = doc.TablesOfContents.Count
            if toc_count == 0:
                print("无目录")
            else:
                # COM collections are 1-based.  Only the page numbers are
                # refreshed, not the TOC entries themselves.
                for toc_index in range(1, toc_count + 1):
                    doc.TablesOfContents(toc_index).UpdatePageNumbers()

            # FileFormat=17 is Word's wdFormatPDF.
            doc.SaveAs(docx_file.replace('.docx', '_.pdf'), FileFormat=17)
            save_changes = True
        finally:
            doc.Close(SaveChanges=save_changes)
    finally:
        # Always shut the hidden Word process down, otherwise a failure
        # leaves an invisible WINWORD instance running.
        word.Quit()

def toDate(strDT):
    """Format a date-like value as ``MM-DD``.

    The value is parsed with ``pd.to_datetime(..., errors='coerce')``; an
    empty string is returned when the result is missing (NaT/None).
    """
    parsed = pd.to_datetime(strDT, errors='coerce')
    if pd.isna(parsed):
        return ''
    return parsed.strftime('%m-%d')

# Fill the Word template
def temp_word(tmep_path, word_apth, dContext, pathImage, city):
    """Render the report template with the context and the chart images.

    Args:
        tmep_path: Path of the docxtpl template.
        word_apth: Path the rendered document is saved to.
        dContext: Template context; the chart images are added to it
            in place before rendering.
        pathImage: Directory holding the chart PNG files.
        city: Prefix of the chart file names (``<city><chart>.png``).
    """
    tpl = DocxTemplate(tmep_path)

    # Template variable name == chart file stem (after the city prefix).
    chart_keys = (
        'annulusMediaCount',
        'annulusCountyCount',
        'annulusCountyArticle',
        'annulusResult',
        'barCountyRatio',
    )
    images = {
        key: InlineImage(tpl, os.path.join(pathImage, city + key + '.png'),
                         width=Mm(120))
        for key in chart_keys
    }

    dContext.update(images)
    tpl.render(dContext)
    tpl.save(word_apth)


# Draw a bar chart
def drawBar(data, recipe, title='', fn=''):
    """Render a percentage bar chart and optionally save it to a file.

    Args:
        data: Bar heights as fractions; the y axis is fixed to 0..1 and
            its ticks are labelled in percent.
        recipe: Category labels, one per bar.
        title: Chart title.
        fn: Output image path.  Nothing is saved when it is empty, the
            same convention drawAnnulus uses.
    """
    plt.figure(figsize=(6, 4))
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    counties = recipe
    countyRates = data

    plt.bar(counties, countyRates, width=0.5)
    plt.xticks(counties, counties, rotation=35)
    plt.ylim((0, 1))

    def to_percent(temp, position):
        # Tick formatter: fraction -> whole-number percentage.
        return '%2.0f' % (100 * temp) + '%'

    plt.gca().yaxis.set_major_formatter(FuncFormatter(to_percent))
    plt.title(title, fontsize=16)
    plt.tight_layout()
    # The default fn='' used to be passed straight to savefig(); skip the
    # save instead so the default argument is usable.
    if fn:
        plt.savefig(fn)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.cla()
    plt.clf()
    plt.close()


# Draw a ring (donut) chart
def drawAnnulus(data, recipe, title='', fn=''):
    """Render a ring chart with a legend to the right of it.

    Args:
        data: Wedge sizes.
        recipe: Legend labels, one per wedge.
        title: Chart title; omitted when empty.
        fn: Output image path; nothing is saved when empty.
    """
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    # Canvas width, legend column count and legend font size grow with the
    # number of labels so that long legends still fit.
    label_total = len(recipe)
    if label_total > 40:
        canvas_w, legend_cols, legend_font = 16, 4, 'small'
    elif label_total > 20:
        canvas_w, legend_cols, legend_font = 16, 2, 'small'
    else:
        canvas_w, legend_cols, legend_font = 8, 1, 'medium'
    canvas_h = 4

    fig, ax = plt.subplots(figsize=(canvas_w, canvas_h),
                           subplot_kw=dict(aspect="equal"))

    # wedgeprops' width turns the pie into a ring; drawing starts at the
    # positive x axis (startangle=0).
    wedges, texts = ax.pie(data, radius=1.1, wedgeprops=dict(width=0.4),
                           startangle=0)

    # The monitoring-result chart places its legend slightly closer.
    legend_x = 1.0 if title == '政务新媒体监测结果' else 1.2
    plt.legend(labels=recipe, loc="center left",
               bbox_to_anchor=(legend_x, 0.5), borderaxespad=0.,
               ncol=legend_cols, fontsize=legend_font)

    if len(title):
        ax.set_title(title, fontsize=16, fontweight='heavy')

    plt.tight_layout()
    if len(fn):
        plt.savefig(fn)
    plt.cla()
    plt.clf()
    plt.close()

    # summaryCity(city, dfc, dfcw, dfcs, context, strfnTemplate, os.path.join(strPathVerified,'Reports', city+'.docx'), strPathVerified )


# Characters stripped from typo sentences because they disturb the table
# output of the Word template (quotes, brackets, punctuation, ...).
_TABLE_UNSAFE_CHARS = re.compile(
    r"[——,$%^，。？、~@#￥%……&*《》<>「」{}【】()/\\\[\]'\"]")


def _countOrBlank(row, column):
    """Return a pivot cell as int, or '' if the column is absent or blank."""
    if column not in row.index:
        return ''
    value = row[column]
    # Empty pivot cells carry the '' fill value.
    return '' if isinstance(value, str) else int(value)


def _accountRows(dfAccounts, subordinate):
    """Build the template rows of an account list (one dict per account)."""
    rows = []
    for _, row in dfAccounts.iterrows():
        rows.append({
            'name': row['账号名称'],
            'type': row['账号类型'],
            'unit': row['开设主体'],
            'county': row[subordinate],
            'result': row['监测结果'],
            # Falsy cells (0 or '') are rendered as blanks.
            'num': "%d" % row['更新次数'] if row['更新次数'] else '',
            'days': "%d" % row['静默日数'] if row['静默日数'] else '',
            'start': toDate(str(row['静默开始日期'])) if row['静默开始日期'] else '',
            'end': toDate(str(row['静默结束日期'])) if row['静默结束日期'] else '',
        })
    return rows


def _issueRows(dfIssues, stripPunctuation):
    """Build the template rows of a typo / sensitive-word table."""
    hasTitle = '标题' in dfIssues.columns
    rows = []
    for _, row in dfIssues.iterrows():
        sentence = row['错误出现位置']
        if stripPunctuation:
            if pd.isna(sentence):
                sentence = ''
            else:
                sentence = _TABLE_UNSAFE_CHARS.sub('', str(sentence))
        rows.append({
            'error': row['错误'],
            'tips': row['建议'],
            'sentence': sentence,
            'type': row['账号类型'],
            'name': row['账号名称'],
            'date': toDate(str(row['发文时间'])),
            'title': row['标题'] if hasTitle else '',
        })
    return rows


# Summarise one city/prefecture and produce its report.
def summaryCity(info, city, df, dfW, dfS, fnTemplate, fnReport, dirTemp,
                fnStamp='D:/Projects/POM/DEV/SCRIPTS/stamps_dwl.pdf'):
    """Build the statistics, charts, Word report and stamped PDF of a city.

    Args:
        info: Report-wide template values; ``info['serialNum']`` is appended
            to the report id and all entries are copied into the context.
        city: Name of the reporting unit; must be a key of the client table
            defined below.
        df: Monitoring data.  Columns read: 市/省局, 区县/地方部门, 监测结果,
            账号名称, 账号类型, 更新次数, 开设主体, 静默日数, 静默开始日期,
            静默结束日期.
        dfW: Typo records.  Columns read: 市州, 发文时间, 错误, 建议,
            错误出现位置, 账号类型, 账号名称 and, if present, 标题.
        dfS: Sensitive-word records, same columns as ``dfW``.
        fnTemplate: Path of the Word template.
        fnReport: Path of the ``.docx`` report to write; the PDF is written
            next to it with the ``.pdf`` suffix.
        dirTemp: Directory the chart images are written to.
        fnStamp: PDF holding the seal and the signature used for stamping.
    """
    dCityClient = {
        '甘肃省': "甘肃省人民政府办公厅",
        '省直部门': "甘肃省人民政府办公厅",
        '白银市': "白银市人民政府办公室",
        '定西市': "定西市人民政府办公室",
        '临夏回族自治州': "临夏回族自治州人民政府办公室",
        '平凉市': "中共平凉市委网络安全和信息化委员会办公室",
        "庆阳市": "庆阳市电子政务与信息资源管理办公室",
        '庆阳市华池县': "华池县人民政府办公室",
        '庆阳市宁县': "宁县人民政府办公室",
        "庆阳市镇原县": "镇原县人民政府办公室",
        "酒泉市": "酒泉市人民政府办公室",
        "天水市": "天水市人民政府办公室",
        "武威市": "武威市人民政府办公室",
        "金昌市": "金昌市人民政府办公室",
        "嘉峪关市": "嘉峪关市人民政府办公室",
        "兰州新区": "兰州新区管委会办公室",
        "陇南市": "陇南市政务服务中心",
        "张掖市": "张掖市政务服务中心",
        "甘南藏族自治州": "甘南藏族自治州政务服务中心",
        "兰州市": "兰州市政务服务中心",
    }
    dHavingSubordinateUnits = {
        '甘肃省': True, '白银市': True, '定西市': True,
        '临夏回族自治州': True, '平凉市': True, "庆阳市": True,
        "酒泉市": True, "天水市": True, "陇南市": True, "张掖市": True,
        "甘南藏族自治州": True, "兰州市": True, "武威市": True,
        "金昌市": True,
        '省直部门': False, "兰州新区": False, '庆阳市华池县': False,
        '庆阳市宁县': False, "庆阳市镇原县": False, "嘉峪关市": False,
    }
    print("----------------" + city + "----------------")

    # Report id: position of the city in the client table + serial number.
    strID = "%02d" % (list(dCityClient).index(city))
    context = {
        "city": city,
        "client": dCityClient[city],
        "reportid": strID + info['serialNum'],
        'havingSubordinateUnits': dHavingSubordinateUnits[city],
        'havingBelowStandard': True,
        'havingUpStandard': True,
        'havingCbz': True,
        'havingMgc': True
    }
    context.update(info)

    # -----------------------
    # Select the rows belonging to this reporting unit.
    subordinate = '区县/地方部门'
    subordinateName = '县区'
    if "庆阳市" in city:
        dfc = df.loc[df['市/省局'] == '庆阳市']
        # County-level reports of Qingyang only cover that county.
        for countyName in ('华池县', '宁县', '镇原县'):
            if countyName in city:
                dfc = dfc.loc[dfc['区县/地方部门'] == countyName]
                break
        dfc = dfc.copy()
        dfcw = dfW.loc[dfW['市州'] == '庆阳市'].copy()
        dfcs = dfS.loc[dfS['市州'] == '庆阳市'].copy()
    elif "甘肃" in city:
        # Province-wide report: fixed list of units, grouped by city.
        cities = {'白银市', '武威市', '庆阳市', '酒泉市', '天水市', '临夏回族自治州',
                  '平凉市', '定西市', '兰州新区', '嘉峪关市', '陇南市', '张掖市',
                  '省直部门', '金昌市', '甘南藏族自治州'}
        dfc = df.loc[df['市/省局'].isin(cities)].copy()
        dfcw = dfW.loc[dfW['市州'].isin(cities)].copy()
        dfcs = dfS.loc[dfS['市州'].isin(cities)].copy()
        subordinate = '市/省局'
        subordinateName = '市州'
    else:
        dfc = df.loc[df['市/省局'] == city].copy()
        dfcw = dfW.loc[dfW['市州'] == city].copy()
        dfcs = dfS.loc[dfS['市州'] == city].copy()

    # -----------------------
    # Accounts per subordinate unit and monitoring result.
    dfCountyAccount = pd.pivot_table(dfc, index=[subordinate], columns=['监测结果'],
                                     values=['账号名称'], aggfunc='count',
                                     fill_value='', margins=True)
    dfCountyAccount.columns = dfCountyAccount.columns.droplevel(0)
    tt3_list = []
    for index, row in dfCountyAccount.iterrows():
        county = '总  计' if index == 'All' else index
        if not dHavingSubordinateUnits[city] and county == '市直部门':
            county = city
        tt3_list.append({
            'county': county,
            'hg': _countOrBlank(row, '合格'),
            'u2w': _countOrBlank(row, '超过两周未更新'),
            'un': _countOrBlank(row, '监测期间未更新'),
            'count': _countOrBlank(row, 'All'),
        })
    context['tt3_contents'] = tt3_list

    # -----------------------
    # Accounts per media type.
    dfMedia = pd.pivot_table(dfc, index=['账号类型'], values=['账号名称'],
                             aggfunc='count', fill_value='', margins=True)
    dCity = {'nmCount': dfMedia.loc['All', '账号名称']}
    print('  监测账号数：', dCity['nmCount'])
    # Drop the margin row by label instead of assuming it sorts first; a
    # single media type ties with the total.
    dfMedia = dfMedia.drop('All').sort_values(by='账号名称', ascending=False)
    mediaTypes = dfMedia.index.tolist()
    mediaCounts = dfMedia['账号名称'].tolist()
    dCity['sMediaCount'] = '，'.join(
        m + str(n) + '个' for m, n in zip(mediaTypes, mediaCounts))
    context['tt1_contents'] = [
        {'type': m, 'count': str(n)} for m, n in zip(mediaTypes, mediaCounts)]

    # -----------------------
    # Update counts per subordinate unit.
    dfCountyArticle = pd.pivot_table(dfc, index=[subordinate], values=['更新次数'],
                                     aggfunc='sum', fill_value='', margins=True)
    dCity['cityArticleCount'] = "%d" % dfCountyArticle.loc['All', '更新次数']
    dfCountyArticle = dfCountyArticle.drop('All').sort_values(
        by='更新次数', ascending=False)
    articleUnits = dfCountyArticle.index.tolist()
    articleCounts = dfCountyArticle['更新次数'].tolist()
    dCity['countyMostArticle'] = articleUnits[0]
    dCity['countyMostArticleCount'] = "%d" % articleCounts[0]
    if len(articleUnits) > 1:
        dCity['sCountyArticles'] = '，按管理矩阵统计，' + '，'.join(
            u + "%d" % n + "次" for u, n in zip(articleUnits, articleCounts))

    # -----------------------
    # Qualification rate per subordinate unit.
    # Units without any qualified account hold the '' fill value, and the
    # column is missing entirely when nobody qualified; both become 0.
    if '合格' not in dfCountyAccount.columns:
        dfCountyAccount['合格'] = 0
    dfCountyAccount['合格'] = pd.to_numeric(
        dfCountyAccount['合格'], errors='coerce').fillna(0).astype(int)
    dfCountyAccount['All'] = pd.to_numeric(dfCountyAccount['All'])
    dfCountyAccount = dfCountyAccount.sort_values(by='All', ascending=False).copy()
    dfCountyAccount['rate'] = dfCountyAccount['合格'] / dfCountyAccount['All']
    dfResult = dfCountyAccount.copy()
    dCity['cityRatio'] = "{:.1%}".format(dfCountyAccount.loc['All', 'rate'])
    print('     合格率：', dCity['cityRatio'])

    # Units ordered by number of accounts.
    dfCountyAccount = dfCountyAccount.drop(['All'])
    counties = dfCountyAccount.index.tolist()
    countyCounts = dfCountyAccount['All'].values.tolist()
    dCity['countyCount'] = "%d" % len(counties)
    dCity['sCounties'] = '，'.join(counties)
    dCity['sCountyCount'] = '，'.join(
        c + str(n) + '个' for c, n in zip(counties, countyCounts))

    # Units ordered by qualification rate.
    dfCountyAccount = dfCountyAccount.sort_values(by='rate', ascending=False)
    countieshege = dfCountyAccount.index.tolist()
    countyRates = dfCountyAccount['rate']
    # Iterate the values; the Series is indexed by unit name, so integer
    # subscripts would be label lookups.
    ratioTexts = ["%.1f" % (100.0 * rate) for rate in countyRates.tolist()]
    dCity['sCountyRatio'] = '，'.join(
        c + t + '%' for c, t in zip(countieshege, ratioTexts))
    dCity['tt2_contents'] = [
        {'county': c, 'ratio': t + '%'} for c, t in zip(countieshege, ratioTexts)]

    # -----------------------
    # Charts.
    print('  生成图片...')
    drawAnnulus(mediaCounts, mediaTypes,
                '政务新媒体账号类型', os.path.join(dirTemp, city + 'annulusMediaCount.png'))

    drawAnnulus(countyCounts, counties,
                subordinateName + '政务新媒体账号数量',
                os.path.join(dirTemp, city + 'annulusCountyCount.png'))

    drawAnnulus(articleCounts, articleUnits,
                subordinateName + '政务新媒体累计更新次数',
                os.path.join(dirTemp, city + 'annulusCountyArticle.png'))

    # Totals per monitoring result (the 'All' row of the result columns).
    dfResult = dfResult.drop(['All', 'rate'], axis=1)
    numQualified = dfResult.loc['All', '合格']
    dCity['resultQualified'] = "%d" % numQualified
    dCity['resultQualifiedRatio'] = "%.1f%%" % (numQualified / dCity['nmCount'] * 100.0)
    dCity['resultUnqualified'] = "%d" % (dCity['nmCount'] - numQualified)

    if '监测期间未更新' in dfResult.columns:
        numNoupdated = dfResult.loc['All', '监测期间未更新']
        dCity['stringResultNoUpdated'] = "；%d个政务新媒体监测期间未更新，占监测总数的%.1f%%" % (
            numNoupdated, numNoupdated / dCity['nmCount'] * 100.0)
        dCity['stringNoUpdated'] = "%d个政务新媒体监测期间未更新。" % (numNoupdated)
    else:
        dCity['stringResultNoUpdated'] = ''
        dCity['stringNoUpdated'] = ""

    if '超过两周未更新' in dfResult.columns:
        numNoupdated2W = dfResult.loc['All', '超过两周未更新']
        dCity['stringResultNoUpdated2W'] = "；%d个政务新媒体连续未更新时间超过两周，占监测总数的%.1f%%" % (
            numNoupdated2W, numNoupdated2W / dCity['nmCount'] * 100.0)
        dCity['stringNoUpdated2W'] = "%d个政务新媒体连续未更新时间超过两周。" % (numNoupdated2W)
    else:
        dCity['stringResultNoUpdated2W'] = ''
        dCity['stringNoUpdated2W'] = ''

    resultLabels = dfResult.columns.values.tolist()
    resultCounts = dfResult.loc['All'].values.tolist()
    drawAnnulus(resultCounts, resultLabels,
                '政务新媒体监测结果', os.path.join(dirTemp, city + 'annulusResult.png'))

    drawBar(countyRates, countieshege,
            '政务新媒体管理矩阵发布时效性合格率榜单',
            os.path.join(dirTemp, city + 'barCountyRatio.png'))

    # -----------------------
    # Tables of the report.
    print('  生成报告...')

    dfCityUnqulified = dfc[dfc['监测结果'] != '合格'].sort_values(
        by="监测结果", ascending=True)
    dfCityQulified = dfc[dfc['监测结果'] == '合格'].sort_values(
        by=subordinate, ascending=True)

    # Accounts that failed the check.
    if len(dfCityUnqulified) < 1:
        context['havingBelowStandard'] = False
    else:
        context['tt4_contents'] = _accountRows(dfCityUnqulified, subordinate)

    # Accounts that passed the check.
    if len(dfCityQulified) < 1:
        context['havingUpStandard'] = False
    else:
        context['tt5_contents'] = _accountRows(dfCityQulified, subordinate)

    # Typos.  The summary sentence is set in both cases; the
    # "none found" text used to be unreachable.
    if dfcw.shape[0] < 1:
        context['havingCbz'] = False
        dCity['stringCbzCount'] = '本次检测未发现错别字。'
    else:
        dfcw = dfcw.fillna('')  # fillna returns a new frame
        context['tCbz_contents'] = _issueRows(dfcw, True)
        dCity['stringCbzCount'] = '本次检测发现错别字%d处，详细情况见附表政务新媒体发布内容错别字统计表。' % (
            dfcw.shape[0])

    # Sensitive words.
    if dfcs.shape[0] < 1:
        context['havingMgc'] = False
        dCity['stringMgcCount'] = '本次检测未发现涉敏内容。'
    else:
        dfcs = dfcs.fillna('')
        context['tMgc_contents'] = _issueRows(dfcs, False)
        dCity['stringMgcCount'] = '本次检测发现敏感信息%d处，详细情况见附表政务新媒体发布内容敏感信息统计表。' % (
            dfcs.shape[0])

    # Always merge the statistics into the context.  This used to happen
    # only when sensitive-word records existed, which left reports without
    # such records missing every statistic.
    context.update(dCity)

    # -----------------------
    # Render the Word report from the template.
    temp_word(fnTemplate,
              fnReport,
              context, dirTemp, city)

    # Refresh the table of contents and export to PDF.
    print('  更新目录，转换为PDF...')
    update_toc(fnReport)

    # Stamp the PDF; these cities use a lower seal position.
    print('  签章...')
    fnTmp = fnReport.replace('.docx', '_.pdf')
    fnPDF = fnReport.replace('.docx', '.pdf')
    if city in {'庆阳市', '平凉市', '临夏回族自治州'}:
        addStamp(fnTmp, fnStamp, fnPDF, 115)
    else:
        addStamp(fnTmp, fnStamp, fnPDF)
    # The unstamped intermediate PDF is no longer needed.
    os.remove(fnTmp)
    
    
def createDir(dirP, dirS):
    """Ensure that sub-directory ``dirS`` exists inside ``dirP``.

    Args:
        dirP: Existing parent directory.
        dirS: Name of the sub-directory to create.

    Returns:
        The path of the sub-directory when it exists afterwards.  ``dirP``
        is returned unchanged when it is not a directory itself or when the
        sub-directory could not be created (a message is printed in the
        latter case).
    """
    if not os.path.isdir(dirP):
        return dirP

    dirN = os.path.join(dirP, dirS)
    try:
        os.makedirs(dirN, exist_ok=True)
    except OSError:
        # E.g. a regular file with that name exists, or no permission;
        # reported below through the fallback path.
        pass

    if os.path.isdir(dirN):
        return dirN

    # Report the directory that failed (the message used to name the
    # parent, because the variable was overwritten before printing).
    print('Directory ' + dirN + ' cannot be created.')
    return dirP

# Merge the per-city typo / sensitive-word workbooks into one file
def mergeCMC(keyword, strPathCBZ, strFnCbz):
    """Concatenate all matching ``.xlsx`` files and tag rows with the city.

    Every file in ``strPathCBZ`` whose name contains ``keyword`` is read.
    The city is derived from the file name and stored in a new ``市州``
    column, and the combined table is written to ``strFnCbz``.

    Args:
        keyword: Text that must occur in the file name.
        strPathCBZ: Directory that is searched for ``*keyword*.xlsx``.
        strFnCbz: Path of the merged workbook to write.
    """
    # File-name fragment -> full city name.  The first matching entry wins,
    # so the order matters: '新区' / '兰州新区' come before '兰州', and
    # 'XQ' / 'LZXQ' come before 'LZ'.
    cityShorten = {'白银': '白银市', '定西': '定西市', '酒泉': '酒泉市', '嘉峪关': '嘉峪关市', '陇南': '陇南市',
            '临夏': '临夏回族自治州', '平凉': '平凉市', '庆阳': '庆阳市', '天水': '天水市', '武威': '武威市', '新区': '兰州新区',
            '兰州新区': '兰州新区', '兰州': '兰州市', '张掖': '张掖市', '甘南': '甘南藏族自治州', '省直': '省直部门', '金昌': '金昌市',
                'BY': '白银市', 'DX': '定西市', 'JQ': '酒泉市', 'JYG': '嘉峪关市', 'LN': '陇南市',
            'LX': '临夏回族自治州', 'PL': '平凉市', 'QY': '庆阳市', 'TS': '天水市', 'WW': '武威市', 'XQ': '兰州新区',
            'LZXQ': '兰州新区', 'LZ': '兰州市', 'ZY': '张掖市', 'GN': '甘南藏族自治州', 'SZ': '省直部门', 'JC': '金昌市', }

    frames = []
    total = 0
    # Sorted so the row order of the merged file is reproducible.
    for fn in sorted(glob.glob(os.path.join(strPathCBZ, '*'+keyword+'*.xlsx'))):
        f = os.path.basename(fn)
        city = next((full for short, full in cityShorten.items() if short in f), '')
        if not city:
            print("!!!!! City Name not matched ( ", f, " )")
        dfn = pd.read_excel(fn)
        dfn['市州'] = city
        frames.append(dfn)
        total += dfn.shape[0]
        print(city, f, dfn.shape[0], '/', total)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # An empty table is written when no file matched.
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    df.to_excel(strFnCbz)

if __name__ == "__main__":

    # NOTE: convert the date columns of the Excel files before running.

    info = {
        "year": "2023",
        "month": "6",
        "datePub": "二〇二三年七月",
        "dateStart": "2023年6月1日",
        "dateEnd": "2023年6月30日",
        "days": "30",
        "serialNum": "8",
    }
    # Root directory of the data and its working sub-directories.
    strPath = 'D:/Projects/POM/DATA/2023年7月/6月报告/'
    for subDir in ('全文', '转发', '报告', '汇总', '监测'):
        createDir(strPath, subDir)

    # Monitoring data
    strFnMonitoring = strPath + '汇总/6月汇总数据_2023.6.xlsx'
    # Word template
    strPathTemplate = strPath + 'POM_ReportTemplate.docx'
    # Merged typo / sensitive-word workbooks; each is built from the
    # per-city files on first use.
    strFnCbz = strPath + '汇总/CBZ.xlsx'
    strFnMgc = strPath + '汇总/MGC.xlsx'
    for keyword, fnMerged in (("错别", strFnCbz), ("敏感", strFnMgc)):
        if not os.path.exists(fnMerged):
            mergeCMC(keyword, strPath + '监测/', fnMerged)
    # Output directory
    strPathOutput = strPath

    # Load monitoring data, typos and sensitive words.
    df = pd.read_excel(strFnMonitoring)
    dfW = pd.read_excel(strFnCbz)
    dfS = pd.read_excel(strFnMgc)

    # df.loc[df['账号类型'] == '微信服务号', '账号类型'] = '微信'
    # df.loc[df['账号类型'] == '微信订阅号', '账号类型'] = '微信'

    # Use one wording for the monitoring result.
    df.loc[df['监测结果'] == '连续两周未更新', '监测结果'] = '超过两周未更新'

    # Replace over-long names by short forms so the charts stay readable.
    for longName, shortName in (
            ('积石山保安族东乡族撒拉族自治县', '积石山县'),
            ('阿克塞哈萨克族自治县', '阿克塞自治县')):
        df.loc[df['区县/地方部门'] == longName, '区县/地方部门'] = shortName

    # Units directly under the province / city / prefecture.
    df['市/省局'] = df['市/省局'].fillna('省直部门')
    df['区县/地方部门'] = df['区县/地方部门'].fillna('市直部门')
    isLinxiaDirect = (df['市/省局'] == '临夏回族自治州') & (df['区县/地方部门'] == '市直部门')
    df.loc[isLinxiaDirect, '区县/地方部门'] = '州直部门'

    # Data clean-up: strip whitespace (spaces, line breaks, tabs) and a
    # leading "其他+" from all text cells.
    df = df.replace(r'\s+', '', regex=True)
    df = df.replace(r'^其他\+', '', regex=True)
    df['更新次数'] = df['更新次数'].fillna(0)
    df = df.fillna(value='')

    #########################################################
    #
    # Reporting units to process.
    cities = {'白银市', '武威市', '庆阳市', '酒泉市', '天水市', '临夏回族自治州', '平凉市', '定西市', '兰州新区',
              '嘉峪关市', '庆阳市华池县', '庆阳市镇原县', '庆阳市宁县', '陇南市', '张掖市', '甘肃省'}
    #cities = cities | {'甘肃省'}#, '省直部门'}
    #cities = cities | {'陇南市'}#, '兰州市'}, '省直部门'}
    #cities = cities | {'甘南藏族自治州', '金昌市', '兰州市', '张掖市', '甘肃省', '省直部门'}
    #
    cities = {'甘肃省'}  # restrict the run to specific units

    # Report and intermediate-file directories below the output directory.
    dirP = os.path.abspath(os.path.dirname(strPathOutput))
    dirReports = createDir(dirP, 'Reports')
    dirIntermediate = createDir(dirP, 'Intermediate')
    reportSuffix = '政务新媒体监测报告_{}年{}月.docx'.format(info['year'], info['month'])
    for city in cities:
        fnReport = os.path.join(dirReports, city + reportSuffix)
        summaryCity(info, city, df, dfW, dfS, strPathTemplate, fnReport, dirIntermediate)