# pomscripts/statForward2023s2.py
# Repository web-view header (not code): 1035 lines, 54 KiB, Python; last change 2023-07-22 01:19:10 +00:00
import datetime
import csv
import pandas as pd
import numpy as np
import glob, os, re, time
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from difflib import SequenceMatcher
from collections import Counter
import difflib
from docxtpl import DocxTemplate
from docxtpl import InlineImage
from docx.shared import Mm
import jieba
import jieba.posseg as pseg
def fetch_chinese(s):
    """Return ``s`` with every character outside U+4E00..U+9FA5 removed.

    Only characters in that CJK range survive; digits, Latin letters,
    whitespace and punctuation (ASCII or full-width) are dropped.
    """
    return re.sub(r'[^\u4e00-\u9fa5]', '', s)
#---
def ts2date(dates, sf='%Y-%m-%d'):
    """Convert an Excel-style day serial number into a formatted date string.

    The serial is interpreted as a number of days after 1899-12-30; for
    example the serial 43406 corresponds to 2018-11-02.

    Args:
        dates: Day offset from 1899-12-30 (passed to ``datetime.timedelta``).
        sf: ``strftime`` format of the returned string.

    Returns:
        The resulting date rendered with ``sf``.
    """
    epoch = datetime.datetime(1899, 12, 30)
    moment = epoch + datetime.timedelta(days=dates)
    return moment.strftime(sf)
#---
def drawBar(data, recipe, title='', fn=''):
    """Draw a percentage bar chart and save it to ``fn``.

    Args:
        data: Bar heights; the y axis is fixed to the range 0..1 and its
            ticks are rendered as percentages.
        recipe: Bar labels, used both as x positions and as tick labels.
        title: Chart title.
        fn: Path handed to ``plt.savefig``.
    """
    def as_percent(value, _position):
        # Tick formatter: 0.25 -> '25%'.
        return '%2.0f' % (100 * value) + '%'

    plt.figure(figsize=(6, 4))
    # SimHei is selected so that Chinese labels can be rendered; the
    # unicode-minus switch is turned off alongside it.
    plt.rcParams.update({
        'font.sans-serif': ['SimHei'],
        'axes.unicode_minus': False,
    })
    labels = recipe
    heights = data
    plt.bar(labels, heights, width=0.5)
    plt.xticks(labels, labels, rotation=35)
    plt.ylim((0, 1))
    axes = plt.gca()
    axes.yaxis.set_major_formatter(FuncFormatter(as_percent))
    plt.title(title, fontsize=16)
    plt.tight_layout()
    plt.savefig(fn)
    # Reset the pyplot state after saving.
    plt.cla()
    plt.clf()
    plt.close()
def getWBData(path, cities, hasBody=False):
    """Read the downloaded Weibo CSV files below ``path`` into one DataFrame.

    The directory layout walked is ``path/<city>/<period>/<account>/<id>.csv``.
    City and period directories whose name contains ``weixin`` or ``tt`` and
    every entry starting with ``.`` are ignored.  For each account directory
    one CSV file is read; its first row is dropped (it is the header row,
    because the file is read with ``header=None``).

    Args:
        path: Root directory holding one sub-directory per city.
        cities: Full city names to include; other cities are skipped.
        hasBody: Unused; kept for interface compatibility.

    Returns:
        DataFrame with the 13 download-tool columns plus ``weiboID`` (CSV
        file name without extension), ``weiboName`` (account directory name)
        and ``市州`` (full city name).
    """
    # Directory name (code or short name) -> full city name.
    cityShorten = {'LZ':'兰州市', 'LX':'临夏回族自治州', 'JC':'金昌市', 'ZY':'张掖市', 'LN':'陇南市', 'JYG':'嘉峪关市',
                   'TS':'天水市', 'GN':'甘南藏族自治州', 'BY':'白银市', 'JQ':'酒泉市', 'QY':'庆阳市', 'PL':'平凉市',
                   'DX':'定西市', 'WW':'武威市', 'SZ':'省直部门', 'XQ': '兰州新区', 'LZXQ': '兰州新区',
                   'lz': '兰州市', 'lx': '临夏回族自治州', 'jc': '金昌市', 'zy': '张掖市', 'ln': '陇南市', 'jyg': '嘉峪关市',
                   'ts': '天水市', 'gn': '甘南藏族自治州', 'by': '白银市', 'jq': '酒泉市', 'qy': '庆阳市', 'pl': '平凉市',
                   'dx': '定西市', 'ww': '武威市', 'sz': '省直部门', 'xq': '兰州新区', 'lzxq': '兰州新区',
                   '白银': '白银市', '定西': '定西市', '酒泉': '酒泉市', '嘉峪关': '嘉峪关市', '陇南': '陇南市',
                   '临夏': '临夏回族自治州', '平凉': '平凉市', '庆阳': '庆阳市', '天水': '天水市', '武威': '武威市', '新区': '兰州新区',
                   '兰州': '兰州市', '张掖': '张掖市', '甘南': '甘南藏族自治州', '省直': '省直部门', '金昌': '金昌市',
                   }
    # Column layout of the files written by the Weibo download tool.
    cols = ['微博id', '微博正文', '头条文章url', '原始图片url', '被转发微博原始图片url', '是否为原创微博', '微博视频url', '发布位置', 'date', '发布工具', '点赞数',
            '转发数', '评论数']
    cs = cols + ['weiboID', 'weiboName', '市州']

    def skipped(name, fullPath):
        # Hidden entries, plain files and WeChat/Toutiao directories are not Weibo data.
        if name[:1] == '.' or not os.path.isdir(fullPath):
            return True
        lowered = name.lower()
        return 'weixin' in lowered or 'tt' in lowered

    # Frames are collected and concatenated once; DataFrame.append was
    # removed in pandas 2.0 and copies the whole frame on every call.
    cityFrames = [pd.DataFrame(columns=cs)]
    cityCount = 0
    for dirC in os.listdir(path):
        cityPath = os.path.join(path, dirC)
        if skipped(dirC, cityPath):
            continue
        cityName = cityShorten.get(dirC)
        if cityName is None:
            # Previously an unknown directory name raised KeyError.
            print(' skip unknown directory: ', dirC)
            continue
        if cityName not in cities:
            continue
        print(' city: ', cityName, dirC)
        cityCount += 1
        accountFrames = [pd.DataFrame(columns=cols)]
        for dirCT in os.listdir(cityPath):
            periodPath = os.path.join(cityPath, dirCT)
            if skipped(dirCT, periodPath):
                continue
            print(' read WB... dir:', dirCT)
            for dirA in os.listdir(periodPath):
                accountPath = os.path.join(periodPath, dirA)
                if dirA[:1] == '.' or not os.path.isdir(accountPath):
                    continue
                fileAs = os.listdir(accountPath)
                # Pick the account file explicitly: the first visible .csv.
                # The ID and the file that is read now always belong together.
                csvFiles = sorted(f for f in fileAs
                                  if not f.startswith('.') and os.path.splitext(f)[-1] == '.csv')
                if not csvFiles:
                    print(' no csv file for account: ', dirA)
                    continue
                csvName = csvFiles[0]
                dfdfwb = pd.read_csv(os.path.join(accountPath, csvName), sep=',', header=None,
                                     names=cols, index_col=None)
                # Row 0 is the file's own header line.
                dfdfwb = dfdfwb.iloc[1:].copy()
                dfdfwb["weiboID"] = csvName[:-4]
                dfdfwb["weiboName"] = dirA
                accountFrames.append(dfdfwb)
                if len(fileAs) > 1:
                    print(" +=+= ", fileAs)
        dfWBC = pd.concat(accountFrames)
        print(' ', dfWBC.shape)
        dfWBC['市州'] = cityName
        cityFrames.append(dfWBC)
    dfWB = pd.concat(cityFrames)
    print('Read WB finished. cities', cityCount, '; lines', dfWB.shape)
    return dfWB
def getWXData(path, cities, hasBody=False):
    """Read the WeChat Excel exports below ``path`` and concatenate them.

    Every sub-directory of ``path`` is a city directory.  Inside it, Excel
    files (``.xlsx``/``.xls``) are read directly, and so are the Excel files
    inside any sub-directory whose name contains ``weixin``.

    Args:
        path: Root directory holding one sub-directory per city.
        cities: Full city names to include; files of other cities are not read.
        hasBody: Unused; kept for interface compatibility.

    Returns:
        DataFrame starting with the columns ``公众号, 链接, 日期, 标题, 内容,
        头条, 市州, 阅读数`` (plus any extra columns found in the files); the
        ``市州`` column is set to the full city name.
    """
    # Directory name (code or short name) -> full city name.
    cityShorten = {'LZ':'兰州市', 'LX':'临夏回族自治州', 'JC':'金昌市', 'ZY':'张掖市', 'LN':'陇南市', 'JYG':'嘉峪关市',
                   'TS':'天水市', 'GN':'甘南藏族自治州', 'BY':'白银市', 'JQ':'酒泉市', 'QY':'庆阳市', 'PL':'平凉市',
                   'DX':'定西市', 'WW':'武威市', 'SZ':'省直部门', 'XQ': '兰州新区', 'LZXQ': '兰州新区',
                   'lz': '兰州市', 'lx': '临夏回族自治州', 'jc': '金昌市', 'zy': '张掖市', 'ln': '陇南市', 'jyg': '嘉峪关市',
                   'ts': '天水市', 'gn': '甘南藏族自治州', 'by': '白银市', 'jq': '酒泉市', 'qy': '庆阳市', 'pl': '平凉市',
                   'dx': '定西市', 'ww': '武威市', 'sz': '省直部门', 'xq': '兰州新区', 'lzxq': '兰州新区',
                   '白银': '白银市', '定西': '定西市', '酒泉': '酒泉市', '嘉峪关': '嘉峪关市', '陇南': '陇南市',
                   '临夏': '临夏回族自治州', '平凉': '平凉市', '庆阳': '庆阳市', '天水': '天水市', '武威': '武威市', '新区': '兰州新区',
                   '兰州': '兰州市', '张掖': '张掖市', '甘南': '甘南藏族自治州', '省直': '省直部门', '金昌': '金昌市',
                   }
    cols = ['公众号', '链接', '日期', '标题', '内容', '头条', '市州', '阅读数']
    excelExts = ('.xlsx', '.xls')

    def isExcel(name):
        # Hidden files and Excel owner-lock files ("~$book.xlsx") are not data.
        if name.startswith(('.', '~$')):
            return False
        return os.path.splitext(name)[-1] in excelExts

    # Frames are collected and concatenated once; DataFrame.append was
    # removed in pandas 2.0 and copies the whole frame on every call.
    frames = [pd.DataFrame(columns=cols)]
    countC = 0    # cities that contributed at least one file
    countFnC = 0  # files read in total
    for dirBatch in os.listdir(path):
        batchPath = os.path.join(path, dirBatch)
        if not os.path.isdir(batchPath):
            continue  # directories only
        cityname = cityShorten.get(dirBatch)
        if cityname is None:
            # Previously an unknown directory holding a workbook raised KeyError.
            print('  skip unknown directory: ', dirBatch)
            continue
        count = 0
        for fileC in os.listdir(batchPath):
            if fileC[:1] == '.':
                continue
            entryPath = os.path.join(batchPath, fileC)
            if os.path.isdir(entryPath):
                # Only "weixin" sub-directories hold WeChat exports.
                if 'weixin' not in fileC.lower():
                    continue
                print(' ', entryPath)
                excelPaths = [os.path.join(entryPath, f)
                              for f in os.listdir(entryPath) if isExcel(f)]
            elif isExcel(fileC):
                excelPaths = [entryPath]
            else:
                continue  # restrict to Excel extensions
            if cityname not in cities:
                continue
            fName = os.path.splitext(fileC)[0]
            for excelPath in excelPaths:
                dfdfwxc = pd.read_excel(excelPath)
                dfdfwxc['市州'] = cityname
                print(' read wx: ', cityname, dirBatch, fName, dfdfwxc.shape)
                frames.append(dfdfwxc)
                count += 1
        countFnC += count
        if count > 0:
            countC += 1
    dfWX = pd.concat(frames)
    print(" Read WX Finished. cities ", countC, '; Files', countFnC, '; lines ', dfWX.shape[0])
    return dfWX
def getTTData(path, cities, hasBody=False):
    """Read the Toutiao ``.xlsx`` exports below ``path`` and concatenate them.

    The layout walked is ``path/<city>/<dir containing "tt">/<file>.xlsx``.
    City and sub-directories whose name contains ``weixin`` or ``weibo`` and
    every entry starting with ``.`` are ignored.  A file that cannot be read
    is reported on stdout and skipped.

    Args:
        path: Root directory holding one sub-directory per city.
        cities: Full city names to include; other cities are skipped.
        hasBody: Unused; kept for interface compatibility.

    Returns:
        DataFrame starting with the columns ``account, date, title, nread,
        ncomment, content, url, origin, city``; ``city`` is set to the full
        city name.
    """
    # Directory name (code or short name) -> full city name.
    cityShorten = {'LZ':'兰州市', 'LX':'临夏回族自治州', 'JC':'金昌市', 'ZY':'张掖市', 'LN':'陇南市', 'JYG':'嘉峪关市',
                   'TS':'天水市', 'GN':'甘南藏族自治州', 'BY':'白银市', 'JQ':'酒泉市', 'QY':'庆阳市', 'PL':'平凉市',
                   'DX':'定西市', 'WW':'武威市', 'SZ':'省直部门', 'XQ': '兰州新区', 'LZXQ': '兰州新区',
                   'lz': '兰州市', 'lx': '临夏回族自治州', 'jc': '金昌市', 'zy': '张掖市', 'ln': '陇南市', 'jyg': '嘉峪关市',
                   'ts': '天水市', 'gn': '甘南藏族自治州', 'by': '白银市', 'jq': '酒泉市', 'qy': '庆阳市', 'pl': '平凉市',
                   'dx': '定西市', 'ww': '武威市', 'sz': '省直部门', 'xq': '兰州新区', 'lzxq': '兰州新区',
                   '白银': '白银市', '定西': '定西市', '酒泉': '酒泉市', '嘉峪关': '嘉峪关市', '陇南': '陇南市',
                   '临夏': '临夏回族自治州', '平凉': '平凉市', '庆阳': '庆阳市', '天水': '天水市', '武威': '武威市', '新区': '兰州新区',
                   '兰州': '兰州市', '张掖': '张掖市', '甘南': '甘南藏族自治州', '省直': '省直部门', '金昌': '金昌市',
                   }
    cs = ['account', 'date', 'title', 'nread', 'ncomment', 'content', 'url', 'origin', 'city']

    def skipped(name, fullPath):
        # Hidden entries, plain files and WeChat/Weibo directories are not Toutiao data.
        if name[:1] == '.' or not os.path.isdir(fullPath):
            return True
        lowered = name.lower()
        return 'weixin' in lowered or 'weibo' in lowered

    # Frames are collected and concatenated once; DataFrame.append was
    # removed in pandas 2.0 and copies the whole frame on every call.
    cityFrames = [pd.DataFrame(columns=cs)]
    cityCount = 0
    for dirC in os.listdir(path):
        cityPath = os.path.join(path, dirC)
        if skipped(dirC, cityPath):
            continue
        cityName = cityShorten.get(dirC)
        if cityName is None:
            # Previously an unknown directory name raised KeyError.
            print(' skip unknown directory: ', dirC)
            continue
        if cityName not in cities:
            continue
        print(' city: ', cityName, dirC)
        cityCount += 1
        fileFrames = [pd.DataFrame(columns=cs)]
        for dirCT in os.listdir(cityPath):
            ttPath = os.path.join(cityPath, dirCT)
            if skipped(dirCT, ttPath) or 'tt' not in dirCT.lower():
                continue
            print(' read TT... dir:', dirCT)
            for fn in os.listdir(ttPath):
                if fn[:1] == '.' or fn[-5:] != '.xlsx':
                    continue
                fileAs = os.path.join(ttPath, fn)
                try:
                    fileFrames.append(pd.read_excel(fileAs))
                except Exception as err:
                    # Best effort: one unreadable workbook must not abort
                    # the run, but the reason is reported.
                    print("read file failed. ", fileAs, err)
        dfTTC = pd.concat(fileFrames)
        print(' 读入头条数据行数', dfTTC.shape)
        dfTTC['city'] = cityName
        cityFrames.append(dfTTC)
    dfTT = pd.concat(cityFrames)
    print('Read TT finished. cities', cityCount, '; lines', dfTT.shape)
    return dfTT
# NOTE(review): this repeats the fetch_chinese definition near the top of
# the file; being defined later, this is the one in effect at run time.
def fetch_chinese(s):
    """Strip from ``s`` every character that is not in U+4E00..U+9FA5."""
    non_chinese = re.compile(r'[^\u4e00-\u9fa5]')
    return non_chinese.sub('', s)
def _find_forward(taskText, texts, threshold, symmetric=True):
    """Find the first text in ``texts`` that looks like a forward of ``taskText``.

    Args:
        taskText: Chinese-only text of the task title.
        texts: Iterable of raw article titles / post bodies.
        threshold: ``quick_ratio`` value that must be exceeded.
        symmetric: When True both strings are cut to the shorter length
            before comparing; when False only the candidate is cut to the
            task length.

    Returns:
        ``(position, text)`` of the first match, where ``text`` is the
        compared (cut) candidate when ``symmetric`` and the full Chinese-only
        candidate otherwise; ``None`` when nothing matches.
    """
    for pos, raw in enumerate(texts):
        candidate = fetch_chinese(str(raw))
        if symmetric:
            size = min(len(taskText), len(candidate))
            left, right = taskText[:size], candidate[:size]
            kept = right
        else:
            left, right = taskText, candidate[:len(taskText)]
            kept = candidate
        # Two empty strings have quick_ratio() == 1.0, so a title without
        # any Chinese character used to count as a forward of every task.
        if not left or not right:
            continue
        if difflib.SequenceMatcher(None, left, right).quick_ratio() > threshold:
            return pos, kept
    return None


def _forward_rate_table(df, indexCol, taskCount):
    """Build the account-count / forward-count / rate table grouped by ``indexCol``.

    The count and the sum are pivoted separately and then joined (the
    original author noted that a list-valued ``aggfunc`` gave inaccurate
    values).  Rows are sorted by rate, descending, with the ``总计`` margin
    row kept last.
    """
    accounts = pd.pivot_table(df, index=[indexCol], values=['账号名称'],
                              aggfunc=['count'], fill_value='', margins=True, margins_name='总计')
    forwards = pd.pivot_table(df, index=[indexCol], values=['转发数'],
                              aggfunc=['sum'], fill_value='', margins=True, margins_name='总计')
    table = pd.concat([accounts, forwards], axis=1)
    # Flatten the (aggfunc, column) MultiIndex into e.g. 'count_账号名称'.
    table.columns = ['_'.join(col) for col in table.columns.values]
    # Rate = forwards / accounts / tasks, truncated to three decimals.
    table['rate'] = table.apply(
        lambda x: int(x['sum_转发数'] / x['count_账号名称'] / taskCount * 1000) / 1000.0, axis=1)
    body = table[0:table.shape[0] - 1].sort_values(by='rate', ascending=False)
    return pd.concat([body, table[table.shape[0] - 1:table.shape[0]]], axis=0)


def _rate_rows(table, nameKey):
    """Turn a rate table into template rows, leaving out the ``总计`` row."""
    return [{nameKey: str(index),
             'rate': '%.1f' % (row['rate'] * 100.0),
             'account': int(row['count_账号名称']),
             'fNum': int(row['sum_转发数'])}
            for index, row in table.iterrows() if index != "总计"]


def _task_rows(dfTask, titleCol, dateCol):
    """Turn the task ledger into template rows (id, title, formatted date)."""
    return [{'id': row['序号'],
             'title': row[titleCol],
             'date': row[dateCol].strftime('%m月%d')}
            for _, row in dfTask.iterrows()]


if __name__ == "__main__":
    starttime = datetime.datetime.now()
    _RATIO = 0.5  # similarity above which an article counts as a forward
    isDoWX = True
    isDoWB = True
    isDoTT = True
    cities = [
        '临夏回族自治州',
        '白银市',
        '定西市',
        '酒泉市',
        '嘉峪关市',
        '平凉市',
        '庆阳市',
        '天水市',
        '武威市',
        '兰州新区',
        '陇南市',
        '兰州市', '张掖市', '甘南藏族自治州', '金昌市',
        '省直部门',  # 12 cities, 2 prefectures, 1 new area in total
    ]
    # NOTE(review): the full list above is overridden right here, so only
    # this city is processed - confirm this is intended and not a leftover.
    cities = ['酒泉市']

    # ---- Forwarding tasks ----
    sTaskTitle = '内容'
    sTaskDate = '时间'
    dfTask = pd.read_excel('D:/Projects/POM/DATA/2023年6月/季度报告/转发台账第二季度.xlsx', parse_dates=[sTaskDate])
    # The dropna result used to be discarded; rows without a title are now
    # really removed, so every later loop and rate uses the same task set.
    dfTask = dfTask.dropna(axis=0, subset=[sTaskTitle]).reset_index(drop=True)
    taskCount = dfTask.shape[0]
    taskCols = dfTask['序号'].astype(str).tolist()
    # (task number, per-task column name, raw title, Chinese-only title)
    tasks = [(rn, col, str(title), fetch_chinese(str(title)))
             for rn, col, title in zip(dfTask['序号'], taskCols, dfTask[sTaskTitle])]

    # ---- Account register ----
    strFnAccount = 'D:/Projects/POM/DATA/2023年7月/6月报告/全国报送系统表单_2023.6.30.xlsx'
    dfAllAccount = pd.read_excel(strFnAccount)
    # Extra columns: totals plus one column per task.
    dfAllAccount.loc[:, '转发数'] = 0
    dfAllAccount.loc[:, '阅读数'] = 0
    dfAllAccount = pd.concat(
        [dfAllAccount,
         pd.DataFrame(np.zeros((dfAllAccount.shape[0], taskCount)), columns=taskCols)],
        axis=1)
    # Normalise city / county values.
    dfAllAccount['市/省局'] = dfAllAccount['市/省局'].fillna('省直部门')
    dfAllAccount['区县/地方部门'] = dfAllAccount['区县/地方部门'].fillna('市直部门')
    dfAllAccount.loc[(dfAllAccount['市/省局'].isin(['临夏回族自治州', '甘南藏族自治州'])) & (dfAllAccount['区县/地方部门'] == '市直部门'), '区县/地方部门'] = '州直部门'
    dfAllAccount.loc[(dfAllAccount['市/省局'].isin(['省直部门'])) & (dfAllAccount['区县/地方部门'] == '市直部门'), '区县/地方部门'] = '省直部门'
    # Over-long county names are shortened for plotting.
    dfAllAccount.loc[dfAllAccount['区县/地方部门'] == '积石山保安族东乡族撒拉族自治县', '区县/地方部门'] = '积石山县'
    dfAllAccount.loc[dfAllAccount['区县/地方部门'] == '阿克塞哈萨克族自治县', '区县/地方部门'] = '阿克塞自治县'
    sUnitName = '单位全称'
    # For 白银市 the unit name is taken from the '开设主体' column.
    maskBY = dfAllAccount['市/省局'].isin(['白银市'])
    dfAllAccount.loc[maskBY, '单位全称'] = dfAllAccount.loc[maskBY, '开设主体']
    newNames = {"白银市公安局交通警察支队车辆管理所":"白银市公安局","白银市公安局交通警察支队":"白银市公安局",
                "白银市公安局交通警察支队铜城高速公路大队":"白银市公安局","白银市公安局交通警察支队响泉高速公路大队":"白银市公安局",
                "白银市公安局交通警察支队会师高速公路大队":"白银市公安局","白银市公安局交通警察支队条山高速公路大队":"白银市公安局",
                "白银市公安局出入境管理科":"白银市公安局","白银市禁毒委员会办公室":"白银市公安局",
                "白银市公安局交通警察支队喜泉高速公路大队":"白银市公安局","白银市卫生计生综合监督执法局":"白银市卫生健康委员会",}
    # Assign back instead of inplace-replacing on a selected column.
    dfAllAccount[sUnitName] = dfAllAccount[sUnitName].replace(newNames)

    fnTemplate = 'D:/Projects/POM/DATA/2023年7月/6月报告/POM_ForewardTemplate.docx'
    # Data root directories.
    strPath = ['D:/Projects/POM/DATA/2023年6月/季度报告/全文/']
    strOutputPath = 'D:/Projects/POM/DATA/2023年6月/季度报告/转发/'
    context = {
        "year": "2023",
        "month": "6",
        "pubMonth": "7",
        "dateStart": "2023年4月1日",
        "dateEnd": "2023年6月30日"
    }
    # One dict per detected forward; turned into the dfO frame below.
    forwardRows = []

    ################################################
    # WX: match WeChat article titles against the task titles
    if isDoWX:
        print('=============================================================')
        print('---- WX ----')
        dfWX = pd.concat([getWXData(strP, cities) for strP in strPath])
        dfWX = dfWX.fillna(value=0)
        wxTypes = ['微信服务号', '微信订阅号']
        for city in cities:
            print('---- WX title match', city, ' ----')
            dataC = dfWX.loc[dfWX['市州'] == city]
            accounts = dataC['公众号'].unique()
            # All registered WeChat accounts of the city (rate denominator).
            maskCWX = dfAllAccount['账号类型'].isin(wxTypes) & (dfAllAccount['市/省局'] == city)
            accountNumCWX = int(maskCWX.sum())
            for account in accounts:
                dataA = dataC.loc[dataC['公众号'] == account]
                # NOTE(review): '小程序+微信' accounts are matched here but are
                # not part of maskCWX or of the statistics masks - confirm.
                mask = (dfAllAccount['账号类型'].isin(['小程序+微信'] + wxTypes)
                        & (dfAllAccount['市/省局'] == city)
                        & (dfAllAccount['账号名称'] == account))
                if not mask.any():
                    print(' !!!! 微信', account, '', city, '无详细信息')
                    continue
                count = 0
                for rn, taskCol, ssrt, rt in tasks:
                    hit = _find_forward(rt, dataA['标题'], _RATIO)
                    forwarded = 0 if hit is None else 1
                    count += forwarded
                    if hit is not None:
                        j, matched = hit
                        forwardRows.append({'任务序号': rn, '任务名称': ssrt,
                                            '类型': '微信',
                                            '公众号': account,
                                            '日期': dataA['日期'].iloc[j],
                                            '内容': matched,
                                            '链接': dataA['链接'].iloc[j],
                                            '市州': city,
                                            '阅读数': int(dataA['阅读数'].iloc[j]),
                                            })
                    # Per-task forward flag of this account.
                    dfAllAccount.loc[mask, taskCol] = forwarded
                # Total forwards of this account.
                dfAllAccount.loc[mask, '转发数'] = count
            ccwx = dfAllAccount.loc[maskCWX, '转发数'].sum()
            rcc = ccwx / accountNumCWX / taskCount if accountNumCWX and taskCount else 0.0
            print(' ', city, '共有', accountNumCWX, '个微信号,获取数据', len(accounts), '个。共转发', ccwx, '次,转发率{:.1f}%'.format(rcc*100))

    # WB: match Weibo post bodies against the task titles
    if isDoWB:
        print('=============================================================')
        print('---- WB data read ----')
        dfWB = pd.concat([getWBData(strP, cities) for strP in strPath])
        for city in cities:
            print('---- WB match', city, ' ----')
            dataC = dfWB.loc[dfWB['市州'] == city]
            accounts = dataC['weiboName'].unique()
            # All registered Weibo accounts of the city (rate denominator).
            maskCWB = (dfAllAccount['账号类型'] == '新浪微博') & (dfAllAccount['市/省局'] == city)
            accountNumCWB = int(maskCWB.sum())
            for account in accounts:
                dataA = dataC.loc[dataC['weiboName'] == account]
                mask = maskCWB & (dfAllAccount['账号名称'] == account)
                if not mask.any():
                    print(' !!!! 微博', account, '', city, '无详细信息')
                    continue
                count = 0
                for rn, taskCol, ssrt, rt in tasks:
                    # Only the post body is cut (to the task-title length).
                    hit = _find_forward(rt, dataA['微博正文'], _RATIO, symmetric=False)
                    forwarded = 0 if hit is None else 1
                    dfAllAccount.loc[mask, taskCol] = forwarded
                    count += forwarded
                    if hit is not None:
                        j, matched = hit
                        forwardRows.append({'任务序号': rn, '任务名称': rt,
                                            '类型': '新浪微博',
                                            '公众号': account,
                                            '日期': dataA['date'].iloc[j],
                                            '内容': matched,
                                            '链接': dataA['头条文章url'].iloc[j],
                                            '市州': city,
                                            })
                dfAllAccount.loc[mask, '转发数'] = count
            ccwb = dfAllAccount.loc[maskCWB, '转发数'].sum()
            rcc = ccwb / accountNumCWB / taskCount if accountNumCWB and taskCount else 0.0
            print(' ', city, '共有', accountNumCWB, '个微博号,获取数据', len(accounts), '个。共转发', ccwb, '次,转发率{:.1f}%'.format(rcc*100))

    # TT: match Toutiao article titles against the task titles
    if isDoTT:
        print('=============================================================')
        print('---- TT data read ----')
        dfTT = pd.concat([getTTData(strP, cities) for strP in strPath])
        for city in cities:
            print("++++++++++++++++++++++++++++++++++++++++++++++++++")
            print('---- TT title match', city, ' ----')
            dataC = dfTT.loc[dfTT['city'] == city]
            accounts = dataC['account'].unique()
            # All registered Toutiao accounts of the city (rate denominator).
            maskCTT = (dfAllAccount['账号类型'] == '今日头条') & (dfAllAccount['市/省局'] == city)
            accountNumCTT = int(maskCTT.sum())
            for account in accounts:
                dataA = dataC[dataC['account'] == account]
                mask = maskCTT & (dfAllAccount['账号名称'] == account)
                if not mask.any():
                    print(' !!!! 头条', account, '', city, '无详细信息')
                    continue
                count = 0
                for rn, taskCol, ssrt, rt in tasks:
                    hit = _find_forward(rt, dataA['title'], _RATIO)
                    forwarded = 0 if hit is None else 1
                    dfAllAccount.loc[mask, taskCol] = forwarded
                    count += forwarded
                    if hit is not None:
                        j, matched = hit
                        forwardRows.append({'任务序号': rn, '任务名称': rt,
                                            '类型': '今日头条',
                                            '公众号': account,
                                            '日期': dataA['date'].iloc[j],
                                            '内容': matched,
                                            '链接': dataA['url'].iloc[j],
                                            '市州': city,
                                            })
                dfAllAccount.loc[mask, '转发数'] = count
            cctt = dfAllAccount.loc[maskCTT, '转发数'].sum()
            rcc = cctt / accountNumCTT / taskCount if accountNumCTT and taskCount else 0.0
            print(' ', city, '共有', accountNumCTT, '个头条号,获取数据', len(accounts), '个。共转发', cctt, '次,转发率{:.1f}%'.format(rcc*100))

    # All detected forwards as one frame (base columns first).
    dfO = pd.concat(
        [pd.DataFrame(columns=['任务序号', '任务名称', '类型', '公众号', '日期', '内容', '链接', '市州']),
         pd.DataFrame(forwardRows)],
        ignore_index=True)

    statTypes = ['新浪微博', '微信服务号', '微信订阅号', '今日头条']

    # NOTE(review): the source's indentation was lost; the per-city
    # statistics are assumed to belong to this condition and the
    # province-wide part to the main level - confirm.
    if isDoWX or isDoWB or isDoTT:
        print('=============================================================')
        print('---- STATISTICS ----')
        print('=============================================================')
        dfAllAccount.to_excel(strOutputPath + '甘肃省_转发账号.xlsx')
        dfO.to_excel(strOutputPath + '甘肃省_转发文章.xlsx')
        print('---- 统计市州转发率 ----')
        for city in cities:
            print(" add up city", city)
            maskC = dfAllAccount['账号类型'].isin(statTypes) & (dfAllAccount['市/省局'] == city)
            dfdfC = dfAllAccount.loc[maskC, :]
            dfdfC.to_excel(strOutputPath + city + '_转发账号.xlsx')
            # The per-city file used to receive the unfiltered dfO.
            dfOCity = dfO[dfO['市州'] == city]
            dfOCity.to_excel(strOutputPath + city + '_转发文章.xlsx')

            # Forwards of the city/prefecture/province-level departments, by unit.
            dfdfCD = dfdfC.loc[dfdfC['区县/地方部门'].isin(['州直部门', '市直部门', '省直部门'])].copy()
            dfdfCD_AD = _forward_rate_table(dfdfCD, sUnitName, taskCount)
            dfdfCD_AD.to_excel(strOutputPath + city + '部门转发统计表.xlsx')

            # All accounts of the city, by county.
            dfdfC_AD = _forward_rate_table(dfdfC, '区县/地方部门', taskCount)
            dfdfC_AD.to_excel(strOutputPath + city + '转发统计表.xlsx')

            # ---- City report ----
            tpl = DocxTemplate(fnTemplate)
            context.update({
                "strL0": '',
                "strL1": "区县",
                "taskCount": taskCount,
                "aNum": int(dfdfC_AD.iloc[-1]['count_账号名称']),
                "fNum": int(dfdfC_AD.iloc[-1]['sum_转发数']),
                "r": '%.1f' % (dfdfC_AD.iloc[-1]['rate'] * 100.0),
                "dNum": int(dfdfCD_AD.iloc[-1]['count_账号名称']),   # department accounts
                "dFNum": int(dfdfCD_AD.iloc[-1]['sum_转发数']),      # department forwards
                "dr": '%.1f' % (dfdfCD_AD.iloc[-1]['rate'] * 100.0),  # department rate
            })
            context['t1_contents'] = _rate_rows(dfdfC_AD, 'county')
            context['t2_contents'] = _rate_rows(dfdfCD_AD, 'name')
            context['t3_contents'] = _task_rows(dfTask, sTaskTitle, sTaskDate)
            # County rate chart (margin row excluded).
            graphFn = os.path.join(strOutputPath, '_' + city + '_graphCounty.png')
            drawBar(dfdfC_AD['rate'][:-1], dfdfC_AD.index[:-1], '区县转发率', graphFn)
            context.update({'graphCounty': InlineImage(tpl, graphFn, width=Mm(120))})
            tpl.render(context)
            tpl.save(strOutputPath+city+'转发统计报告_2023年{}月份.docx'.format(context['month']))

    #########################################################
    # Province-wide statistics: all cities and the provincial departments
    if True:
        # Unit full name -> short department name.  The source literal held
        # duplicate keys (the last one silently wins); the shadowed entries
        # for 甘肃省人民政府办公厅 and 甘肃省人民政府外事办公室 are dropped.
        # NOTE(review): the trailing duplicate mapped 甘肃省公安厅 to
        # '省广电局'; '省公安厅' is kept instead - confirm.
        dShortname = {"甘肃省交通运输厅":"省交通厅","甘肃省文化和旅游厅":"省文旅厅","甘肃省司法厅":"省司法厅","甘肃省人民政府国有资产监督管理委员会":"省国资委",
                      "甘肃省乡村振兴局":"省乡村振兴局","甘肃省民政厅":"省民政厅","甘肃省财政厅":"省财政厅","甘肃省人民政府驻北京办事处":"省政府驻京办",
                      "甘肃省人力资源和社会保障厅":"省人社厅","甘肃省无线电监测站":"省工信厅","甘肃省工业和信息化厅":"省工信厅",
                      "甘肃省林业和草原局":"省林草局","甘肃省水利厅":"省水利厅","甘肃省公共资源交易中心":"省公共资源交易中心","甘肃省文物局":"省文物局",
                      "甘肃省药品监督管理局":"省药监局","甘肃省农村饮水安全管理办公室":"省水利厅","甘肃省应急管理厅":"省应急厅","甘肃省粮食和物资储备局":"省粮食局",
                      "甘肃省人民政府驻新疆办事处":"省政府驻疆办","甘肃省景泰川电力提灌管理局(甘肃省景泰川电力提灌工程指挥部)":"省水利厅","甘肃省生态环境厅":"省生态环境厅",
                      "甘肃省商务厅":"省商务厅","甘肃省社会保险事业管理局":"省人社厅","甘肃省科学技术厅":"省科技厅","甘肃省市场监督管理局":"省市场监管局",
                      "甘肃省经济合作局":"省商务厅","甘肃省体育局":"省体育局","甘肃省发展和改革委员会":"省发改委","甘肃省审计厅":"省审计厅","甘肃省教育厅":"省教育厅",
                      "甘肃省民族事务委员会":"省民委","甘肃省农业农村厅":"省农业农村厅","甘肃省自然资源厅":"省自然资源厅",
                      "甘肃省统计局":"省统计局","甘肃省退役军人事务厅":"省退役军人厅","甘肃省疏勒河流域水资源局":"省水利厅","甘肃省广播电视局":"省广电局",
                      "甘肃省讨赖河流域水资源局":"省水利厅","甘肃省卫生健康委员会":"省卫健委","甘肃省药品检验研究院":"省药监局","甘肃省住房和城乡建设厅":"省住建厅",
                      "甘肃省公安厅":"省公安厅","甘肃省供销合作社联合社":"省供销社","甘肃省人民政府办公厅":"省政府办公厅","甘肃警察职业学院":"省公安厅",
                      "甘肃省教育考试院":"省教育厅","甘肃省医疗保障局":"省医保局","甘肃省公安厅刑事警察总队":"省公安厅","甘肃省人力资源市场":"省人社厅",
                      "甘肃省不动产登记事务中心":"省自然资源厅","甘肃省人力资源考试中心":"省人社厅","甘肃省人民政府驻上海办事处":"省政府驻上海办",
                      "甘肃省公安厅交通警察总队":"省公安厅","民航甘肃机场公安局":"省公安厅","甘肃省农业信息中心":"省农业农村厅","甘肃省高速路政执法总队":"省交通厅",
                      "甘肃省兰州市司法局强制隔离戒毒所官方微博":"省司法厅","甘肃省戒毒管理局":"省司法厅","甘肃省兰州监狱":"省司法厅","甘肃省合作监狱":"省司法厅",
                      "甘肃省天水监狱":"省司法厅","甘肃省女子强制隔离戒毒所官方微博":"省司法厅","甘肃省平凉监狱":"省司法厅","甘肃省武威监狱":"省司法厅",
                      "甘肃省武都监狱":"省司法厅","甘肃省永登监狱":"省司法厅","甘肃省白银监狱":"省司法厅","甘肃省第一强制隔离戒毒所":"省司法厅",
                      "甘肃省第三强制隔离戒毒所官方微博":"省司法厅","甘肃省第二强制隔离戒毒所":"省司法厅","甘肃省酒泉监狱":"省司法厅","甘肃省金昌监狱":"省司法厅",
                      "甘肃省人民政府外事办公室":"省外事办",}
        dfAllAccount[sUnitName] = dfAllAccount[sUnitName].replace(dShortname)
        maskA = dfAllAccount['账号类型'].isin(statTypes)
        dfRR = dfAllAccount.loc[maskA, :]

        # By city.
        dfCC = _forward_rate_table(dfRR, '市/省局', taskCount)
        dfCC.to_excel(strOutputPath + '甘肃省市州转发统计表.xlsx')

        # Provincial departments, by unit.  dfDD starts empty so that the
        # report code below no longer hits a NameError without such rows.
        dfDD = pd.DataFrame()
        dfRRD = dfRR[(dfRR['市/省局'] == '省直部门')]
        if dfRRD.shape[0] > 0:
            dfDD = _forward_rate_table(dfRRD, sUnitName, taskCount)
            dfDD.to_excel(strOutputPath + '甘肃省直部门转发统计表.xlsx')

        # ---- Province report ----
        tpl = DocxTemplate(fnTemplate)
        info = {
            "strL0": "",
            "strL1": "市州",
            "taskCount": taskCount,
            "aNum": int(dfCC.iloc[-1]['count_账号名称']),
            "fNum": int(dfCC.iloc[-1]['sum_转发数']),
            "r": '%.1f' % (dfCC.iloc[-1]['rate'] * 100.0),
        }
        if dfDD.empty:
            info.update({
                "dNum": 0,             # department accounts
                "dFNum": 0,            # department forwards
                "dr": '%.1f' % (0),    # department rate
            })
        else:
            info.update({
                "dNum": int(dfDD.iloc[-1]['count_账号名称']),
                "dFNum": int(dfDD.iloc[-1]['sum_转发数']),
                "dr": '%.1f' % (dfDD.iloc[-1]['rate'] * 100.0),
            })
        context.update(info)
        context['t1_contents'] = _rate_rows(dfCC, 'county')
        context['t2_contents'] = [] if dfDD.empty else _rate_rows(dfDD, 'name')
        context['t3_contents'] = _task_rows(dfTask, '内容', sTaskDate)
        # City rate chart (margin row excluded).
        graphFn = os.path.join(strOutputPath, '_ALL_graphCounty.png')
        drawBar(dfCC['rate'][:-1], dfCC.index[:-1], '市州转发率', graphFn)
        context.update({'graphCounty': InlineImage(tpl, graphFn, width=Mm(120))})
        tpl.render(context)
        tpl.save(strOutputPath+'甘肃省转发统计报告_2023年{}月份.docx'.format(context['month']))

    endtime = datetime.datetime.now()
    usedtime = endtime - starttime
    print("time: ", usedtime)