# Listing metadata from the original page: 73 lines, 1.9 KiB, Python.
import csv
import datetime
import os
from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd
import xlrd

# Input directory: every regular file found here is read as a UTF-8 CSV.
path = "D:/2020/舆论监测平台/新媒体监测数据/平凉/平凉20201006/数据整理"
# Output location of the chart written for each input file (%s = file stem).
image_path_template = 'D:/2020/舆论监测平台/新媒体监测数据/平凉/平凉20201006/平凉数据分析20201006/image/%s.png'

# Inclusive date window that rows must fall into to be counted.
time = datetime.datetime(2020, 9, 1)
time1 = datetime.datetime(2020, 9, 30)
# Sanity flag: True when the window is not inverted (only printed, never used).
flag = time1 > time

# One 'YYYY-MM-DD' label per day of the window, newest first. Derived from
# time/time1 so the counting window and the chart axis cannot drift apart.
date_list = [x.strftime('%Y-%m-%d') for x in pd.date_range(start=time, end=time1)]
date_list.reverse()


def dates_in_range(rows, start, end):
    """Collect the dates of all data rows that fall inside [start, end].

    Args:
        rows: iterable of CSV rows (sequences of strings). Column index 1 is
            compared against the literal '公众号' (presumably the header
            cell -- confirm against the input files) and column index 3 is
            parsed as a 'YYYY-MM-DD' date.
        start: inclusive lower bound (datetime.datetime).
        end: inclusive upper bound (datetime.datetime).

    Returns:
        A list of 'YYYY-MM-DD' strings, one per matching row, in row order.
        Rows that are too short, match the header literal, or carry an
        unparseable date are skipped.
    """
    dates = []
    for row in rows:
        # Blank or truncated rows have no column 1/3; skip them instead of
        # crashing with IndexError.
        if len(row) < 4:
            continue
        if row[1] == '公众号':
            continue
        raw = row[3]
        print(raw)  # progress trace kept from the original script
        try:
            parsed = datetime.datetime.strptime(str(raw), '%Y-%m-%d')
        except ValueError:
            # Not a 'YYYY-MM-DD' value: ignore this row only.
            continue
        if start <= parsed <= end:
            dates.append(parsed.strftime('%Y-%m-%d'))
    return dates


def daily_counts(dates, labels):
    """Count occurrences per day, with an explicit zero for absent days.

    Args:
        dates: iterable of 'YYYY-MM-DD' strings to count.
        labels: every day that must appear in the result; its order becomes
            the key order of the returned dict.

    Returns:
        dict mapping each label to its number of occurrences in ``dates``.
        A date not present in ``labels`` is appended after the labels.
    """
    counts = Counter(dates)
    print(dict(counts))  # progress trace kept from the original script
    per_day = dict.fromkeys(labels, 0)
    print(per_day)
    per_day.update(counts)
    print(per_day)
    return per_day


def save_bar_chart(title, per_day, out_path):
    """Draw ``per_day`` as a green bar chart and save it to ``out_path``."""
    labels = list(per_day)
    values = list(per_day.values())
    fig = plt.figure(figsize=(18, 8))
    try:
        plt.bar(labels, values, 0.4, color="green")
        plt.xlabel("日期")
        plt.ylabel("更新次数")
        plt.title(title)
        plt.xticks(labels, labels, rotation=0, fontsize=6)
        plt.savefig(out_path)
    finally:
        # One figure is created per input file; close it so figures do not
        # pile up in memory over the whole directory.
        plt.close(fig)


def process_file(source):
    """Read one CSV file, count its in-window dates and save its chart."""
    # newline="" is what the csv module expects; the with-block closes the
    # handle even when a row fails to decode.
    with open(source, 'r', encoding='utf-8', newline='') as handle:
        data = dates_in_range(csv.reader(handle), time, time1)
    print(data)
    print(len(date_list))
    per_day = daily_counts(data, date_list)

    # splitext drops only the final extension, so "a.b.csv" is titled "a.b".
    stem = os.path.splitext(os.path.basename(source))[0]
    save_bar_chart(stem, per_day, image_path_template % stem)


def main():
    """Produce one chart per regular file in ``path``."""
    print(time, time1, flag)
    # Select SimHei so the Chinese axis labels and titles can be rendered.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    for file in os.listdir(path):
        source = os.path.join(path, file)
        # os.listdir also yields sub-directories, which cannot be opened.
        if not os.path.isfile(source):
            continue
        process_file(source)


if __name__ == "__main__":
    main()