avatar

目录
新冠状病毒预测

闲来无事,做一个确诊人数的预测吧,希望拐点早日降临。
首先,数据只有日期和累计确诊人数两列,走势是先平缓、再爆发、最后再趋于平缓,所以用 logistic 函数来拟合。治愈人数目前仍处于上升趋势,就用三次多项式拟合。大体思路是:以天数序号(从 0 到总天数减 1)作为自变量进行拟合,预测时再把序号映射回日期,最后制图。
附上代码

python
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import numpy as np
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit



def date_encode(date):
    """Extract the month and day from a slash-separated date string.

    The string is split on ``'/'``; the second field is taken as the month
    and the third as the day (the first field is ignored).

    Args:
        date: A date such as ``'2020/1/24'``.

    Returns:
        A ``(month, day)`` tuple of ints, e.g. ``(1, 24)``.

    Raises:
        IndexError: If ``date`` has fewer than three '/'-separated fields.
        ValueError: If the month or day field is not an integer.
    """
    fields = date.split('/')
    return int(fields[1]), int(fields[2])

def date_decode(date):
    """Format an integer of the form ``month * 100 + day`` as ``'MM.DD'``.

    Both parts are zero-padded to two digits, so ``124`` becomes ``'01.24'``
    and ``1205`` becomes ``'12.05'``.

    Args:
        date: Integer encoding ``month * 100 + day``.

    Returns:
        The date as a ``'MM.DD'`` string.
    """
    month, day = divmod(date, 100)
    return '{:02d}.{:02d}'.format(int(month), int(day))


# Load the case data from data.csv in the working directory.
df = pd.read_csv('data.csv')# other read options noted by the author: encoding='utf-8', header=None, sep='\t'
# Drop the row labelled 33 in place.
# NOTE(review): the reason this row is excluded is not shown here - confirm against the data.
df.drop([33],inplace=True)
# First column holds the date strings.
# Original author's note: with too many dates the tick labels overlap, and the first ten
# days are flat so they were to be ignored - NOTE(review): no rows are skipped here for that.
X = np.array(df.iloc[:,0])
# Month/day of the first row; used as the starting point for the date labels.
cur_month,cur_day=date_encode(X[0])
# Fit targets: the 'total_confirmed' and 'new_recoveries' columns.
y = np.array(df['total_confirmed'])
z= np.array(df['new_recoveries'])
# Independent variable: 0-based day index, one per row of y.
x = np.arange(len(y))

def get_date_list(cur_month, cur_day, days, prediction=7):
    """Build consecutive date labels for the observed and predicted days.

    Starting from ``cur_month``/``cur_day``, produce exactly
    ``days + prediction`` labels, one per calendar day, rolling over to the
    next month (and from December to January) when a month ends.

    Args:
        cur_month: Month number (1-12) of the first label.
        cur_day: Day of month of the first label.
        days: Number of observed days.
        prediction: Number of additional future days to label.

    Returns:
        A list of ``'<month>/<DD>'`` strings (day zero-padded, month not),
        e.g. ``['1/30', '1/31', '2/01']``. Empty if ``days + prediction``
        is not positive.
    """
    # Days per month, indexed by month number (index 0 is a placeholder).
    # February is fixed at 29 days, i.e. the table assumes a leap year.
    month_day = [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    ans = []
    for _ in range(days + prediction):
        if cur_day > month_day[cur_month]:
            # Past the end of the month: move to the 1st of the next month
            # and still emit a label for this iteration.
            cur_day = 1
            cur_month = cur_month % 12 + 1
        ans.append("{}/{:02d}".format(cur_month, cur_day))
        cur_day += 1
    return ans


# Date labels for every observed day plus the 7 predicted days, starting at the first data row.
ans = get_date_list(cur_month,cur_day,len(y),prediction=7)
def logistic_function(t, K, P0, r):
    """Evaluate a logistic growth curve at time ``t``.

    Computes ``K * P0 * e^(r t) / (K + (e^(r t) - 1) * P0)``, written in the
    equivalent form ``K * P0 / (P0 + (K - P0) * e^(-r t))`` so that large
    ``t`` saturates at ``K`` instead of overflowing to ``inf / inf`` (NaN).

    Args:
        t: Time value(s); a scalar or a NumPy array.
        K: Upper asymptote of the curve.
        P0: Value of the curve at ``t == 0``.
        r: Accepted for signature compatibility but IGNORED - the growth
            rate is pinned to 0.27 below, so a fitter varying ``r`` sees no
            effect from it.

    Returns:
        The curve value(s) at ``t``, with the same shape as ``t``.
    """
    # NOTE(review): the growth rate is deliberately hard-coded here, which
    # overrides whatever the caller (e.g. curve_fit) passes in.
    r = 0.27
    t0 = 0  # time origin of the curve
    decay = np.exp(-r * (t - t0))
    return (K * P0) / (P0 + (K - P0) * decay)

def f_3(x, A, B, C, D):
    """Evaluate the cubic polynomial ``A*x^3 + B*x^2 + C*x + D``.

    Args:
        x: Point(s) at which to evaluate; a scalar or a NumPy array.
        A: Coefficient of the cubic term.
        B: Coefficient of the quadratic term.
        C: Coefficient of the linear term.
        D: Constant term.

    Returns:
        The polynomial value(s) at ``x``.
    """
    cubic = A * x * x * x
    quadratic = B * x * x
    linear = C * x
    return cubic + quadratic + linear + D


# Fit the logistic curve to the confirmed totals and the cubic to the recoveries.
popt, pcov = curve_fit(logistic_function, x, y)
popt1, pcov1 = curve_fit(f_3, x, z)

# Day indices for the observed range plus the next 7 days.
predict_x = np.array(list(x) + [x[-1] + step for step in range(1, 8)])

# Evaluate both fitted curves over the extended range, truncated to whole numbers.
predict_y = [int(value) for value in logistic_function(predict_x, *popt)]
predict_z = [int(value) for value in f_3(predict_x, *popt1)]

# Newly confirmed cases per predicted day: difference between consecutive fitted totals.
new_infected = [
    later - earlier
    for earlier, later in zip(predict_y[-8:-1], predict_y[-7:])
]
print(ans[-7:], new_infected)


#plt.scatter(x,y,color='purple',label='real')
#plt.plot(x,y,color='gray')
#plt.scatter(predict_x,predict_y,marker='x',color='red',label='predicted data')
#plt.xticks(predict_x,ans,rotation=90)
#plt.suptitle("Logistic Fitting Curve for 2019-nCov total infected numbers", fontsize=16, fontweight="bold")

# Print the day-over-day change of the fitted recovery curve for the 7 predicted days.
# NOTE(review): z is read from the 'new_recoveries' column, so this is a difference of
# already-daily values - confirm that is the intended quantity.
new_cured = [predict_z[i]-predict_z[i-1] for i in range(-7,0)]
print(ans[-7:],new_cured)

# Observed recoveries (purple points joined by a gray line) and the fitted/predicted
# values (red crosses), with one date label per day index on the x axis.
plt.scatter(x,z,color='purple',label='real')
plt.plot(x,z,color='gray')
plt.scatter(predict_x,predict_z,marker='x',color='red',label='predicted data')
plt.xticks(predict_x,ans,rotation=90)
# Without this call the label= arguments above are never displayed.
plt.legend()

plt.suptitle("polynomial regression Fitting Curve for 2019-nCov total cured numbers", fontsize=16, fontweight="bold")
plt.xlabel('date', fontsize=14)
# This figure shows recoveries, not infections.
plt.ylabel('cured number', fontsize=14)
plt.show()

预测走势

新增确诊人数:

新增治愈趋势:

新增治愈人数:

导出csv文件

导出文件,list作为列的方法:

python
1
2
3
4
5
6
# Three equal-length lists, each of which becomes one column of the exported table.
a = ['2020/2/24', '2020/2/25', '2020/2/26', '2020/2/27', '2020/2/28', '2020/2/29', '2020/3/01']
b = [307, 236, 181, 139, 106, 81, 63]
c = [2818, 3019, 3229, 3447, 3673, 3909, 4153]

# Column name -> column values; key order fixes the column order in the CSV.
data = dict(date=a, new_confirmed=b, new_recoveries=c)
dataframe = pd.DataFrame(data)
dataframe.to_csv(r'D:\python\test.csv')

结果

用 list 构造 DataFrame 时,数据要写成嵌套列表 [[行1], [行2], …],每个内层列表对应一行:

python
1
2
3
4
5
6
7
# One summary row: the 7 predicted daily confirmed counts followed by the last 7 fitted
# recovery values. Named `row` rather than `list` so the builtin is not shadowed.
row = [*new_infected[-7:], *predict_z[-7:]]

# Column headers: confirmed_day1..7 followed by recovery_day1..7.
column = (
    ['confirmed_day{}'.format(day) for day in range(1, 8)]
    + ['recovery_day{}'.format(day) for day in range(1, 8)]
)

# DataFrame expects a list of rows, hence the extra brackets around the single row.
test = pd.DataFrame(columns=column, data=[row], index=['total'])
test.to_csv('D:/test1.csv')
文章作者: Sunxin
文章链接: https://sunxin18.github.io/2020/02/25/virus/
版权声明: 本博客所有文章除特别声明外,均采用 CC BY-NC-SA 4.0 许可协议。转载请注明来自 lalala
打赏
  • 微信
    微信
  • 支付宝
    支付宝

评论