import os
import re
import sys
import threading

import matplotlib
import matplotlib.pyplot as plt
import PySimpleGUI as sg
import requests
import xlrd
import xlwt
from bs4 import BeautifulSoup

# NOTE(review): this section was reconstructed from a paste whose line breaks
# and indentation had been collapsed; the statement nesting below follows the
# code's syntax and data flow — confirm against the author's copy.


def url_text_get(url, code='utf-8', timeout=30):
    """Fetch one web page and return its text.

    Args:
        url: Address of the page to download.
        code: Encoding applied to the response body before decoding.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded response body.

    Raises:
        requests.RequestException: On connection errors, timeouts or a
            non-success HTTP status.
    """
    # The header dict used to be built (with a misspelled key) and never sent.
    headers = {'User-Agent': 'Mozilla/5.0'}
    r = requests.get(url, headers=headers, timeout=timeout)
    r.raise_for_status()
    r.encoding = code
    return r.text


def url_infor_get():
    """Return the URLs of the twelve population listing pages (rk_1 .. rk_12)."""
    start_url = 'https://www.phb123.com/city/renkou/rk_'
    return [start_url + str(page) + '.html' for page in range(1, 13)]


def ayurl(html):
    """Extract the epidemic link list from an index page.

    Args:
        html: Markup of the index page.

    Returns:
        A list of ``[link text, href]`` pairs, one per ``<a>`` found inside
        the second ``<ul>`` of the page.

    Raises:
        IndexError: If the page holds fewer than two ``<ul>`` elements.
    """
    soup = BeautifulSoup(html, 'html.parser')
    link_list = soup('ul')[1]
    return [[anchor.string, anchor.get('href')]
            for anchor in link_list.find_all('a')]


def download(li, p_list):
    """Download one history file and convert it in place.

    Args:
        li: ``[name, url]`` pair; the file is stored as ``<name>.xls``.
        p_list: ``[name, population]`` pairs handed on to ``d_xieru``.

    Side effects:
        Creates the data directory when missing, records it in the global
        ``apath`` and writes/overwrites the downloaded file.  Failures are
        reported on stdout and never raised, because this runs as a
        best-effort worker thread.
    """
    global apath
    root = "d:/python练习文档/全球历史数据"
    # exist_ok avoids the check-then-create race between worker threads.
    os.makedirs(root, exist_ok=True)
    path = root + '/' + li[0] + '.xls'
    apath = root
    try:
        r = requests.get(li[1], timeout=30)
        # Do not overwrite a previously saved file with an HTTP error page.
        r.raise_for_status()
    except requests.RequestException as exc:
        print('download failed for {0}: {1}'.format(li[0], exc))
        return
    with open(path, 'wb') as t:
        t.write(r.content)
    try:
        d_xieru(p_list, path, li[0])
    except Exception as exc:  # best effort: one bad file must not stop the rest
        print('conversion failed for {0}: {1}'.format(li[0], exc))


def down(li, p_list):
    """Download every history file concurrently and wait for completion.

    Args:
        li: List of ``[name, url]`` pairs, one per file.
        p_list: ``[name, population]`` pairs forwarded to each worker.

    Returns:
        The value returned by ``lp(p_list)``, which runs before the workers
        are started.
    """
    day = lp(p_list)
    workers = []
    for item in li:
        worker = threading.Thread(target=download, args=(item, p_list))
        worker.start()
        workers.append(worker)
    # Join our own threads instead of polling the global thread count, which
    # burned a CPU core and never finished if any unrelated thread was alive.
    for worker in workers:
        worker.join()
    return day


def yqlj():
    """Build the list of history file download links.

    Returns:
        A list of ``[name, xls_url]`` pairs.  Each URL is the fixed prefix
        followed by every digit found in the matching index-page href and the
        ``.xls`` suffix.
    """
    ul = 'http://www.sy72.com/xls/world'
    ul2 = 'http://www.sy72.com/world/world417_25970.html'
    links = []
    for name, href in ayurl(url_text_get(ul2)):
        number = ''.join(re.findall(r'[0-9]+', href))
        links.append([name, ul + number + '.xls'])
    return links


def text_renkou_get(html, li):
    """Append the population rows found in one page to *li*.

    Args:
        html: Markup of a population listing page.
        li: Output list; a ``[name, population]`` pair is appended for every
            complete group of five ``<td>`` cells, taken from the 2nd and 3rd
            cell of the group.
    """
    soup = BeautifulSoup(html, 'html.parser')
    rows = soup.find_all('tr')
    cells = BeautifulSoup(str(rows), 'html.parser').find_all('td')
    for group in range(len(cells) // 5):
        name_cell = cells[5 * group + 1]
        population_cell = cells[5 * group + 2]
        li.append([name_cell.p.string, population_cell.string])
# NOTE(review): this section was reconstructed from a paste whose line breaks
# and indentation had been collapsed.  Single-space string literals such as
# " " are reproduced exactly as found; they may originally have been escape
# sequences (newline / tab) lost in extraction — confirm against the
# author's copy.

# Day-of-year of the first day covered by the per-day lists; index 0 of
# e_list / z_list corresponds to this day.
_FIRST_DAY = 26

# Days elapsed before the first of each month in a leap year (2020).
_MONTH_OFFSETS = {1: 0, 2: 31, 3: 60, 4: 91, 5: 121, 6: 152,
                  7: 182, 8: 213, 9: 244, 10: 274, 11: 305, 12: 335}


def renkouhuode(ur_list):
    """Download the population table while showing a progress window.

    Args:
        ur_list: URLs of the population listing pages.

    Returns:
        A list of ``[name, population]`` pairs gathered from all pages.

    Side effects:
        Writes ``全球国家人口数.xls`` relative to the working directory,
        updates the main window's output box and exits the program when the
        progress window's Cancel button is pressed.
    """
    layout = [[sg.Text('人口下载进度'), sg.Text('0.00%', key='2')],
              [sg.ProgressBar(len(ur_list), orientation='h', size=(20, 20),
                              key='progressbar')],
              [sg.Cancel()]]
    window1 = sg.Window('下载进度', layout)
    progress_bar = window1['progressbar']
    total = len(ur_list)
    t_li = []
    for i, url in enumerate(ur_list):
        text_renkou_get(url_text_get(url), t_li)
        event, _ = window1.read(timeout=20)
        if event == "Cancel":
            window1.close()
            sys.exit(0)
        progress_bar.update_bar(i + 1)
        # Report completed pages so the label agrees with the bar.
        window1['2'].update("{:.2f}%".format((i + 1) / total * 100))
    window1.close()
    window['6'].update("***全球人口下载成功!***")
    _, values = window.read(timeout=100)
    filename = '全球国家人口数.xls'
    f = xlwt.Workbook()
    sheet1 = f.add_sheet('class')
    for i, (name, population) in enumerate(t_li):
        # Row 0 is left empty; data starts on the second row.
        sheet1.write(i + 1, 0, name)
        sheet1.write(i + 1, 1, population)
    f.save(filename)
    # Report where the file really went (the working directory), not the
    # directory of the script.
    saved_in = os.path.dirname(os.path.abspath(filename))
    window['6'].update(values.get('6') + "***全球人口保存路径为:" + saved_in + "***")
    return t_li


def tjs(x, y):
    """Convert month *x* and day *y* to a day-of-year number for 2020.

    Returns 0 when *x* is not a month number from 1 to 12.
    """
    offset = _MONTH_OFFSETS.get(x)
    if offset is None:
        return 0
    return offset + y


def wenjianhuoqu():
    """Return the paths of all entries in the history data directory.

    Paths are built with ``/`` separators because ``guojiachaxun`` splits on
    that character to recover the file name.
    """
    filepath = 'd:/python练习文档/全球历史数据/'
    return ['%s%s' % (filepath, name) for name in os.listdir(filepath)]


def _append_population(rows, p_list, country):
    """Add population and ratio columns to *rows* for *country*, in place.

    For every entry of *p_list* whose name equals *country*, the header row
    gains two titles and every data row gains the population string plus
    ``int(row[1]) / population``.
    """
    for entry in p_list:
        if entry[0] != country:
            continue
        rows[0].append('人口')
        rows[0].append('占比')
        for row in rows[1:]:
            row.append(entry[1])
            row.append(int(row[1]) / cf(entry[1].split(',')))


def lp(p_list):
    """Download the China time series and save it as ``中国.xls``.

    Args:
        p_list: ``[name, population]`` pairs used to add the population and
            ratio columns.

    Returns:
        The number of data rows (days) written, excluding the header.
    """
    url = "http://www.sy72.com/covid/index.asp?s1=0&s2=0"
    soup = BeautifulSoup(url_text_get(url), "html.parser")
    l = soup.find('tr', id="cx")
    path = "D:/python练习文档/全球历史数据/中国.xls"
    # Each child holding <span> elements contributes one series of values.
    data = []
    for child in l.children:
        spans = BeautifulSoup(str(child), "html.parser").find_all("span")
        if spans != []:
            data.append([span.string for span in spans])
    adata = [['国家', '疫情总确诊', '疫情治愈', '疫情死亡', '时间']]
    for j in range(len(data[0])):
        date_parts = data[0][j].split(".")
        day_no = tjs(int(date_parts[-2]), int(date_parts[-1]))
        adata.append(["中国", data[2][j], data[3][j], data[4][j], str(day_no)])
    _append_population(adata, p_list, "中国")
    # This runs before any download thread has created the directory.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    renkoubaocun(adata, path)
    return len(adata) - 1


def riqizhuanhuan(data, p_list, s):
    """Convert the date column to day numbers and add population columns.

    Args:
        data: Table rows; row 0 is the header and column 4 of every other row
            holds a ``/``-separated date whose last two parts are month and
            day.
        p_list: ``[name, population]`` pairs.
        s: Country name to look up in *p_list*.

    Returns:
        *data*, modified in place.  Rows whose date cannot be converted are
        reported on stdout and left unchanged.
    """
    for i in range(1, len(data)):
        try:
            parts = data[i][4].split('/')
            data[i][4] = str(tjs(int(parts[-2]), int(parts[-1])))
        except (AttributeError, IndexError, TypeError, ValueError):
            print("第{0:}行出错".format(i))
    _append_population(data, p_list, s)
    return data


def cf(li):
    """Join the pieces of a comma-separated number and return it as ``int``."""
    return int(''.join(li))


def renkoubaocun(l_li, filename2):
    """Save the first seven columns of *l_li* as an ``.xls`` workbook.

    Writing stops at the first row that cannot be written completely (for
    example one with fewer than seven columns); cells written up to that
    point are kept and the workbook is still saved.
    """
    f = xlwt.Workbook()
    sheet1 = f.add_sheet('class')
    for i, row in enumerate(l_li):
        try:
            for col in range(7):
                sheet1.write(i, col, row[col])
        except Exception:  # deliberate best effort: keep what was written
            break
    f.save(filename2)


def d_xieru(p_list, path, s):
    """Convert the downloaded text file at *path* into an ``.xls`` workbook."""
    l_list = riqizhuanhuan(read_txt(path), p_list, s)
    renkoubaocun(l_list, path)


def read_txt(filename):
    """Read a downloaded table file into a list of rows.

    Markup tags in every line are replaced by a space, the line is split on
    single spaces and the final piece is discarded.

    Returns:
        One list of strings per line of the file.
    """
    data = []
    # 'ANSI' is the Windows-only alias of the 'mbcs' codec.
    with open(filename, 'r', encoding='ANSI') as file:
        for row in file:
            row = re.sub(r'<.*?>', " ", row)
            data.append(row.split(' ')[:-1])
    return data


def pxu(e_list):
    """Sort every inner list in place, largest second element first."""
    for lis in e_list:
        lis.sort(key=lambda item: item[1], reverse=True)


def _day_entry(table, day):
    """Return ``(kli, cli)`` for the first row of *table* matching *day*.

    A row matches when column 4 equals *day* and its name equals the name in
    the third row.  ``kli`` is ``[name, column 6]`` and ``cli`` is
    ``[name, column 3 / column 1]`` (0 when that ratio cannot be computed).
    Returns ``None`` when no row matches.
    """
    for row in range(1, table.nrows):
        if int(table.cell(row, 4).value) != day:
            continue
        name = table.cell(row, 0).value
        if name != table.cell(2, 0).value:
            continue
        kli = [name, table.cell(row, 6).value]
        # NOTE(review): column 3 is titled '疫情死亡' in the file written by
        # lp(), yet this ratio is later charted as '治愈率' — confirm which
        # column is intended.
        try:
            ratio = int(table.cell(row, 3).value) / int(table.cell(row, 1).value)
        except (IndexError, TypeError, ValueError, ZeroDivisionError):
            ratio = 0
        return kli, [name, ratio]
    return None


def xieru(r_lis, e_list, z_list, day):
    """Load the per-day figures of every workbook into the two lists.

    Args:
        r_lis: Paths of the ``.xls`` workbooks to read.
        e_list: Output; gains one list per day holding ``[name, column 6]``
            entries.
        z_list: Output; gains one list per day holding
            ``[name, column 3 / column 1]`` entries.
        day: Day count; days ``26 .. day + 24`` are loaded.

    Side effects:
        Shows a progress window and exits the program when its Cancel button
        is pressed.  Afterwards the first empty day list, if any, is removed
        from each output list.
    """
    n = _FIRST_DAY
    layout = [[sg.Text('数据加载进度'), sg.Text('0.00%', key='2')],
              [sg.ProgressBar(day, orientation='h', size=(20, 20),
                              key='progressbar')],
              [sg.Cancel()]]
    window1 = sg.Window('加载进度', layout)
    progress_bar = window1['progressbar']
    # Each workbook is opened once instead of once per day; None marks a file
    # that could not be opened.
    sheets = {}
    for i in range(n, day + n - 1):
        e_list.append([])
        z_list.append([])
        for path in r_lis:
            try:
                if path not in sheets:
                    sheets[path] = None
                    book = xlrd.open_workbook(path)
                    sheets[path] = book.sheet_by_name(book.sheet_names()[0])
                table = sheets[path]
                if table is None:
                    continue
                entry = _day_entry(table, i)
            except Exception:  # unreadable or malformed file: skip it
                continue
            if entry is not None:
                e_list[i - n].append(entry[0])
                z_list[i - n].append(entry[1])
        event, _ = window1.read(timeout=20)
        if event == "Cancel":
            window1.close()
            sys.exit(0)
        progress_bar.update_bar(i - n + 1)
        window1['2'].update("{:.2f}%".format((i - n + 1) / day * 100))
    window1.close()
    try:
        e_list.remove([])
        z_list.remove([])
    except ValueError:
        return


def plante(e_list, t):
    """Animate a top-20 horizontal bar chart, one frame per day.

    Args:
        e_list: Per-day lists of ``[name, value]`` entries, already sorted
            with the largest value first.
        t: ``1`` labels the chart as infection rate, ``2`` as cure rate.

    Drawing errors (for example the figure being closed mid-animation) end
    the animation and are reported on stdout.
    """
    plt.switch_backend('TkAgg')
    plt.ion()
    matplotlib.rc('font', family='SimHei', weight='bold')
    last = len(e_list) - 1
    try:
        for j, rows in enumerate(e_list):
            day_no = j + _FIRST_DAY
            # Days 133 and 153 were skipped by the original code; the reason
            # is not visible here.  Days without data cannot be drawn either.
            if day_no in (153, 133) or not rows:
                continue
            shown = min(len(rows), 20)
            padding = 20 - shown
            city_name = [row[0] for row in rows[:shown]] + ['暂无数据'] * padding
            city_name.reverse()
            # Scale the values so the largest bar lands in a readable range.
            top = rows[0][1]
            we = 10
            if top > 0:  # a zero top value used to loop forever here
                while top * we < 50:
                    we = we * 10
                if top * we > 200:
                    we = we / 5
            data = [0] * padding
            data.extend(row[1] * we for row in reversed(rows[:shown]))
            colors = ['black', 'pink', 'gray', 'green', 'blue', 'yellow', 'red']
            plt.barh(range(len(data)), width=data, tick_label=city_name,
                     color=colors)
            for position, value in enumerate(data):
                plt.text(value + 3, position, '{0:.2f}'.format(value),
                         ha='center', va='center', fontsize=7)
            if t == 1:
                plt.title('2020年第{0:}天全球国家感染率'.format(day_no))
                plt.xlabel('累计人数/总人口(1/{0:})'.format(we))
            if t == 2:
                plt.title('2020年第{0:}天全球国家治愈率'.format(day_no))
                plt.xlabel('治愈人数/总确诊(1/{0:})'.format(we))
            plt.pause(0.275)
            plt.show()
            if j == last:
                break  # keep the final frame on screen
            plt.clf()
    except Exception as exc:
        print('chart animation stopped: {0}'.format(exc))


def guojiachaxun(r_list, s):
    """Return the saved rows of the workbook named after country *s*.

    Args:
        r_list: Workbook paths using ``/`` separators.
        s: Country name, compared with the file name up to its first dot.

    Returns:
        One ``[col0, col1, col4, col5, col6]`` list per row, header included.
    """
    li = []
    for path in r_list:
        if path.split("/")[-1].split(".")[0] != s:
            continue
        book = xlrd.open_workbook(path)
        table = book.sheet_by_name(book.sheet_names()[0])
        for row in range(table.nrows):
            li.append([table.cell(row, col).value for col in (0, 1, 4, 5, 6)])
    return li


def riqichaxun(e_list, z_list, s):
    """Return the ranking of one day as ``[name, value, z-value]`` rows.

    Args:
        e_list: Per-day lists of ``[name, value]`` entries (ranking order).
        z_list: Per-day lists of ``[name, value]`` entries.
        s: Date written as ``month.day``.

    Returns:
        New rows in *e_list* order with the matching *z_list* value appended
        (``None`` when the name is missing there).  An empty list is returned
        for an unparsable date or a day outside the loaded range.  Neither
        input list is modified.
    """
    try:
        parts = s.split(".")
        index = tjs(int(parts[0]), int(parts[1])) - _FIRST_DAY
    except (AttributeError, IndexError, ValueError):
        return []
    # Reject days before the first loaded one instead of letting a negative
    # index wrap round to the end of the list.
    if not 0 <= index < len(e_list):
        return []
    # Both lists are sorted independently, so pair entries by name rather
    # than by position.
    by_name = {}
    if index < len(z_list):
        for row in z_list[index]:
            by_name.setdefault(row[0], row[1])
    return [[row[0], row[1], by_name.get(row[0])] for row in e_list[index]]


def _append_output(message, **update_kwargs):
    """Append *message* to the main output box after refreshing its content."""
    _, values = window.read(timeout=100)
    if values is None:  # window was closed
        return
    window['6'].update(values.get('6') + message, **update_kwargs)


def _stored_day_count():
    """Return the number of data rows in the previously saved China workbook.

    NOTE(review): assumes that file was written completely by ``lp`` (header
    plus one row per day) — confirm.
    """
    book = xlrd.open_workbook("D:/python练习文档/全球历史数据/中国.xls")
    return book.sheet_by_index(0).nrows - 1


def main():
    """Build the main window, load the data and run the event loop."""
    global window, apath
    apath = ''
    sg.theme('Dark Brown 1')
    gn = [
        [sg.InputText(default_text="请输入国家名", key='0', font=("Helvetica", 10)),
         sg.Button('G查询', size=(10, 1), key=('5'), font=("Helvetica", 10), disabled=True),
         sg.Text('叙述', size=(3, 1), font=("Helvetica", 10)),
         sg.Button('确诊统计图', size=(10, 1), key=('2'), font=("Helvetica", 10), disabled=True)],
        [sg.InputText(default_text="请输入日期(x.y)", key='1', font=("Helvetica", 10)),
         sg.Button('T查询', size=(10, 1), key=('3'), font=("Helvetica", 10), disabled=True),
         sg.Text('叙述', size=(3, 1), font=("Helvetica", 10)),
         sg.Button('治愈统计图', size=(10, 1), key=('4'), font=("Helvetica", 10), disabled=True)],
    ]
    headings = ['', '', '', '', '']
    header = [[sg.Text(' ')] + [sg.Text(h, size=(12, 1)) for h in headings]]
    input_rows = [[sg.Multiline('', key='6', size=(75, 45), autoscroll=True)]]
    layout = gn + header + input_rows
    window = sg.Window('全球新冠历史数据', layout, icon='icon/puple128.ico',
                       font='Courier 12', size=(750, 500))
    window.read(timeout=100)

    # Download and convert everything.  `except Exception` (not a bare
    # except) lets the SystemExit raised by a Cancel button end the program.
    try:
        day = down(yqlj(), renkouhuode(url_infor_get()))
    except Exception:
        day = None
        window['6'].update("***全球疫情信息下载失败,将使用历史文件信息!***")
    else:
        _append_output("***全球疫情信息下载转换成功!***")
        _append_output("***历史疫情保存地址为:{:}***".format(apath))
    for key in ('2', '4', '3', '5'):
        window[key].update(disabled=False)

    e_list = []
    z_list = []
    try:
        if day is None:
            # Fall back to the files saved by an earlier run.
            day = _stored_day_count()
        xieru(wenjianhuoqu(), e_list, z_list, day)
    except Exception:
        for key in ('2', '4', '3'):
            window[key].update(disabled=True)
        _append_output("***数据载入失败!***")
    else:
        _append_output("***数据加载成功!***" + " " + "***请开始进行操作***" + " ",
                       autoscroll=False)
    pxu(e_list)
    pxu(z_list)

    while True:
        event, values = window.read()
        if event in (None, '关闭'):
            break
        if event == '5':
            # Country query.
            try:
                t = "{0:<8}{1:<12}{2:<5}{3:<15}{4:<20}"
                window['6'].update(t.format("国家", "疫情总确诊", "时间", "人口", "确诊占比"))
                s = values.get('0')
                data = guojiachaxun(wenjianhuoqu(), s)
                if s == "中国":
                    data = data[:1:-1]
                for row in data[1:]:
                    ty = [str(cell) for cell in row]
                    _append_output(t.format(ty[0], ty[1], ty[2], ty[3], ty[4]))
            except Exception:
                _append_output("***错误!***")
        elif event == '3':
            # Date query.
            t = "{0:<4}{1:<8}{2:<20}{3:<20}{4:<6}"
            window['6'].update(t.format("排名", "国家", "确诊占比", "治愈占比", "日期"))
            s = values.get('1')
            for rank, row in enumerate(riqichaxun(e_list, z_list, s)):
                ty = [str(cell) for cell in row]
                _append_output(t.format(str(rank), ty[0], ty[1], ty[2], s))
        elif event == '2':
            plante(e_list, 1)
        elif event == '4':
            plante(z_list, 2)
    window.close()


if __name__ == "__main__":  # program entry point
    main()
本文来自作者[曼山]投稿,不代表利天创世立场,如若转载,请注明出处:https://m.yqlvyou.com/zskp/202509-5718.html
评论列表(4条)
我是利天创世的签约作者“曼山”!
希望本篇文章《疫情数据查询与动态统计图的GUI实践:技术设计与数据加载挑战》能对你有所帮助!
本站[利天创世]内容主要涵盖:国足,欧洲杯,世界杯,篮球,欧冠,亚冠,英超,足球,综合体育
本文概览:import requests import xlwt import xlrd import re import sys import threading import os...