From fb8ac287a9c871cfd4800570ce7d9c8693821d6b Mon Sep 17 00:00:00 2001 From: baoliang Date: Tue, 26 Jan 2021 12:02:02 +0800 Subject: [PATCH 1/2] add gansudaily html parse --- .DS_Store | Bin 10244 -> 10244 bytes polls/utils.py | 17 ++++++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.DS_Store b/.DS_Store index afafaf25f0e3fcfd0a5f5bb8815286219469082b..564a53de6819fc754945c51b1b276a86c0231e37 100644 GIT binary patch delta 339 zcmZn(XbIS0Aiz|9aB_@*#AGi4E;g5g9^TwnCr1gWz?dxps!T^1Ca)G)&eWSUxj|5B za-JX;TjD-1kh(fSH3U;?^ASNEMs;DyQw$6Y>+I806uHfORioSZ1EA%d*FdZr6d{UJw?#N@xiJZunw$^0T= zE}t@hCbBXV0PV;D!eV57_I8`*6#(_2t4M*n1fpW{N)b86g3TvHZnCLW$${LSx*=+k qe{kBD)D4YN={m?}HnInT+$Y;o7$Ydx1(nY!XE-pzq`AI+- zj?#-S%pdKWEH13T24&g`D>4ZvOimCsXR>}cIZ;?c1X;cK^Ny181BV Date: Tue, 26 Jan 2021 12:23:57 +0800 Subject: [PATCH 2/2] fix gansudaily econding bug --- polls/utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/polls/utils.py b/polls/utils.py index d92d3b0..2e2ab57 100644 --- a/polls/utils.py +++ b/polls/utils.py @@ -97,11 +97,13 @@ def parse(url): "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", } r = requests.get(url, headers=headers) - if r.encoding=='ISO-8859-1': + print(r.encoding) + + if t == 'xgs' and r.encoding=='ISO-8859-1': r.encoding='GBK' else: r.encoding = 'utf-8' - # print(r.text) + # way2@8797799 selector = Selector(text=r.text) if t == 'weixin': og_title = selector.xpath(