Add HTML parsing for gansudaily pages

This commit is contained in:
baoliang 2021-01-26 12:02:02 +08:00
parent 519eadff57
commit fb8ac287a9
2 changed files with 10 additions and 7 deletions

BIN
.DS_Store vendored

Binary file not shown.

View File

@ -97,8 +97,11 @@ def parse(url):
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
} }
r = requests.get(url, headers=headers) r = requests.get(url, headers=headers)
r.encoding = 'utf-8' if r.encoding=='ISO-8859-1':
print(r.text) r.encoding='GBK'
else:
r.encoding = 'utf-8'
# print(r.text)
selector = Selector(text=r.text) selector = Selector(text=r.text)
if t == 'weixin': if t == 'weixin':
og_title = selector.xpath( og_title = selector.xpath(
@ -162,10 +165,10 @@ if __name__ == '__main__':
# 'https://m.toutiao.com/i6883651337003729420/?tt_from=weixin&utm_campaign=client_share&app=news_article&utm_source=weixin&iid=1494959660475024&utm_medium=toutiao_android&wxshare_count=1') # 'https://m.toutiao.com/i6883651337003729420/?tt_from=weixin&utm_campaign=client_share&app=news_article&utm_source=weixin&iid=1494959660475024&utm_medium=toutiao_android&wxshare_count=1')
# og_title, og_description, og_url, og_image = parse( # og_title, og_description, og_url, og_image = parse(
# 'http://www.gov.cn/xinwen/2020-10/13/content_5550906.htm') # 'http://www.gov.cn/xinwen/2020-10/13/content_5550906.htm')
# og_title, og_description, og_url, og_image = parse( og_title, og_description, og_url, og_image = parse(
# 'http://xgs.gansudaily.com.cn/pages/h5/hot/b3297046a53e47f594ed19db90c1183c.html') 'http://gansu.gansudaily.com.cn/system/2021/01/25/030261998.shtml')
# print(og_title, og_description, og_url, og_image) print(og_title, og_description, og_url, og_image)
# print(send_voice_notify('13993199566')) # print(send_voice_notify('13993199566'))
r = send_tnps(['13609346975'], '') # r = send_tnps(['13609346975'], '')
print(r.status_code, r.text) # print(r.status_code, r.text)