Add gansudaily HTML parsing
This commit is contained in:
parent
519eadff57
commit
fb8ac287a9
|
@ -97,8 +97,11 @@ def parse(url):
|
||||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
||||||
}
|
}
|
||||||
r = requests.get(url, headers=headers)
|
r = requests.get(url, headers=headers)
|
||||||
r.encoding = 'utf-8'
|
if r.encoding=='ISO-8859-1':
|
||||||
print(r.text)
|
r.encoding='GBK'
|
||||||
|
else:
|
||||||
|
r.encoding = 'utf-8'
|
||||||
|
# print(r.text)
|
||||||
selector = Selector(text=r.text)
|
selector = Selector(text=r.text)
|
||||||
if t == 'weixin':
|
if t == 'weixin':
|
||||||
og_title = selector.xpath(
|
og_title = selector.xpath(
|
||||||
|
@ -162,10 +165,10 @@ if __name__ == '__main__':
|
||||||
# 'https://m.toutiao.com/i6883651337003729420/?tt_from=weixin&utm_campaign=client_share&app=news_article&utm_source=weixin&iid=1494959660475024&utm_medium=toutiao_android&wxshare_count=1')
|
# 'https://m.toutiao.com/i6883651337003729420/?tt_from=weixin&utm_campaign=client_share&app=news_article&utm_source=weixin&iid=1494959660475024&utm_medium=toutiao_android&wxshare_count=1')
|
||||||
# og_title, og_description, og_url, og_image = parse(
|
# og_title, og_description, og_url, og_image = parse(
|
||||||
# 'http://www.gov.cn/xinwen/2020-10/13/content_5550906.htm')
|
# 'http://www.gov.cn/xinwen/2020-10/13/content_5550906.htm')
|
||||||
# og_title, og_description, og_url, og_image = parse(
|
og_title, og_description, og_url, og_image = parse(
|
||||||
# 'http://xgs.gansudaily.com.cn/pages/h5/hot/b3297046a53e47f594ed19db90c1183c.html')
|
'http://gansu.gansudaily.com.cn/system/2021/01/25/030261998.shtml')
|
||||||
|
|
||||||
# print(og_title, og_description, og_url, og_image)
|
print(og_title, og_description, og_url, og_image)
|
||||||
# print(send_voice_notify('13993199566'))
|
# print(send_voice_notify('13993199566'))
|
||||||
r = send_tnps(['13609346975'], '')
|
# r = send_tnps(['13609346975'], '')
|
||||||
print(r.status_code, r.text)
|
# print(r.status_code, r.text)
|
||||||
|
|
Loading…
Reference in New Issue