123 lines
4.5 KiB
Python
123 lines
4.5 KiB
Python
import json
import random
import secrets
from itertools import chain
from urllib.parse import urlsplit

import requests
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.request import CommonRequest
from channels.db import database_sync_to_async
from django.conf import settings
from parsel import Selector
|
|
|
|
|
|
def sent_sms_code(phone, code):
    """Send a verification code by SMS via the Aliyun ``SendSms`` action.

    The request is issued against ``dysmsapi.aliyuncs.com`` with credentials
    and region taken from the Django settings ``SMS_ACCESS_KEY_ID``,
    ``SMS_ACCESS_KEY_SECRET`` and ``SMS_REGION``.

    Args:
        phone: Value sent as the ``PhoneNumbers`` query parameter.
        code: Value substituted for the template's ``number`` variable. It is
            converted with ``str()`` before being JSON-encoded.

    Returns:
        Whatever ``AcsClient.do_action`` returns for the request.
    """
    client = AcsClient(
        settings.SMS_ACCESS_KEY_ID,
        settings.SMS_ACCESS_KEY_SECRET,
        settings.SMS_REGION,
    )

    request = CommonRequest()
    request.set_accept_format('json')
    request.set_domain('dysmsapi.aliyuncs.com')
    request.set_method('POST')
    request.set_protocol_type('https')  # https | http
    request.set_version('2017-05-25')
    request.set_action_name('SendSms')

    # Serialise with json.dumps rather than string formatting so a quote or
    # backslash in ``code`` cannot produce invalid JSON. The compact
    # separators keep the payload identical to the previous hand-built
    # '{"number":"<code>"}' form for ordinary codes.
    template_param = json.dumps(
        {"number": str(code)},
        separators=(",", ":"),
        ensure_ascii=False,
    )

    # NOTE(review): RegionId is hard-coded while the client is created with
    # settings.SMS_REGION - confirm the two are meant to differ.
    request.add_query_param('RegionId', "cn-hangzhou")
    request.add_query_param('PhoneNumbers', phone)
    request.add_query_param('SignName', "短信验证")
    request.add_query_param('TemplateCode', "SMS_12330409")
    request.add_query_param('TemplateParam', template_param)

    return client.do_action(request)
|
|
|
|
|
|
def generate_code():
    """Return a random four-digit code as an ``int`` in ``1000..9999``.

    The value is drawn from the operating system's CSPRNG via ``secrets``
    instead of the ``random`` module, whose output is predictable.
    NOTE(review): presumably this feeds ``sent_sms_code`` as a verification
    code, which is why predictability matters - confirm against callers.
    """
    # randbelow(9000) yields 0..8999, so the result covers 1000..9999
    # inclusive - the same range random.randint(1000, 9999) produced.
    return 1000 + secrets.randbelow(9000)
|
|
|
|
|
|
def detect_type(url):
    """Classify *url* by the site that hosts it.

    The decision is made on the URL's host name only, so a known domain that
    merely appears in the path, query string or user-info part - or as a
    prefix of some other host - no longer causes a false match. Host
    comparison is case-insensitive and sub-domains of a known domain match.

    Args:
        url: URL string, with or without a scheme.

    Returns:
        ``'weixin'``, ``'toutiao'``, ``'xigua'`` or ``'xgs'`` for the known
        sites, otherwise ``'other'``.
    """
    known_sites = (
        ('mp.weixin.qq.com', 'weixin'),
        ('toutiao.com', 'toutiao'),
        ('ixigua.com', 'xigua'),
        ('gansudaily.com.cn', 'xgs'),
    )

    try:
        parts = urlsplit(url)
        if not parts.netloc:
            # Scheme-less input such as "m.toutiao.com/i123": re-parse it as a
            # network-path reference so the host is still recognised.
            parts = urlsplit('//' + url)
        # .hostname is already lower-cased and has the port and user-info
        # removed; a trailing root dot is dropped before comparing.
        host = (parts.hostname or '').rstrip('.')
    except ValueError:
        # urlsplit rejects some malformed authorities (e.g. broken IPv6
        # brackets); such input cannot belong to a known site.
        return 'other'

    for domain, site in known_sites:
        if host == domain or host.endswith('.' + domain):
            return site
    return 'other'
|
|
|
|
|
|
def parse(url, timeout=10):
    """Download *url* and pull share-card metadata out of its ``<head>``.

    For pages classified as ``'weixin'`` by ``detect_type`` the Open Graph
    ``og:title`` / ``og:description`` / ``og:url`` / ``og:image`` meta tags
    are read. Every other page type uses the ``<title>`` text and the
    ``description`` meta tag, reports *url* itself as the URL and has no
    image.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``. The
            request previously had no timeout and could block indefinitely.

    Returns:
        A ``(title, description, url, image)`` tuple. Extracted elements are
        whatever the selector's ``.get()`` yields, i.e. ``None`` when the tag
        is missing.

    Raises:
        requests.RequestException: Propagated from ``requests.get``,
            including timeouts.
    """
    site = detect_type(url)
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/3.53.1159.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 MicroMessenger/6.5.2.501 NetType/WIFI WindowsWechat",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    }

    response = requests.get(url, headers=headers, timeout=timeout)
    # Decode the body as UTF-8 regardless of what the server declared.
    response.encoding = 'utf-8'
    selector = Selector(text=response.text)

    if site == 'weixin':
        og_xpath = '//head/meta[@property="og:%s"]/@content'
        og_title = selector.xpath(og_xpath % 'title').get()
        og_description = selector.xpath(og_xpath % 'description').get()
        og_url = selector.xpath(og_xpath % 'url').get()
        og_image = selector.xpath(og_xpath % 'image').get()
    else:
        # 'toutiao', 'xgs' and every other type shared one identical
        # extraction rule, so they are handled by a single branch.
        og_title = selector.xpath('//head/title/text()').get()
        og_description = selector.xpath(
            '//head/meta[@name="description"]/@content').get()
        og_url = url
        og_image = None

    return (og_title, og_description, og_url, og_image)
|
|
|
|
|
|
def model_to_dict(instance, fields=None):
    """Build a ``{field name: value}`` dict from a model instance.

    Fields are taken from ``instance._meta`` (concrete, private and
    many-to-many, in that order). A field is skipped when it has no truthy
    ``editable`` attribute. The value of the field named ``'id'`` is
    converted with ``str()``; all other values are stored as returned by
    ``value_from_object``.

    Args:
        instance: Object exposing ``_meta`` with ``concrete_fields``,
            ``private_fields`` and ``many_to_many`` iterables.
        fields: Optional container of field names to keep. ``None`` or any
            other falsy value (the default) keeps every editable field.

    Returns:
        dict mapping the selected field names to their values.
    """
    opts = instance._meta
    data = {}
    for field in chain(opts.concrete_fields, opts.private_fields, opts.many_to_many):
        if not getattr(field, 'editable', False):
            continue
        if fields and field.name not in fields:
            continue
        value = field.value_from_object(instance)
        # Only the primary key is stringified; other values pass through.
        data[field.name] = str(value) if field.name == 'id' else value
    return data
|
|
|
|
|
|
def queryset_to_list(q, fields=None):
    """Convert every row of an iterable into a dict via ``model_to_dict``.

    Args:
        q: Iterable of model instances (for example a queryset).
        fields: Optional container of field names forwarded unchanged to
            ``model_to_dict`` for each row. Defaults to ``None``.

    Returns:
        list with one dict per row, in iteration order.
    """
    return [model_to_dict(row, fields) for row in q]
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: fetch one sample page and print what parse() found.
    #
    # Other pages that have been used for the same check:
    #   https://mp.weixin.qq.com/s/EhX0Pm1e0FAfse0zz9ow8Q
    #   https://m.toutiao.com/i6883651337003729420/?tt_from=weixin&utm_campaign=client_share&app=news_article&utm_source=weixin&iid=1494959660475024&utm_medium=toutiao_android&wxshare_count=1
    #   http://www.gov.cn/xinwen/2020-10/13/content_5550906.htm
    sample_url = 'http://xgs.gansudaily.com.cn/pages/h5/hot/b3297046a53e47f594ed19db90c1183c.html'

    og_title, og_description, og_url, og_image = parse(sample_url)
    print(og_title, og_description, og_url, og_image)
|