"""Helpers for SMS verification codes, link-preview scraping and model serialization."""
import json
import logging
import random
from itertools import chain

import requests
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.request import CommonRequest
from channels.db import database_sync_to_async
from django.conf import settings
from parsel import Selector

logger = logging.getLogger(__name__)

# Seconds to wait for a remote page before giving up; without a timeout
# ``requests.get`` can block the caller indefinitely.
_REQUEST_TIMEOUT = 10

# Browser-like headers sent with every page fetch in ``parse``.
_REQUEST_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 "
        "QBCore/3.53.1159.400 QQBrowser/9.0.2524.400 "
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 "
        "MicroMessenger/6.5.2.501 NetType/WIFI WindowsWechat"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
}


def sent_sms_code(phone, code):
    """Send a verification code by SMS through the Aliyun ``SendSms`` action.

    Args:
        phone: Value passed as the ``PhoneNumbers`` query parameter.
        code: Verification code substituted into the template's ``number``
            placeholder.

    Returns:
        Whatever ``AcsClient.do_action`` returns for the request.
    """
    client = AcsClient(
        settings.SMS_ACCESS_KEY_ID,
        settings.SMS_ACCESS_KEY_SECRET,
        settings.SMS_REGION,
    )

    request = CommonRequest()
    request.set_accept_format('json')
    request.set_domain('dysmsapi.aliyuncs.com')
    request.set_method('POST')
    request.set_protocol_type('https')  # https | http
    request.set_version('2017-05-25')
    request.set_action_name('SendSms')

    # NOTE(review): RegionId is hard-coded while the client is built from
    # settings.SMS_REGION -- confirm the two are meant to be independent.
    request.add_query_param('RegionId', "cn-hangzhou")
    request.add_query_param('PhoneNumbers', phone)
    request.add_query_param('SignName', "短信验证")
    request.add_query_param('TemplateCode', "SMS_12330409")
    # Serialize with json.dumps so quotes or backslashes in ``code`` cannot
    # produce malformed JSON; compact separators keep the payload identical
    # to the previous hand-built string for ordinary codes.
    template_param = json.dumps({"number": str(code)}, separators=(",", ":"))
    request.add_query_param('TemplateParam', template_param)

    return client.do_action(request)


def generate_code():
    """Return a random four-digit integer in the range 1000..9999.

    The value is drawn from the operating system's entropy source, because
    the result is used as an SMS verification code and must not be
    predictable from earlier outputs.
    """
    return random.SystemRandom().randint(1000, 9999)


def detect_type(url):
    """Classify ``url`` by the site whose domain appears in it.

    Returns:
        One of ``'weixin'``, ``'toutiao'``, ``'xigua'``, ``'xgs'`` or
        ``'other'``. The first matching domain wins.
    """
    if 'mp.weixin.qq.com' in url:
        return 'weixin'
    if 'toutiao.com' in url:
        return 'toutiao'
    if 'ixigua.com' in url:
        return 'xigua'
    if 'gansudaily.com.cn' in url:
        return 'xgs'
    return 'other'


def parse(url):
    """Fetch ``url`` and extract link-preview metadata from its ``<head>``.

    WeChat articles are read from their Open Graph ``<meta>`` tags. Every
    other page falls back to ``<title>`` and the ``description`` meta tag,
    with the requested URL as the canonical URL and no image.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``(title, description, url, image)`` tuple. Any element whose tag
        is absent from the page is ``None``.

    Raises:
        requests.RequestException: If the page cannot be fetched, including
            when the request exceeds ``_REQUEST_TIMEOUT`` seconds.
    """
    response = requests.get(url, headers=_REQUEST_HEADERS, timeout=_REQUEST_TIMEOUT)
    response.encoding = 'utf-8'
    logger.debug("Response body for %s: %s", url, response.text)
    selector = Selector(text=response.text)

    if detect_type(url) == 'weixin':
        og_title = selector.xpath(
            '//head/meta[@property="og:title"]/@content').get()
        og_description = selector.xpath(
            '//head/meta[@property="og:description"]/@content').get()
        og_url = selector.xpath(
            '//head/meta[@property="og:url"]/@content').get()
        og_image = selector.xpath(
            '//head/meta[@property="og:image"]/@content').get()
    else:
        # toutiao, xigua, xgs and unknown sites all use the same generic
        # extraction.
        og_title = selector.xpath('//head/title/text()').get()
        og_description = selector.xpath(
            '//head/meta[@name="description"]/@content').get()
        og_url = url
        og_image = None

    return (og_title, og_description, og_url, og_image)


def model_to_dict(instance, fields=None):
    """Build a dict of field values from a model instance.

    Walks the instance's concrete, private and many-to-many fields, skipping
    any field that is not editable.

    Args:
        instance: Model instance exposing ``_meta``.
        fields: Optional collection of field names to include. A falsy value
            (``None`` or empty) includes every editable field.

    Returns:
        A dict mapping field name to value. The ``id`` field is converted to
        ``str``; every other value is returned as-is.
    """
    opts = instance._meta
    data = {}
    for field in chain(opts.concrete_fields, opts.private_fields, opts.many_to_many):
        if not getattr(field, 'editable', False):
            continue
        if fields and field.name not in fields:
            continue
        value = field.value_from_object(instance)
        data[field.name] = str(value) if field.name == 'id' else value
    return data


def queryset_to_list(q, fields=None):
    """Convert each row of the iterable ``q`` with ``model_to_dict``.

    Args:
        q: Iterable of model instances.
        fields: Optional collection of field names forwarded to
            ``model_to_dict``.

    Returns:
        A list with one dict per row, in iteration order.
    """
    return [model_to_dict(row, fields) for row in q]


if __name__ == '__main__':
    # Manual smoke test. Other sample URLs that have been tried:
    # og_title, og_description, og_url, og_image = parse(
    #     'https://mp.weixin.qq.com/s/EhX0Pm1e0FAfse0zz9ow8Q')
    # og_title, og_description, og_url, og_image = parse(
    #     'https://m.toutiao.com/i6883651337003729420/?tt_from=weixin&utm_campaign=client_share&app=news_article&utm_source=weixin&iid=1494959660475024&utm_medium=toutiao_android&wxshare_count=1')
    # og_title, og_description, og_url, og_image = parse(
    #     'http://www.gov.cn/xinwen/2020-10/13/content_5550906.htm')
    og_title, og_description, og_url, og_image = parse(
        'http://xgs.gansudaily.com.cn/pages/h5/hot/b3297046a53e47f594ed19db90c1183c.html')
    print(og_title, og_description, og_url, og_image)