newmediamonitoring/polls/utils.py

123 lines
4.5 KiB
Python

import json
import random
import secrets
from itertools import chain

import requests
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.request import CommonRequest
from channels.db import database_sync_to_async
from django.conf import settings
from parsel import Selector
def sent_sms_code(phone, code):
    """Send a verification code by SMS through the Aliyun ``SendSms`` action.

    Args:
        phone: Destination number, sent as the ``PhoneNumbers`` parameter.
        code: Value substituted for the ``number`` variable of the SMS
            template; it is converted to a string before sending.

    Returns:
        Whatever ``AcsClient.do_action`` returns for the request (the raw
        service response).
    """
    client = AcsClient(
        settings.SMS_ACCESS_KEY_ID,
        settings.SMS_ACCESS_KEY_SECRET,
        settings.SMS_REGION,
    )

    request = CommonRequest()
    request.set_accept_format('json')
    request.set_domain('dysmsapi.aliyuncs.com')
    request.set_method('POST')
    request.set_protocol_type('https')  # https | http
    request.set_version('2017-05-25')
    request.set_action_name('SendSms')

    # NOTE(review): the region is hard-coded here while the client above is
    # built from settings.SMS_REGION -- confirm whether these should agree.
    request.add_query_param('RegionId', "cn-hangzhou")
    request.add_query_param('PhoneNumbers', phone)
    request.add_query_param('SignName', "短信验证")
    request.add_query_param('TemplateCode', "SMS_12330409")

    # Serialise with json.dumps instead of string interpolation so a code
    # containing quotes or backslashes cannot yield malformed JSON. Compact
    # separators keep the payload identical to the previous format for
    # ordinary codes, e.g. {"number":"1234"}.
    template_param = json.dumps({'number': str(code)}, separators=(',', ':'))
    request.add_query_param('TemplateParam', template_param)

    return client.do_action(request)
def generate_code():
    """Return a random four-digit code as an ``int`` in ``[1000, 9999]``.

    The value is drawn from the ``secrets`` CSPRNG rather than ``random``:
    the code is presumably used as an SMS verification code (see
    ``sent_sms_code``), and the Mersenne Twister behind ``random`` is
    predictable, which is unsuitable for anything security-sensitive.
    """
    # randbelow(9000) yields 0..8999, so the result spans 1000..9999 inclusive.
    return 1000 + secrets.randbelow(9000)
def detect_type(url):
    """Classify *url* by the first known site domain that appears in it.

    The check is a plain substring test against the whole URL string, tried
    in the order listed below.

    Returns:
        One of ``'weixin'``, ``'toutiao'``, ``'xigua'``, ``'xgs'``, or
        ``'other'`` when no known domain is found.
    """
    # Order matters: the first matching entry wins.
    known_sites = (
        ('mp.weixin.qq.com', 'weixin'),
        ('toutiao.com', 'toutiao'),
        ('ixigua.com', 'xigua'),
        ('gansudaily.com.cn', 'xgs'),
    )
    for domain, label in known_sites:
        if domain in url:
            return label
    return 'other'
def parse(url, timeout=10):
    """Fetch *url* and extract its title, description, URL and image.

    The page is downloaded with a browser-like ``User-Agent`` and decoded as
    UTF-8. Pages classified as ``'weixin'`` by ``detect_type`` are read from
    their Open Graph ``<meta property="og:*">`` tags; every other page type
    falls back to ``<title>`` and ``<meta name="description">``, reports the
    requested *url* unchanged and has no image.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so an unresponsive server cannot block the caller
            indefinitely.

    Returns:
        A ``(title, description, url, image)`` tuple. Any element is ``None``
        when the corresponding tag is missing; ``image`` is always ``None``
        for non-weixin pages.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` (connection failures, timeouts, ...).
    """
    page_type = detect_type(url)
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/3.53.1159.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 MicroMessenger/6.5.2.501 NetType/WIFI WindowsWechat",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Force UTF-8 decoding instead of relying on the encoding requests guesses.
    response.encoding = 'utf-8'
    selector = Selector(text=response.text)

    if page_type == 'weixin':
        og_title = selector.xpath(
            '//head/meta[@property="og:title"]/@content').get()
        og_description = selector.xpath(
            '//head/meta[@property="og:description"]/@content').get()
        og_url = selector.xpath(
            '//head/meta[@property="og:url"]/@content').get()
        og_image = selector.xpath(
            '//head/meta[@property="og:image"]/@content').get()
        return (og_title, og_description, og_url, og_image)

    # All remaining page types share the same generic extraction rules.
    og_title = selector.xpath('//head/title/text()').get()
    og_description = selector.xpath(
        '//head/meta[@name="description"]/@content').get()
    return (og_title, og_description, url, None)
def model_to_dict(instance, fields):
    """Build a ``{field name: value}`` dict from a model instance.

    Walks the concrete, private and many-to-many fields listed on
    ``instance._meta`` and keeps those that are editable.

    Args:
        instance: Object exposing ``_meta`` with ``concrete_fields``,
            ``private_fields`` and ``many_to_many``.
        fields: Collection of field names to include. A falsy value
            (``None``, empty list, ...) means "include every editable field".

    Returns:
        Dict of the selected field values; the ``id`` field, when present,
        is converted to ``str``.
    """
    meta = instance._meta
    result = {}
    for field in chain(meta.concrete_fields, meta.private_fields, meta.many_to_many):
        is_editable = getattr(field, 'editable', False)
        if is_editable and (not fields or field.name in fields):
            value = field.value_from_object(instance)
            result[field.name] = str(value) if field.name == 'id' else value
    return result
def queryset_to_list(q, fields):
    """Convert every row of the iterable *q* with ``model_to_dict``.

    Args:
        q: Iterable of model instances (e.g. a queryset).
        fields: Field-name filter forwarded unchanged to ``model_to_dict``.

    Returns:
        A list with one dict per row, in iteration order.
    """
    return [model_to_dict(row, fields) for row in q]
if __name__ == '__main__':
    # Manual smoke test for parse(); performs a real HTTP request.
    # Other sample pages that can be swapped in:
    #   https://mp.weixin.qq.com/s/EhX0Pm1e0FAfse0zz9ow8Q
    #   https://m.toutiao.com/i6883651337003729420/?tt_from=weixin&utm_campaign=client_share&app=news_article&utm_source=weixin&iid=1494959660475024&utm_medium=toutiao_android&wxshare_count=1
    #   http://www.gov.cn/xinwen/2020-10/13/content_5550906.htm
    sample_url = 'http://xgs.gansudaily.com.cn/pages/h5/hot/b3297046a53e47f594ed19db90c1183c.html'
    title, description, link, image = parse(sample_url)
    print(title, description, link, image)