Compare commits

...

8 Commits

Author SHA1 Message Date
changren-wcr
41c7aab347
Merge 0afddd0b9a into 0153b387e5 2024-06-12 21:03:40 +00:00
wangchangren
0afddd0b9a fix: refactor two extractors into one 2022-06-18 08:56:44 +08:00
wangchangren
9ff181a1ab fix: add new extractor for QingTing 2022-06-16 20:44:05 +08:00
changren-wcr
16e7b15f76
use youtube-dl match function instead of native python re
Co-authored-by: dirkf <fieldhouse@gmx.net>
2022-06-16 19:14:08 +08:00
changren-wcr
9a421b4e7e
fix regular search pattern for title
_html_search_regex() has default fatal=True: add a default to fall back to _og_search_title()

allow line break in .*

Co-authored-by: dirkf <fieldhouse@gmx.net>
2022-06-16 19:12:26 +08:00
changren-wcr
87706c5ec8
Use _search_regex() to get proper error reports in youtube-dl
Co-authored-by: dirkf <fieldhouse@gmx.net>
2022-06-16 19:08:09 +08:00
changren-wcr
60783025df
remove capture of patterns that aren't used
Co-authored-by: dirkf <fieldhouse@gmx.net>
2022-06-16 19:02:30 +08:00
wangchangren
174624aef8 [QingTing] Add new extractor 2022-06-12 10:58:44 +08:00
2 changed files with 54 additions and 0 deletions

View File

@ -993,6 +993,7 @@ from .presstv import PressTVIE
from .prosiebensat1 import ProSiebenSat1IE
from .puls4 import Puls4IE
from .pyvideo import PyvideoIE
from .qingting import QingTingIE
from .qqmusic import (
QQMusicIE,
QQMusicSingerIE,

View File

@ -0,0 +1,53 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from youtube_dl import utils
class QingTingIE(InfoExtractor):
    """Extractor for single programmes on qingting.fm / qtfm.cn.

    Two page flavours are recognised by ``_VALID_URL``:

    * URLs whose host carries the ``m.`` prefix (path ``/vchannels/...``):
      the page is searched for a quoted ``audioUrl`` key and its value is
      returned as the media URL.
    * URLs without the ``m.`` prefix (path ``/channels/...``): the page is
      searched for a quoted ``alternate`` key and its value is handed back
      through ``url_result()`` for further extraction.
    """

    IE_NAME = 'QingTing'
    _VALID_URL = r'''(?x)
        (?:https?://)?(?:www\.)?
        (?P<m>m\.)?(?:qingting\.fm|qtfm\.cn)/(?(m)v|)
        channels/\d+/programs/(?P<id>\d+)'''
    _TEST = {
        'url': 'https://www.qingting.fm/channels/378005/programs/22257411/',
        'md5': '47e6a94f4e621ed832c316fd1888fb3c',
        'info_dict': {
            'id': '22257411',
            'ext': 'mp3',
            'title': '用了十年才修改,谁在乎教科书?-睡前消息-蜻蜓FM听头条',
        },
    }

    def _extract_quoted_url(self, webpage, key, name):
        """Return the validated URL stored under the quoted ``key``.

        Looks for ``"key": "value"`` (either quote style, matching quotes
        on each side) in ``webpage`` and checks the value with
        ``utils.url_or_none``.

        ``key`` is interpolated into the pattern unescaped, so it must be a
        plain identifier. ``name`` is the human-readable field name used in
        error messages.

        Raises ``utils.ExtractorError`` if the value is not accepted by
        ``utils.url_or_none``; a missing key is reported by
        ``_search_regex`` itself.
        """
        candidate = self._search_regex(
            r'''("|')%s\1\s*:\s*("|')(?P<url>(?:(?!\2).)*)\2''' % (key,),
            webpage, name, group='url')
        checked_url = utils.url_or_none(candidate)
        if not checked_url:
            raise utils.ExtractorError('Invalid %s %s' % (name, candidate))
        return checked_url

    def _real_extract(self, url):
        """Extract the programme addressed by ``url``.

        Returns an info dict for ``m.``-prefixed URLs, or a ``url_result``
        pointing at the page's ``alternate`` URL otherwise.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Non-greedy: with (?s) a greedy `.*` would run to the LAST
        # </title> in the document (e.g. <title> inside inline SVG).
        title = self._html_search_regex(
            r'(?s)<title\b[^>]*>(.*?)</title>', webpage, 'title',
            default=None) or self._og_search_title(webpage)

        # The optional named group `m` is None when the host has no `m.`
        # prefix; comparing against the literal keeps that case falsy.
        host_prefix = self._search_regex(
            self._VALID_URL, url, 'host prefix', group='m')

        if host_prefix == 'm.':
            return {
                'id': video_id,
                'title': title,
                'ext': 'mp3',
                'url': self._extract_quoted_url(
                    webpage, 'audioUrl', 'audio URL'),
            }

        alternate_url = self._extract_quoted_url(
            webpage, 'alternate', 'alternate URL')
        return self.url_result(
            url=alternate_url, video_id=video_id, video_title=title)