Merge 0afddd0b9a into 0153b387e5

fix: refactor two extractors into one
fix: add new extractor for QingTing
2024-06-12 21:03:40 +00:00 · 2022-06-18 08:56:44 +08:00 · 2022-06-16 20:44:05 +08:00 · 2022-06-16 19:14:08 +08:00 · 2022-06-16 19:12:26 +08:00 · 2022-06-16 19:08:09 +08:00
2 changed files with 54 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -993,6 +993,7 @@ from .presstv import PressTVIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .puls4 import Puls4IE
 from .pyvideo import PyvideoIE
+from .qingting import QingTingIE
 from .qqmusic import (
    QQMusicIE,
    QQMusicSingerIE,
--- a/youtube_dl/extractor/qingting.py
+++ b/youtube_dl/extractor/qingting.py
@@ -0,0 +1,53 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from youtube_dl import utils
+
+
+class QingTingIE(InfoExtractor):
+    """Extractor for QingTing FM programme pages (desktop and mobile hosts)."""
+    IE_NAME = 'QingTing'
+    # Group `m` is set only for the mobile host; `(?(m)v|)` then requires `vchannels/`.
+    _VALID_URL = r'''(?x)
+                     (?:https?://)?(?:www\.)?
+                         (?P<m>m\.)?(?:qingting\.fm|qtfm\.cn)/(?(m)v|)
+                         channels/\d+/programs/(?P<id>\d+)'''
+    _TEST = {
+        'url': 'https://www.qingting.fm/channels/378005/programs/22257411/',
+        'md5': '47e6a94f4e621ed832c316fd1888fb3c',
+        'info_dict': {
+            'id': '22257411',
+            'ext': 'mp3',
+            'title': '用了十年才修改，谁在乎教科书？-睡前消息-蜻蜓FM听头条',
+        }
+    }
+
+    def _extract_quoted_url(self, webpage, key, note):
+        """Return the URL quoted after `key` in `webpage`; raise ExtractorError if it is not a URL."""
+        found = self._search_regex(
+            r'''("|')%s\1\s*:\s*("|')(?P<url>(?:(?!\2).)*)\2''' % (key,),
+            webpage, note, group='url')
+        checked = utils.url_or_none(found)
+        if not checked:
+            raise utils.ExtractorError('Invalid %s %s' % (note, found))
+        return checked
+
+    def _real_extract(self, url):
+        """Return the audio info dict for a mobile page, or a url_result deferring a desktop page to its `alternate` URL."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        # Lazy `.*?` ends the capture at the first </title>, not the last one on the page.
+        title = self._html_search_regex(r'(?s)<title\b[^>]*>(.*?)</title>', webpage, 'title',
+                                        default=None) or self._og_search_title(webpage)
+        if self._search_regex(self._VALID_URL, url, 'URL type', group='m'):
+            return {
+                'id': video_id,
+                'title': title,
+                'ext': 'mp3',
+                'url': self._extract_quoted_url(webpage, 'audioUrl', 'audio URL'),
+            }
+        return self.url_result(
+            url=self._extract_quoted_url(webpage, 'alternate', 'alternate URL'),
+            video_id=video_id, video_title=title)
Author	SHA1	Message	Date
changren-wcr	41c7aab347	Merge `0afddd0b9a` into `0153b387e5`	2024-06-12 21:03:40 +00:00
wangchangren	0afddd0b9a	fix: refactor two extractors into one	2022-06-18 08:56:44 +08:00
wangchangren	9ff181a1ab	fix: add new extractor for QingTing	2022-06-16 20:44:05 +08:00
changren-wcr	16e7b15f76	use youtube-dl match function instead of native python re Co-authored-by: dirkf <fieldhouse@gmx.net>	2022-06-16 19:14:08 +08:00
changren-wcr	9a421b4e7e	fix regular search pattern for title _html_search_regex() has default fatal=True: add a default to fall back to _og_search_title() allow line break in .* Co-authored-by: dirkf <fieldhouse@gmx.net>	2022-06-16 19:12:26 +08:00
changren-wcr	87706c5ec8	Use _search_regex() to get proper error reports in youtube-dl Co-authored-by: dirkf <fieldhouse@gmx.net>	2022-06-16 19:08:09 +08:00
changren-wcr	60783025df	remove capture of patterns that aren't used Co-authored-by: dirkf <fieldhouse@gmx.net>	2022-06-16 19:02:30 +08:00
wangchangren	174624aef8	[QingTing] Add new extractor	2022-06-12 10:58:44 +08:00