From f561e0d81786534993b8392d9c75074042c0330b Mon Sep 17 00:00:00 2001 From: Zenon Mousmoulas Date: Sat, 9 Oct 2021 14:11:43 +0300 Subject: [PATCH 1/8] Add Glomex IEs * Add new IEs * GlomexBaseIE: Base IE class * GlomexIE: Extract videos from video.glomex.com (by deferring to glomex:embed) * GlomexEmbedIE: Extract Glomex videos by matching the player URL * Query the API to extract metadata, detect video formats and get the respective (JWT protected) stream/source URLs * The API query may return one or more videos: the latter case is treated as a playlist * As this is otherwise identically handled, a separate IE was not deemed necessary * However title and description fields are not set for playlist results * They do not exist in the parent object; obtaining them from the first entry is not indicative of the playlist content * As the playlist order is not always stable (this is true at least for related videos playlists), it makes writing test cases impossible * Let GenericIE detect embeds by matching all three integration methods: * HTML: glomex-player tag or data attributes * JavaScript: naive parsing of inline scripts for string constants assigned to integration parameters * Iframe: src attribute matching GlomexEmbedIE._VALID_URL * Let GlomexIE and the former embed detection pass the origin URL to GlomexEmbedIE by smuggling it in the player URL, as this is an expected parameter in API requests * Add test cases for both single videos and two playlist flavors --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/generic.py | 7 + youtube_dl/extractor/glomex.py | 273 +++++++++++++++++++++++++++++ 3 files changed, 284 insertions(+) create mode 100644 youtube_dl/extractor/glomex.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6e8fc3961..b395a857c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1038,6 +1038,10 @@ from .rutube import ( RutubePersonIE, RutubePlaylistIE, ) +from 
.glomex import ( + GlomexIE, + GlomexEmbedIE, +) from .rutv import RUTVIE from .ruutu import RuutuIE from .ruv import RuvIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a9c064105..9111fdda8 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -100,6 +100,7 @@ from .ustream import UstreamIE from .arte import ArteTVEmbedIE from .videopress import VideoPressIE from .rutube import RutubeIE +from .glomex import GlomexEmbedIE from .limelight import LimelightBaseIE from .anvato import AnvatoIE from .washingtonpost import WashingtonPostIE @@ -3199,6 +3200,12 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( rutube_urls, video_id, video_title, ie=RutubeIE.ie_key()) + # Look for Glomex embeds + glomex_urls = GlomexEmbedIE._extract_urls(webpage, url) + if glomex_urls: + return self.playlist_from_matches( + glomex_urls, video_id, video_title, ie=GlomexEmbedIE.ie_key()) + # Look for WashingtonPost embeds wapo_urls = WashingtonPostIE._extract_urls(webpage) if wapo_urls: diff --git a/youtube_dl/extractor/glomex.py b/youtube_dl/extractor/glomex.py new file mode 100644 index 000000000..031c6f099 --- /dev/null +++ b/youtube_dl/extractor/glomex.py @@ -0,0 +1,273 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_parse_qs, + compat_urllib_parse_urlparse, + compat_urllib_parse_urlencode, +) +from ..utils import ( + ExtractorError, + determine_ext, + int_or_none, + try_get, + smuggle_url, + unsmuggle_url, + unescapeHTML, +) + + +class GlomexBaseIE(InfoExtractor): + _DEFAULT_ORIGIN_URL = 'https://player.glomex.com/' + _API_URL = 'https://integration-cloudfront-eu-west-1.mes.glomex.cloud/' + + @staticmethod + def _smuggle_origin_url(url, origin_url): + return smuggle_url(url, {'origin': origin_url}) + + @classmethod + def _unsmuggle_origin_url(cls, url, fallback_origin_url=None): + defaults = 
{'origin': fallback_origin_url or cls._DEFAULT_ORIGIN_URL} + unsmuggled_url, data = unsmuggle_url(url, default=defaults) + return unsmuggled_url, data['origin'] + + def _get_videoid_type(self, video_id): + _VIDEOID_TYPES = { + 'v': 'video', + 'pl': 'playlist', + 'rl': 'related videos playlist', + 'cl': 'curated playlist', + } + prefix = video_id.split('-')[0] + return _VIDEOID_TYPES.get(prefix, 'unknown type') + + def _download_api_data(self, video_id, integration, current_url=None): + query = { + 'integration_id': integration, + 'playlist_id': video_id, + 'current_url': current_url or self._DEFAULT_ORIGIN_URL, + } + video_id_type = self._get_videoid_type(video_id) + return self._download_json( + self._API_URL, + video_id, 'Downloading %s JSON' % video_id_type, + 'Unable to download %s JSON' % video_id_type, + query=query) + + def _download_and_extract_api_data(self, video_id, integration, current_url): + api_data = self._download_api_data(video_id, integration, current_url) + videos = api_data['videos'] + if not videos: + raise ExtractorError('no videos found for %s' % video_id) + if len(videos) == 1: + return self._extract_api_data(videos[0], video_id) + # assume some kind of playlist + videos = [ + self._extract_api_data(video, video_id) + for video in videos + ] + return self.playlist_result(videos, video_id) + + def _extract_api_data(self, video, video_id): + if video.get('error_code') == 'contentGeoblocked': + self.raise_geo_restricted(countries=video['geo_locations']) + info = self._extract_info(video, video_id) + info['formats'] = self._extract_formats(video, video_id) + return info + + @staticmethod + def _extract_info(video, video_id=None, require_title=True): + title = video['title'] if require_title else video.get('title') + + def append_image_url(url, default='profile:player-960x540'): + if url: + return '%s/%s' % (url, default) + thumbnail = append_image_url(try_get(video, + lambda x: x['image']['url'])) + thumbnails = [ + dict(width=960, height=540, 
+ **{k: append_image_url(v) if k == 'url' else v + for k, v in image.items() if k in ('id', 'url')}) + for image in video.get('images', []) + ] or None + + return { + 'id': video.get('clip_id') or video_id, + 'title': title, + 'description': video.get('description'), + 'thumbnail': thumbnail, + 'thumbnails': thumbnails, + 'duration': int_or_none(video.get('clip_duration')), + 'timestamp': video.get('created_at'), + } + + def _extract_formats(self, options, video_id): + formats = [] + for format_id, format_url in options['source'].items(): + ext = determine_ext(format_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id=format_id, + fatal=False)) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + }) + if options.get('language'): + for format in formats: + format['language'] = options.get('language') + self._sort_formats(formats) + return formats + + +class GlomexIE(GlomexBaseIE): + IE_NAME = 'glomex' + IE_DESC = 'Glomex videos' + _VALID_URL = r'https?://video.glomex.com/[^/]+/(?P<id>v-[^-]+)' + # Hard-coded integration ID for video.glomex.com + _INTEGRATION_ID = '19syy24xjn1oqlpc' + + _TEST = { + 'url': 'https://video.glomex.com/sport/v-cb24uwg77hgh-nach-2-0-sieg-guardiola-mit-mancity-vor-naechstem-titel', + 'md5': 'cec33a943c4240c9cb33abea8c26242e', + 'info_dict': { + 'id': 'v-cb24uwg77hgh', + 'ext': 'mp4', + 'title': 'md5:38a90cedcfadd72982c81acf13556e0c', + 'description': 'md5:1ea6b6caff1443fcbbba159e432eedb8', + 'duration': 29600, + 'timestamp': 1619895017, + 'upload_date': '20210501', + 'age_limit': None, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + # Defer to glomex:embed IE: Build and return a player URL using the + # matched video ID and the hard-coded integration ID + return self.url_result( + GlomexEmbedIE.build_player_url(video_id, self._INTEGRATION_ID, + url), + GlomexEmbedIE.ie_key(), + video_id + ) + + +class GlomexEmbedIE(GlomexBaseIE): + 
IE_NAME = 'glomex:embed' + IE_DESC = 'Glomex embedded videos' + _BASE_PLAYER_URL = 'https://player.glomex.com/integration/1/iframe-player.html' + _VALID_URL = r'(?:https?:)?//player\.glomex\.com/integration/[^/]+/iframe-player\.html\?(?:(?:integrationId=(?P<integration>[^&#]+)|playlistId=(?P<id>[^&#]+)|[^&=#]+=[^&#]+)&?)+' + + _TESTS = [{ + 'url': 'https://player.glomex.com/integration/1/iframe-player.html?integrationId=4059a013k56vb2yd&playlistId=v-cfa6lye0dkdd-sf', + 'info_dict': { + 'id': 'v-cfa6lye0dkdd-sf', + 'ext': 'mp4', + 'timestamp': 1635337199, + 'duration': 133080, + 'upload_date': '20211027', + 'description': 'md5:e741185fc309310ff5d0c789b437be66', + 'title': 'md5:35647293513a6c92363817a0fb0a7961', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://player.glomex.com/integration/1/iframe-player.html?origin=fullpage&integrationId=19syy24xjn1oqlpc&playlistId=rl-vcb49w1fb592p&playlistIndex=0', + 'info_dict': { + 'id': 'rl-vcb49w1fb592p', + }, + 'playlist_count': 100, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://player.glomex.com/integration/1/iframe-player.html?playlistId=cl-bgqaata6aw8x&integrationId=19syy24xjn1oqlpc', + 'info_dict': { + 'id': 'cl-bgqaata6aw8x', + }, + 'playlist_mincount': 2, + 'params': { + 'skip_download': True, + }, + }] + + @classmethod + def build_player_url(cls, video_id, integration, origin_url=None): + query_string = compat_urllib_parse_urlencode({ + 'playlistId': video_id, + 'integrationId': integration, + }) + player_url = '%s?%s' % (cls._BASE_PLAYER_URL, query_string) + if origin_url is not None: + player_url = cls._smuggle_origin_url(player_url, origin_url) + return player_url + + @classmethod + def _match_integration(cls, url): + if '_VALID_URL_RE' not in cls.__dict__: + cls._VALID_URL_RE = re.compile(cls._VALID_URL) + m = cls._VALID_URL_RE.match(url) + assert m + return compat_str(m.group('integration')) + + @classmethod + def _extract_urls(cls, webpage, origin_url): + # 
https://docs.glomex.com/publisher/video-player-integration/javascript-api/ + EMBED_RE = r'''(?x) + (?: + <iframe[^>]+?src=(?P<_q1>%(quot_re)s) + (?P<url>(?:https?:)?//player\.glomex\.com/integration/[^/]+/iframe-player\.html\? + (?:(?!(?P=_q1)).)+)(?P=_q1)| + <(?P<html_tag>glomex-player|div)(?: + data-integration-id=(?P<_q2>%(quot_re)s)(?P<integration_html>(?:(?!(?P=_q2)).)+)(?P=_q2)| + data-playlist-id=(?P<_q3>%(quot_re)s)(?P<id_html>(?:(?!(?P=_q3)).)+)(?P=_q3)| + data-glomex-player=(?P<_q4>%(quot_re)s)(?P<glomex_player>true)(?P=_q4)| + [^>]*? + )+>| + # naive parsing of inline scripts for hard-coded integration parameters + <(?P<script_tag>script)[^<]*?>(?: + (?P<_stjs1>dataset\.)?integrationId\s*(?(_stjs1)=|:)\s* + (?P<_q5>%(quot_re)s)(?P<integration_js>(?:(?!(?P=_q5)).)+)(?P=_q5)\s*(?(_stjs1);|,)?| + (?P<_stjs2>dataset\.)?playlistId\s*(?(_stjs2)=|:)\s* + (?P<_q6>%(quot_re)s)(?P<id_js>(?:(?!(?P=_q6)).)+)(?P=_q6)\s*(?(_stjs2);|,)?| + (?:\s|.)*? + )+</script> + ) + ''' % {'quot_re': r'[\"\']'} + for mobj in re.finditer(EMBED_RE, webpage): + url, html_tag, video_id_html, integration_html, glomex_player, \ + script_tag, video_id_js, integration_js = \ + mobj.group('url', 'html_tag', 'id_html', + 'integration_html', 'glomex_player', 'script_tag', + 'id_js', 'integration_js') + if url: + yield cls._smuggle_origin_url(unescapeHTML(url), origin_url) + elif html_tag: + if html_tag == "div" and not glomex_player: + continue + if not video_id_html or not integration_html: + continue + yield cls.build_player_url(video_id_html, integration_html, url) + elif script_tag: + if not video_id_js or not integration_js: + continue + yield cls.build_player_url(video_id_js, integration_js, url) + + def _real_extract(self, url): + url, origin_url = self._unsmuggle_origin_url(url) + embed_id = self._match_id(url) + query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + video_id = query['playlistId'][0] + # perhaps redundant + assert embed_id == video_id + integration = query['integrationId'][0] + return self._download_and_extract_api_data(video_id, integration, + origin_url) From 
6880bf43342d3812e3f145b15c6cc99633dfde65 Mon Sep 17 00:00:00 2001 From: Zenon Mousmoulas Date: Wed, 10 Nov 2021 07:34:16 +0200 Subject: [PATCH 2/8] Force evaluation --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9111fdda8..c77fb7eaf 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -3201,7 +3201,7 @@ class GenericIE(InfoExtractor): rutube_urls, video_id, video_title, ie=RutubeIE.ie_key()) # Look for Glomex embeds - glomex_urls = GlomexEmbedIE._extract_urls(webpage, url) + glomex_urls = list(GlomexEmbedIE._extract_urls(webpage, url)) if glomex_urls: return self.playlist_from_matches( glomex_urls, video_id, video_title, ie=GlomexEmbedIE.ie_key()) From abfc16a1230551da9c09a2e4236fb6034f99853e Mon Sep 17 00:00:00 2001 From: Zenon Mousmoulas Date: Wed, 10 Nov 2021 13:16:35 +0200 Subject: [PATCH 3/8] Regex fixup --- youtube_dl/extractor/glomex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/glomex.py b/youtube_dl/extractor/glomex.py index 031c6f099..9e4abdbd8 100644 --- a/youtube_dl/extractor/glomex.py +++ b/youtube_dl/extractor/glomex.py @@ -128,7 +128,7 @@ class GlomexBaseIE(InfoExtractor): class GlomexIE(GlomexBaseIE): IE_NAME = 'glomex' IE_DESC = 'Glomex videos' - _VALID_URL = r'https?://video.glomex.com/[^/]+/(?P<id>v-[^-]+)' + _VALID_URL = r'https?://video\.glomex\.com/[^/]+/(?P<id>v-[^-]+)' # Hard-coded integration ID for video.glomex.com _INTEGRATION_ID = '19syy24xjn1oqlpc' From 4225c46d3badce037fab845890559e80c6f80423 Mon Sep 17 00:00:00 2001 From: Zenon Mousmoulas Date: Thu, 11 Nov 2021 10:40:33 +0200 Subject: [PATCH 4/8] Revert to _VALID_URL to match video_id and integration * Retrieve the last instance of said parameters that appears in the query string, rather than the first previously * Resolve the respective comment in #30212 --- youtube_dl/extractor/glomex.py | 
15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/glomex.py b/youtube_dl/extractor/glomex.py index 9e4abdbd8..316e27770 100644 --- a/youtube_dl/extractor/glomex.py +++ b/youtube_dl/extractor/glomex.py @@ -6,7 +6,6 @@ import re from .common import InfoExtractor from ..compat import ( compat_str, - compat_parse_qs, compat_urllib_parse_urlparse, compat_urllib_parse_urlencode, ) @@ -263,11 +262,13 @@ class GlomexEmbedIE(GlomexBaseIE): def _real_extract(self, url): url, origin_url = self._unsmuggle_origin_url(url) - embed_id = self._match_id(url) - query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) - video_id = query['playlistId'][0] - # perhaps redundant - assert embed_id == video_id - integration = query['integrationId'][0] + # must return a valid match since it was already tested when selecting the IE + try: + matches = self._VALID_URL_RE.match(url).groupdict() + except AttributeError: + matches = re.match(self._VALID_URL, url).groupdict() + # id is not enforced in the pattern, so do it now; ditto integration + video_id = matches['id'] + integration = matches['integration'] return self._download_and_extract_api_data(video_id, integration, origin_url) From d303e1e05ff6a397a7f762c910e1485c6f2bd1d2 Mon Sep 17 00:00:00 2001 From: Zenon Mousmoulas Date: Thu, 11 Nov 2021 11:16:29 +0200 Subject: [PATCH 5/8] GlomexEmbedIE: Reuse _VALID_URL in _extract_urls * Let _extract_urls reuse _VALID_URL after making scheme optional and simplifying the query string part * Upon an iframe match * Add the scheme to the matched URL, if necessary * Match the URL against the full _VALID_URL --- youtube_dl/extractor/glomex.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/glomex.py b/youtube_dl/extractor/glomex.py index 316e27770..98482fc87 100644 --- a/youtube_dl/extractor/glomex.py +++ b/youtube_dl/extractor/glomex.py @@ -162,7 +162,8 @@ class 
GlomexEmbedIE(GlomexBaseIE): IE_NAME = 'glomex:embed' IE_DESC = 'Glomex embedded videos' _BASE_PLAYER_URL = 'https://player.glomex.com/integration/1/iframe-player.html' - _VALID_URL = r'(?:https?:)?//player\.glomex\.com/integration/[^/]+/iframe-player\.html\?(?:(?:integrationId=(?P<integration>[^&#]+)|playlistId=(?P<id>[^&#]+)|[^&=#]+=[^&#]+)&?)+' + _VALID_URL = r'''(?x)https?://player\.glomex\.com/integration/[^/]+/iframe-player\.html + \?(?:(?:integrationId=(?P<integration>[^&#]+)|playlistId=(?P<id>[^&#]+)|[^&=#]+=[^&#]+)&?)+''' _TESTS = [{ 'url': 'https://player.glomex.com/integration/1/iframe-player.html?integrationId=4059a013k56vb2yd&playlistId=v-cfa6lye0dkdd-sf', @@ -219,12 +220,16 @@ class GlomexEmbedIE(GlomexBaseIE): @classmethod def _extract_urls(cls, webpage, origin_url): + # make the scheme in _VALID_URL optional + _URL_RE = r'(?:https?:)?//' + cls._VALID_URL.split('://', 1)[1] + # simplify the query string part of _VALID_URL; after extracting iframe + # src, the URL will be matched again + _URL_RE = _URL_RE.split(r'\?', 1)[0] + r'\?(?:(?!(?P=_q1)).)+' # https://docs.glomex.com/publisher/video-player-integration/javascript-api/ EMBED_RE = r'''(?x) (?: <iframe[^>]+?src=(?P<_q1>%(quot_re)s) - (?P<url>(?:https?:)?//player\.glomex\.com/integration/[^/]+/iframe-player\.html\? - (?:(?!(?P=_q1)).)+)(?P=_q1)| + (?P<url>%(url_re)s)(?P=_q1)| <(?P<html_tag>glomex-player|div)(?: data-integration-id=(?P<_q2>%(quot_re)s)(?P<integration_html>(?:(?!(?P=_q2)).)+)(?P=_q2)| data-playlist-id=(?P<_q3>%(quot_re)s)(?P<id_html>(?:(?!(?P=_q3)).)+)(?P=_q3)| @@ -240,7 +245,7 @@ class GlomexEmbedIE(GlomexBaseIE): (?:\s|.)*? 
)+</script> ) - ''' % {'quot_re': r'[\"\']'} + ''' % {'quot_re': r'[\"\']', 'url_re': _URL_RE} for mobj in re.finditer(EMBED_RE, webpage): url, html_tag, video_id_html, integration_html, glomex_player, \ script_tag, video_id_js, integration_js = \ @@ -248,7 +253,14 @@ class GlomexEmbedIE(GlomexBaseIE): 'integration_html', 'glomex_player', 'script_tag', 'id_js', 'integration_js') if url: - yield cls._smuggle_origin_url(unescapeHTML(url), origin_url) + url = unescapeHTML(url) + if url.startswith('//'): + scheme = compat_urllib_parse_urlparse(origin_url).scheme \ + if origin_url else 'https' + url = '%s:%s' % (scheme, url) + if not cls.suitable(url): + continue + yield cls._smuggle_origin_url(url, origin_url) elif html_tag: if html_tag == "div" and not glomex_player: continue From 699390c40d56edd892a36d4fc0be07b656c8641c Mon Sep 17 00:00:00 2001 From: Zenon Mousmoulas Date: Sat, 13 Nov 2021 08:47:38 +0200 Subject: [PATCH 6/8] Remove unnecessary quote escape --- youtube_dl/extractor/glomex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/glomex.py b/youtube_dl/extractor/glomex.py index 98482fc87..0913128e2 100644 --- a/youtube_dl/extractor/glomex.py +++ b/youtube_dl/extractor/glomex.py @@ -245,7 +245,7 @@ class GlomexEmbedIE(GlomexBaseIE): (?:\s|.)*? 
)+</script> ) - ''' % {'quot_re': r'[\"\']', 'url_re': _URL_RE} + ''' % {'quot_re': r'["\']', 'url_re': _URL_RE} for mobj in re.finditer(EMBED_RE, webpage): url, html_tag, video_id_html, integration_html, glomex_player, \ script_tag, video_id_js, integration_js = \ From ae8fb74131c9161aef4b85445528fd8c3e970884 Mon Sep 17 00:00:00 2001 From: Zenon Mousmoulas Date: Fri, 19 Nov 2021 08:23:55 +0200 Subject: [PATCH 7/8] Fix typo url -> origin_url --- youtube_dl/extractor/glomex.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/glomex.py b/youtube_dl/extractor/glomex.py index 0913128e2..8af927b71 100644 --- a/youtube_dl/extractor/glomex.py +++ b/youtube_dl/extractor/glomex.py @@ -266,11 +266,13 @@ class GlomexEmbedIE(GlomexBaseIE): continue if not video_id_html or not integration_html: continue - yield cls.build_player_url(video_id_html, integration_html, url) + yield cls.build_player_url(video_id_html, integration_html, + origin_url) elif script_tag: if not video_id_js or not integration_js: continue - yield cls.build_player_url(video_id_js, integration_js, url) + yield cls.build_player_url(video_id_js, integration_js, + origin_url) def _real_extract(self, url): url, origin_url = self._unsmuggle_origin_url(url) From 96800222076e707e1f98364024aa4fcedb980810 Mon Sep 17 00:00:00 2001 From: Zenon Mousmoulas Date: Sun, 12 Dec 2021 20:59:52 +0200 Subject: [PATCH 8/8] Remove unused method --- youtube_dl/extractor/glomex.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/youtube_dl/extractor/glomex.py b/youtube_dl/extractor/glomex.py index 8af927b71..c4b913b06 100644 --- a/youtube_dl/extractor/glomex.py +++ b/youtube_dl/extractor/glomex.py @@ -5,7 +5,6 @@ import re from .common import InfoExtractor from ..compat import ( - compat_str, compat_urllib_parse_urlparse, compat_urllib_parse_urlencode, ) @@ -210,14 +209,6 @@ class GlomexEmbedIE(GlomexBaseIE): player_url = cls._smuggle_origin_url(player_url, origin_url) return player_url - 
@classmethod - def _match_integration(cls, url): - if '_VALID_URL_RE' not in cls.__dict__: - cls._VALID_URL_RE = re.compile(cls._VALID_URL) - m = cls._VALID_URL_RE.match(url) - assert m - return compat_str(m.group('integration')) - @classmethod def _extract_urls(cls, webpage, origin_url): # make the scheme in _VALID_URL optional