GlomexEmbedIE: Reuse _VALID_URL in _extract_urls
* Let _extract_urls reuse _VALID_URL after making the scheme optional and simplifying the query string part
* Upon an iframe match:
  * Add the scheme to the matched URL, if necessary
  * Match the URL against the full _VALID_URL
This commit is contained in:
parent
4225c46d3b
commit
d303e1e05f
@ -162,7 +162,8 @@ class GlomexEmbedIE(GlomexBaseIE):
|
|||||||
IE_NAME = 'glomex:embed'
|
IE_NAME = 'glomex:embed'
|
||||||
IE_DESC = 'Glomex embedded videos'
|
IE_DESC = 'Glomex embedded videos'
|
||||||
_BASE_PLAYER_URL = 'https://player.glomex.com/integration/1/iframe-player.html'
|
_BASE_PLAYER_URL = 'https://player.glomex.com/integration/1/iframe-player.html'
|
||||||
_VALID_URL = r'(?:https?:)?//player\.glomex\.com/integration/[^/]+/iframe-player\.html\?(?:(?:integrationId=(?P<integration>[^&#]+)|playlistId=(?P<id>[^&#]+)|[^&=#]+=[^&#]+)&?)+'
|
_VALID_URL = r'''(?x)https?://player\.glomex\.com/integration/[^/]+/iframe-player\.html
|
||||||
|
\?(?:(?:integrationId=(?P<integration>[^&#]+)|playlistId=(?P<id>[^&#]+)|[^&=#]+=[^&#]+)&?)+'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://player.glomex.com/integration/1/iframe-player.html?integrationId=4059a013k56vb2yd&playlistId=v-cfa6lye0dkdd-sf',
|
'url': 'https://player.glomex.com/integration/1/iframe-player.html?integrationId=4059a013k56vb2yd&playlistId=v-cfa6lye0dkdd-sf',
|
||||||
@ -219,12 +220,16 @@ class GlomexEmbedIE(GlomexBaseIE):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _extract_urls(cls, webpage, origin_url):
|
def _extract_urls(cls, webpage, origin_url):
|
||||||
|
# make the scheme in _VALID_URL optional
|
||||||
|
_URL_RE = r'(?:https?:)?//' + cls._VALID_URL.split('://', 1)[1]
|
||||||
|
# simplify the query string part of _VALID_URL; after extracting iframe
|
||||||
|
# src, the URL will be matched again
|
||||||
|
_URL_RE = _URL_RE.split(r'\?', 1)[0] + r'\?(?:(?!(?P=_q1)).)+'
|
||||||
# https://docs.glomex.com/publisher/video-player-integration/javascript-api/
|
# https://docs.glomex.com/publisher/video-player-integration/javascript-api/
|
||||||
EMBED_RE = r'''(?x)
|
EMBED_RE = r'''(?x)
|
||||||
(?:
|
(?:
|
||||||
<iframe[^>]+?src=(?P<_q1>%(quot_re)s)
|
<iframe[^>]+?src=(?P<_q1>%(quot_re)s)
|
||||||
(?P<url>(?:https?:)?//player\.glomex\.com/integration/[^/]+/iframe-player\.html\?
|
(?P<url>%(url_re)s)(?P=_q1)|
|
||||||
(?:(?!(?P=_q1)).)+)(?P=_q1)|
|
|
||||||
<(?P<html_tag>glomex-player|div)(?:
|
<(?P<html_tag>glomex-player|div)(?:
|
||||||
data-integration-id=(?P<_q2>%(quot_re)s)(?P<integration_html>(?:(?!(?P=_q2)).)+)(?P=_q2)|
|
data-integration-id=(?P<_q2>%(quot_re)s)(?P<integration_html>(?:(?!(?P=_q2)).)+)(?P=_q2)|
|
||||||
data-playlist-id=(?P<_q3>%(quot_re)s)(?P<id_html>(?:(?!(?P=_q3)).)+)(?P=_q3)|
|
data-playlist-id=(?P<_q3>%(quot_re)s)(?P<id_html>(?:(?!(?P=_q3)).)+)(?P=_q3)|
|
||||||
@ -240,7 +245,7 @@ class GlomexEmbedIE(GlomexBaseIE):
|
|||||||
(?:\s|.)*?
|
(?:\s|.)*?
|
||||||
)+</script>
|
)+</script>
|
||||||
)
|
)
|
||||||
''' % {'quot_re': r'[\"\']'}
|
''' % {'quot_re': r'[\"\']', 'url_re': _URL_RE}
|
||||||
for mobj in re.finditer(EMBED_RE, webpage):
|
for mobj in re.finditer(EMBED_RE, webpage):
|
||||||
url, html_tag, video_id_html, integration_html, glomex_player, \
|
url, html_tag, video_id_html, integration_html, glomex_player, \
|
||||||
script_tag, video_id_js, integration_js = \
|
script_tag, video_id_js, integration_js = \
|
||||||
@ -248,7 +253,14 @@ class GlomexEmbedIE(GlomexBaseIE):
|
|||||||
'integration_html', 'glomex_player', 'script_tag',
|
'integration_html', 'glomex_player', 'script_tag',
|
||||||
'id_js', 'integration_js')
|
'id_js', 'integration_js')
|
||||||
if url:
|
if url:
|
||||||
yield cls._smuggle_origin_url(unescapeHTML(url), origin_url)
|
url = unescapeHTML(url)
|
||||||
|
if url.startswith('//'):
|
||||||
|
scheme = compat_urllib_parse_urlparse(origin_url).scheme \
|
||||||
|
if origin_url else 'https'
|
||||||
|
url = '%s:%s' % (scheme, url)
|
||||||
|
if not cls.suitable(url):
|
||||||
|
continue
|
||||||
|
yield cls._smuggle_origin_url(url, origin_url)
|
||||||
elif html_tag:
|
elif html_tag:
|
||||||
if html_tag == "div" and not glomex_player:
|
if html_tag == "div" and not glomex_player:
|
||||||
continue
|
continue
|
||||||
|
Loading…
Reference in New Issue
Block a user