MegaTVComEmbedIE: Make canonical URL extraction more robust
This commit is contained in:
parent
28fddc1758
commit
96a0ad4778
@@ -166,10 +166,26 @@ class MegaTVComEmbedIE(MegaTVComBaseIE):
|
|||||||
url = '%s:%s' % (scheme, url)
|
url = '%s:%s' % (scheme, url)
|
||||||
yield url
|
yield url
|
||||||
|
|
||||||
|
def _match_canonical_url(self, webpage):
    """Return the canonical URL declared by a ``<link>`` tag in *webpage*.

    Every ``<link ...>`` tag is scanned; the HTML-unescaped ``href`` of the
    first tag carrying both ``rel="canonical"`` and a quoted, non-empty
    ``href`` attribute (in either order, single or double quotes) is
    returned.  Returns ``None`` when no such tag is found.
    """
    # The tag interior is consumed one character per iteration (``[^>]``)
    # instead of with a nested lazy run (``[^>]*?``) inside the repeated
    # group.  A variable-length run inside a quantified group lets the
    # backtracking engine split the same text in exponentially many ways,
    # all of which are tried when the closing ``>`` is missing (e.g. a
    # truncated page).  Alternative order is unchanged, so ``rel``/``href``
    # are still preferred at every position.
    LINK_RE = r'''(?x)
        <link(?:
            rel=(?P<_q1>%(quot_re)s)(?P<canonical>canonical)(?P=_q1)|
            href=(?P<_q2>%(quot_re)s)(?P<href>(?:(?!(?P=_q2)).)+)(?P=_q2)|
            [^>]
        )*>
    ''' % {'quot_re': r'["\']'}
    for mobj in re.finditer(LINK_RE, webpage):
        canonical, href = mobj.group('canonical', 'href')
        # Both attributes must have been seen inside the same tag.
        if canonical and href:
            return unescapeHTML(href)
    return None
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
webpage = self._download_webpage(url, 'N/A')
|
webpage = self._download_webpage(url, 'N/A')
|
||||||
player_attrs = self._extract_player_attrs(webpage)
|
player_attrs = self._extract_player_attrs(webpage)
|
||||||
canonical_url = player_attrs['share_url']
|
canonical_url = player_attrs.get('share_url') or \
|
||||||
|
self._match_canonical_url(webpage)
|
||||||
|
if not canonical_url:
|
||||||
|
raise ExtractorError('canonical URL not found')
|
||||||
video_id = compat_parse_qs(compat_urllib_parse_urlparse(
|
video_id = compat_parse_qs(compat_urllib_parse_urlparse(
|
||||||
canonical_url).query)['p'][0]
|
canonical_url).query)['p'][0]
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user