Compare commits

...

5 Commits

Author SHA1 Message Date
dirkf
bd9918040d
Merge d7b502a727 into 0153b387e5 2024-06-12 23:17:11 +00:00
dirkf
d7b502a727 [BFIPlayer] Support Brightcove video host, replacing Ooyala 2023-04-16 21:25:04 +01:00
dirkf
74e39ca0fd [utils] Allow kwargs for parse_qs()
* supported by `compat_parse_qs()`:
keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace'
* now equivalent to yt-dlp
2023-04-16 21:24:09 +01:00
dirkf
dc990a61cc [compat] Make parse_qs[l] match Py3.10 for Py>=2.6
* support only default separator '&', not r'[&;]', like 3.10+
* support max_num_fields, like 3.8+
2023-04-16 21:20:07 +01:00
dirkf
9bbe366275 [BrightcoveNew] Support brightcove:new: pseudo-URL scheme
* scheme (`content_type` is `video` or `playlist`):
brightcove:new:{account_id}:{player_id}:{embed}:{content_type}:{conte>
* also support smuggled `force_videoid` to pass desired resulting video_id
2023-04-16 19:39:10 +01:00
4 changed files with 124 additions and 64 deletions

View File

@ -2448,12 +2448,12 @@ try:
except ImportError:
import BaseHTTPServer as compat_http_server
# urllib.parse
try:
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
from urllib.parse import unquote as compat_urllib_parse_unquote
from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
from urllib.parse import urlencode as compat_urllib_parse_urlencode
from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
_asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
else re.compile(r'([\x00-\x7f]+)'))
@ -2543,14 +2543,38 @@ except ImportError: # Python 2
return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq)
setattr(compat_urllib_parse, '_urlencode',
getattr(compat_urllib_parse, 'urlencode'))
for name, fix in (
('unquote_to_bytes', compat_urllib_parse_unquote_to_bytes),
('parse_unquote', compat_urllib_parse_unquote),
('unquote_plus', compat_urllib_parse_unquote_plus),
('urlencode', compat_urllib_parse_urlencode)):
setattr(compat_urllib_parse, name, fix)
finally:
try:
# arguments changed in 3.8 and 3.10
from urllib.parse import parse_qs as _parse_qs
_parse_qs('a=b', separator='&')
compat_parse_qs = _parse_qs
except (ImportError, TypeError): # Python 2, < 3.10
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
# Python 2's version is apparently totally broken
# Also use this implementation for Py < 3.10
# * support only default separator '&', not r'[&;]', like 3.10+
# * support max_num_fields, like 3.8+
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
encoding='utf-8', errors='replace'):
qs, _coerce_result = qs, compat_str
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
encoding='utf-8', errors='replace',
max_num_fields=None, separator='&'):
if not isinstance(separator, (compat_str, str)):
raise ValueError('Separator must be of type string or bytes')
# DoS protection, if anyone cares
if qs and max_num_fields is not None and qs.count(separator) >= max_num_fields:
raise ValueError('Too many fields')
_coerce_result = compat_str
r = []
for name_value in pairs:
for name_value in qs.split(separator):
if not name_value and not strict_parsing:
continue
nv = name_value.split('=', 1)
@ -2575,10 +2599,11 @@ except ImportError: # Python 2
return r
def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
encoding='utf-8', errors='replace'):
encoding='utf-8', errors='replace',
max_num_fields=None, separator='&'):
parsed_result = {}
pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
encoding=encoding, errors=errors)
encoding, errors, max_num_fields, separator)
for name, value in pairs:
if name in parsed_result:
parsed_result[name].append(value)
@ -2586,14 +2611,9 @@ except ImportError: # Python 2
parsed_result[name] = [value]
return parsed_result
setattr(compat_urllib_parse, '_urlencode',
getattr(compat_urllib_parse, 'urlencode'))
for name, fix in (
('unquote_to_bytes', compat_urllib_parse_unquote_to_bytes),
('parse_unquote', compat_urllib_parse_unquote),
('unquote_plus', compat_urllib_parse_unquote_plus),
('urlencode', compat_urllib_parse_urlencode),
('parse_qs', compat_parse_qs)):
('parse_qs', compat_parse_qs),
('parse_qsl', _parse_qsl)):
setattr(compat_urllib_parse, name, fix)
compat_urllib_parse_parse_qs = compat_parse_qs

View File

@ -4,7 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import extract_attributes
from ..utils import (
extract_attributes,
parse_qs,
remove_start,
smuggle_url,
)
class BFIPlayerIE(InfoExtractor):
@ -12,26 +17,39 @@ class BFIPlayerIE(InfoExtractor):
_VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P<id>[\w-]+)-online'
_TEST = {
'url': 'https://player.bfi.org.uk/free/film/watch-computer-doctor-1974-online',
'md5': 'e8783ebd8e061ec4bc6e9501ed547de8',
'md5': '15598bdd6a413ce9363970754f054d76',
'info_dict': {
'id': 'htNnhlZjE60C9VySkQEIBtU-cNV1Xx63',
'ext': 'mp4',
'title': 'Computer Doctor',
'description': 'md5:fb6c240d40c4dbe40428bdd62f78203b',
'timestamp': 1564424975,
'upload_date': '20190729',
'uploader_id': '6057949427001',
},
'skip': 'BFI Player films cannot be played outside of the UK',
# 'skip': 'BFI Player films cannot be played outside of the UK',
}
_BRIGHTCOVE_ACCOUNT_ID = '6057949427001'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
entries = []
for player_el in re.findall(r'(?s)<[^>]+class="player"[^>]*>', webpage):
player_attr = extract_attributes(player_el)
ooyala_id = player_attr.get('data-video-id')
if not ooyala_id:
film_only = 'play-film' in parse_qs(url, keep_blank_values=True)
def entries():
for player_el in re.finditer(r'(?s)<video-js\b[^>]+>', webpage):
player_attr = extract_attributes(player_el.group(0))
bcv_id, account_id, player_id, embed = (
player_attr.get(x) for x in ('data-ref-id', 'data-acid', 'data-pid', 'data-embed'))
if not bcv_id:
continue
entries.append(self.url_result(
'ooyala:' + ooyala_id, 'Ooyala',
ooyala_id, player_attr.get('data-label')))
return self.playlist_result(entries)
if film_only and player_attr.get('data-video-type') != 'film':
continue
bc_url = 'brightcove:new:%s:%s:%s:video:ref:%s' % (
account_id or self._BRIGHTCOVE_ACCOUNT_ID, player_id or 'default', embed or 'default', bcv_id)
yield self.url_result(smuggle_url(
bc_url, {'referrer': url, 'force_videoid': remove_start(bcv_id, 'ref:')}), ie='BrightcoveNew', video_id=video_id)
return self.playlist_result(entries())

View File

@ -340,7 +340,7 @@ class BrightcoveLegacyIE(InfoExtractor):
class BrightcoveNewIE(AdobePassIE):
IE_NAME = 'brightcove:new'
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*(?P<content_type>video|playlist)Id=(?P<video_id>\d+|ref:[^&]+)'
_VALID_URL = r'(?:brightcove:new|(?P<u>https?)):(?(u)//players\.brightcove\.net/)(?P<account_id>\d+)(?(u)/|:)(?P<player_id>[^/]+)(?(u)_|:)(?P<embed>[^/]+)(?(u)/index\.html\?.*|:)(?P<content_type>video|playlist)(?(u)Id=|:)(?P<video_id>\d+|ref:[^&]+)'
_TESTS = [{
'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
'md5': 'c8100925723840d4b0d243f7025703be',
@ -593,7 +593,7 @@ class BrightcoveNewIE(AdobePassIE):
'ip_blocks': smuggled_data.get('geo_ip_blocks'),
})
account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()
account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()[1:]
policy_key_id = '%s_%s' % (account_id, player_id)
policy_key = self._downloader.cache.load('brightcove', policy_key_id)
@ -678,4 +678,4 @@ class BrightcoveNewIE(AdobePassIE):
json_data.get('description'))
return self._parse_brightcove_metadata(
json_data, video_id, headers=headers)
json_data, smuggled_data.get('force_videoid') or video_id, headers=headers)

View File

@ -2402,7 +2402,7 @@ class YoutubeDLError(Exception):
class ExtractorError(YoutubeDLError):
"""Error during info extraction."""
def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
""" tb, if given, is the original traceback (so that it can be printed out).
If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
"""
@ -2421,6 +2421,7 @@ class ExtractorError(YoutubeDLError):
self.exc_info = sys.exc_info() # preserve original exception
self.cause = cause
self.video_id = video_id
self.ie = ie
def format_traceback(self):
if self.traceback is None:
@ -6561,3 +6562,24 @@ def join_nonempty(*values, **kwargs):
if from_dict is not None:
values = (traverse_obj(from_dict, variadic(v)) for v in values)
return delim.join(map(compat_str, filter(None, values)))
class classproperty(object):
    """Descriptor that exposes a function of the owning class as an attribute.

    Can be applied bare (``@classproperty``) or configured first
    (``@classproperty(cache=True)``).  With ``cache`` enabled the wrapped
    function is evaluated once per owning class and the result is reused.
    """

    def __new__(cls, *args, **kwargs):
        # The wrapped function is either the first positional argument or
        # the ``func`` keyword.
        target = kwargs.get('func') if len(args) == 0 else args[0]
        if target:
            return super(classproperty, cls).__new__(cls)
        # No function yet: behave as a decorator factory and wait for it,
        # keeping whatever options were already given.
        return functools.partial(cls, *args, **kwargs)

    def __init__(self, func, cache=False):
        # Copy the wrapped function's metadata first; the attributes set
        # below are assigned afterwards.
        functools.update_wrapper(self, func)
        self.func = func
        # A dict keyed by owning class when caching, otherwise None.
        self._cache = {} if cache else None

    def __get__(self, _, cls):
        # The instance argument is ignored: the value depends on the class only.
        store = self._cache
        if store is None:
            return self.func(cls)
        if cls not in store:
            store[cls] = self.func(cls)
        return store[cls]