From 9bbe366275a051291b6943bb8b8ebdced851c247 Mon Sep 17 00:00:00 2001
From: dirkf
Date: Sun, 16 Apr 2023 19:39:10 +0100
Subject: [PATCH 1/4] [BrightcoveNew] Support `brightcove:new:` pseudo-URL scheme

* scheme (`content_type` is `video` or `playlist`):
  brightcove:new:{account_id}:{player_id}:{embed}:{content_type}:{content_id}
* also support smuggled `force_videoid` to pass desired resulting video_id
---
 youtube_dl/extractor/brightcove.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 6022076ac..ad1f86b89 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -340,7 +340,7 @@ class BrightcoveLegacyIE(InfoExtractor):
 
 class BrightcoveNewIE(AdobePassIE):
     IE_NAME = 'brightcove:new'
-    _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*(?P<content_type>video|playlist)Id=(?P<video_id>\d+|ref:[^&]+)'
+    _VALID_URL = r'(?:brightcove:new|(?P<u>https?)):(?(u)//players\.brightcove\.net/)(?P<account_id>\d+)(?(u)/|:)(?P<player_id>[^/]+)(?(u)_|:)(?P<embed>[^/]+)(?(u)/index\.html\?.*|:)(?P<content_type>video|playlist)(?(u)Id=|:)(?P<video_id>\d+|ref:[^&]+)'
     _TESTS = [{
         'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
         'md5': 'c8100925723840d4b0d243f7025703be',
@@ -593,7 +593,7 @@ class BrightcoveNewIE(AdobePassIE):
             'ip_blocks': smuggled_data.get('geo_ip_blocks'),
         })
 
-        account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()
+        account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()[1:]
 
         policy_key_id = '%s_%s' % (account_id, player_id)
         policy_key = self._downloader.cache.load('brightcove', policy_key_id)
@@ -678,4 +678,4 @@ class BrightcoveNewIE(AdobePassIE):
                 json_data.get('description'))
 
         return self._parse_brightcove_metadata(
-            json_data, video_id, headers=headers)
+            json_data, smuggled_data.get('force_videoid') or video_id, headers=headers)

From dc990a61cc4d08005e1266281df75c466eedf514 Mon Sep 17 00:00:00 2001
From: dirkf
Date: Sun, 16 Apr 2023 21:20:07 +0100
Subject: [PATCH 2/4] [compat] Make parse_qs[l] match Py3.10 for Py>=2.6

* support only default separator '&', not r'[&;]', like 3.10+
* support max_num_fields, like 3.8+
---
 youtube_dl/compat.py | 114 +++++++++++++++++++++++++------------
 1 file changed, 67 insertions(+), 47 deletions(-)

diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index fe62caf80..6a2fb28cb 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -2398,12 +2398,12 @@ try:
 except ImportError:
     import BaseHTTPServer as compat_http_server
 
+# urllib.parse
 try:
     from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
     from urllib.parse import unquote as compat_urllib_parse_unquote
     from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
     from urllib.parse import urlencode as compat_urllib_parse_urlencode
-    from urllib.parse import parse_qs as compat_parse_qs
 except ImportError:  # Python 2
     _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
                 else re.compile(r'([\x00-\x7f]+)'))
@@ -2493,60 +2493,80 @@ except ImportError:  # Python 2
         return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq)
 
-    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
-    # Python 2's version is apparently totally broken
-    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
-                   encoding='utf-8', errors='replace'):
-        qs, _coerce_result = qs, compat_str
-        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
-        r = []
-        for name_value in pairs:
-            if not name_value and not strict_parsing:
-                continue
-            nv = name_value.split('=', 1)
-            if len(nv) != 2:
-                if strict_parsing:
-                    raise ValueError('bad query field: %r' % (name_value,))
-                # Handle case of a control-name with no equal sign
-                if keep_blank_values:
-                    nv.append('')
-                else:
-                    continue
-            if len(nv[1]) or keep_blank_values:
-                name = nv[0].replace('+', ' ')
-                name = compat_urllib_parse_unquote(
-                    name, encoding=encoding, errors=errors)
-                name = _coerce_result(name)
-                value = nv[1].replace('+', ' ')
-                value = compat_urllib_parse_unquote(
-                    value, encoding=encoding, errors=errors)
-                value = _coerce_result(value)
-                r.append((name, value))
-        return r
-
-    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
-                        encoding='utf-8', errors='replace'):
-        parsed_result = {}
-        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
-                           encoding=encoding, errors=errors)
-        for name, value in pairs:
-            if name in parsed_result:
-                parsed_result[name].append(value)
-            else:
-                parsed_result[name] = [value]
-        return parsed_result
-
     setattr(compat_urllib_parse, '_urlencode', getattr(compat_urllib_parse, 'urlencode'))
     for name, fix in (
             ('unquote_to_bytes', compat_urllib_parse_unquote_to_bytes),
             ('parse_unquote', compat_urllib_parse_unquote),
             ('unquote_plus', compat_urllib_parse_unquote_plus),
-            ('urlencode', compat_urllib_parse_urlencode),
-            ('parse_qs', compat_parse_qs)):
+            ('urlencode', compat_urllib_parse_urlencode)):
         setattr(compat_urllib_parse, name, fix)
 
+finally:
+    try:
+        # arguments changed in 3.8 and 3.10
+        from urllib.parse import parse_qs as _parse_qs
+        _parse_qs('a=b', separator='&')
+        compat_parse_qs = _parse_qs
+    except (ImportError, TypeError):  # Python 2, < 3.10
-compat_urllib_parse_parse_qs = compat_parse_qs
+        # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
+        # Python 2's version is apparently totally broken
+        # Also use this implementation for Py < 3.10
+        # * support only default separator '&', not r'[&;]', like 3.10+
+        # * support max_num_fields, like 3.8+
+        def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
+                       encoding='utf-8', errors='replace',
+                       max_num_fields=None, separator='&'):
+            if not isinstance(separator, (compat_str, str)):
+                raise ValueError('Separator must be of type string or bytes')
+            # DoS protection, if anyone cares
+            if qs and max_num_fields is not None and qs.count(separator) >= max_num_fields:
+                raise ValueError('Too many fields')
+            _coerce_result = compat_str
+            r = []
+            for name_value in qs.split(separator):
+                if not name_value and not strict_parsing:
+                    continue
+                nv = name_value.split('=', 1)
+                if len(nv) != 2:
+                    if strict_parsing:
+                        raise ValueError('bad query field: %r' % (name_value,))
+                    # Handle case of a control-name with no equal sign
+                    if keep_blank_values:
+                        nv.append('')
+                    else:
+                        continue
+                if len(nv[1]) or keep_blank_values:
+                    name = nv[0].replace('+', ' ')
+                    name = compat_urllib_parse_unquote(
+                        name, encoding=encoding, errors=errors)
+                    name = _coerce_result(name)
+                    value = nv[1].replace('+', ' ')
+                    value = compat_urllib_parse_unquote(
+                        value, encoding=encoding, errors=errors)
+                    value = _coerce_result(value)
+                    r.append((name, value))
+            return r
+
+        def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
+                            encoding='utf-8', errors='replace',
+                            max_num_fields=None, separator='&'):
+            parsed_result = {}
+            pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
+                               encoding, errors, max_num_fields, separator)
+            for name, value in pairs:
+                if name in parsed_result:
+                    parsed_result[name].append(value)
+                else:
+                    parsed_result[name] = [value]
+            return parsed_result
+
+        for name, fix in (
+                ('parse_qs', compat_parse_qs),
+                ('parse_qsl', _parse_qsl)):
+            setattr(compat_urllib_parse, name, fix)
+
+    compat_urllib_parse_parse_qs = compat_parse_qs
 
 try:
     from urllib.request import DataHandler as compat_urllib_request_DataHandler

From 74e39ca0fd91ec3d55140cab292eb9ef98e9f3d5 Mon Sep 17 00:00:00 2001
From: dirkf
Date: Sun, 16 Apr 2023 21:24:09 +0100
Subject: [PATCH 3/4] [utils] Allow kwargs for `parse_qs()`

* supported by `compat_parse_qs()`: keep_blank_values=False, strict_parsing=False,
  encoding='utf-8', errors='replace', max_num_fields=None, separator='&'
* now equivalent to yt-dlp
---
 youtube_dl/utils.py | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d80ceb007..716fd36b9 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2388,7 +2388,7 @@ class YoutubeDLError(Exception):
 class ExtractorError(YoutubeDLError):
     """Error during info extraction."""
 
-    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
+    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
         """ tb, if given, is the original traceback (so that it can be
         printed out).
         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
""" @@ -2407,6 +2407,7 @@ class ExtractorError(YoutubeDLError): self.exc_info = sys.exc_info() # preserve original exception self.cause = cause self.video_id = video_id + self.ie = ie def format_traceback(self): if self.traceback is None: @@ -4096,8 +4097,8 @@ def escape_url(url): ).geturl() -def parse_qs(url): - return compat_parse_qs(compat_urllib_parse.urlparse(url).query) +def parse_qs(url, **kwargs): + return compat_parse_qs(compat_urllib_parse.urlparse(url).query, **kwargs) def read_batch_urls(batch_fd): @@ -6191,3 +6192,24 @@ def join_nonempty(*values, **kwargs): if from_dict is not None: values = (traverse_obj(from_dict, variadic(v)) for v in values) return delim.join(map(compat_str, filter(None, values))) + + +class classproperty(object): + """property access for class methods with optional caching""" + def __new__(cls, *args, **kwargs): + func = args[0] if len(args) > 0 else kwargs.get('func') + if not func: + return functools.partial(cls, *args, **kwargs) + return super(classproperty, cls).__new__(cls) + + def __init__(self, func, cache=False): + functools.update_wrapper(self, func) + self.func = func + self._cache = {} if cache else None + + def __get__(self, _, cls): + if self._cache is None: + return self.func(cls) + elif cls not in self._cache: + self._cache[cls] = self.func(cls) + return self._cache[cls] From d7b502a7278097f68592dc5f6423141be7c69efb Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 16 Apr 2023 21:25:04 +0100 Subject: [PATCH 4/4] [BFIPlayer] Support Brightcove video host, replacing Ooyala --- youtube_dl/extractor/bfi.py | 44 ++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/bfi.py b/youtube_dl/extractor/bfi.py index 60c8944b5..cf4512caa 100644 --- a/youtube_dl/extractor/bfi.py +++ b/youtube_dl/extractor/bfi.py @@ -4,7 +4,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import extract_attributes +from ..utils import ( + extract_attributes, + parse_qs, + remove_start, + smuggle_url, +) class BFIPlayerIE(InfoExtractor): @@ -12,26 +17,39 @@ class BFIPlayerIE(InfoExtractor): _VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P[\w-]+)-online' _TEST = { 'url': 'https://player.bfi.org.uk/free/film/watch-computer-doctor-1974-online', - 'md5': 'e8783ebd8e061ec4bc6e9501ed547de8', + 'md5': '15598bdd6a413ce9363970754f054d76', 'info_dict': { 'id': 'htNnhlZjE60C9VySkQEIBtU-cNV1Xx63', 'ext': 'mp4', 'title': 'Computer Doctor', 'description': 'md5:fb6c240d40c4dbe40428bdd62f78203b', + 'timestamp': 1564424975, + 'upload_date': '20190729', + 'uploader_id': '6057949427001', }, - 'skip': 'BFI Player films cannot be played outside of the UK', + # 'skip': 'BFI Player films cannot be played outside of the UK', } + _BRIGHTCOVE_ACCOUNT_ID = '6057949427001' def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - entries = [] - for player_el in re.findall(r'(?s)<[^>]+class="player"[^>]*>', webpage): - player_attr = extract_attributes(player_el) - ooyala_id = player_attr.get('data-video-id') - if not ooyala_id: - continue - entries.append(self.url_result( - 'ooyala:' + ooyala_id, 'Ooyala', - ooyala_id, player_attr.get('data-label'))) - return self.playlist_result(entries) + + film_only = 'play-film' in parse_qs(url, keep_blank_values=True) + + def entries(): + for player_el in re.finditer(r'(?s)]+>', webpage): + player_attr = extract_attributes(player_el.group(0)) + bcv_id, account_id, player_id, 
+                    player_attr.get(x) for x in ('data-ref-id', 'data-acid', 'data-pid', 'data-embed'))
+                if not bcv_id:
+                    continue
+                if film_only and player_attr.get('data-video-type') != 'film':
+                    continue
+                bc_url = 'brightcove:new:%s:%s:%s:video:ref:%s' % (
+                    account_id or self._BRIGHTCOVE_ACCOUNT_ID, player_id or 'default', embed or 'default', bcv_id)
+
+                yield self.url_result(smuggle_url(
+                    bc_url, {'referrer': url, 'force_videoid': remove_start(bcv_id, 'ref:')}), ie='BrightcoveNew', video_id=video_id)
+
+        return self.playlist_result(entries())
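
Taken together, the patches let a host extractor hand an embedded Brightcove video to
BrightcoveNewIE without a players.brightcove.net URL, the way the reworked bfi.py does,
and let parse_qs() forward urllib-style keyword arguments. The snippet below is a minimal
sketch of that hand-off, not code from the patches; the account, player and reference IDs
and the example page URL are made-up placeholders:

    from youtube_dl.utils import parse_qs, smuggle_url

    # Placeholder values for illustration only
    account_id = '1234567890001'
    player_id = 'default'
    embed = 'default'
    ref_id = 'some-reference-id'

    # Pseudo-URL scheme added in PATCH 1/4:
    #   brightcove:new:{account_id}:{player_id}:{embed}:{content_type}:{content_id}
    bc_url = 'brightcove:new:%s:%s:%s:video:ref:%s' % (account_id, player_id, embed, ref_id)

    # Smuggle extra hints for BrightcoveNewIE: 'referrer' was already supported;
    # 'force_videoid' (PATCH 1/4) overrides the video id reported in the result.
    url = smuggle_url(bc_url, {
        'referrer': 'https://player.example.com/free/film/watch-something-online',
        'force_videoid': ref_id,
    })

    # After PATCH 2/4 and 3/4, parse_qs() accepts the stdlib keyword arguments and
    # passes them to compat_parse_qs(), which bfi.py uses to detect '?play-film'.
    assert 'play-film' in parse_qs(
        'https://player.example.com/free/film/watch-something-online?play-film',
        keep_blank_values=True)

The smuggled URL would then be passed to url_result() with ie='BrightcoveNew', exactly as
the new bfi.py code above does.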