From ff75c300f52321dc7322e28d1df153cf0ea65a6d Mon Sep 17 00:00:00 2001 From: dirkf Date: Sat, 17 Jun 2023 15:34:11 +0100 Subject: [PATCH 1/4] [jsinterp] Fix test for failed match in extract_object() --- youtube_dl/jsinterp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 9d4a5bc57..c18c4fef1 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -985,9 +985,9 @@ class JSInterpreter(object): \((?P[^)]*)\)\s* (?P{.+})''' % {'name': re.escape(funcname)}, self.code) - code, _ = self._separate_at_paren(func_m.group('code')) # refine the match if func_m is None: raise self.Exception('Could not find JS function "{funcname}"'.format(**locals())) + code, _ = self._separate_at_paren(func_m.group('code')) # refine the match return self.build_arglist(func_m.group('args')), code def extract_function(self, funcname): From d6433cbb2c4440056a38846e35bb5a3efa9bcac2 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sat, 17 Jun 2023 15:43:10 +0100 Subject: [PATCH 2/4] [jsinterp] Don't find unrelated objects --- youtube_dl/jsinterp.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index c18c4fef1..00f219440 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -941,15 +941,15 @@ class JSInterpreter(object): _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')''' obj = {} obj_m = re.search( - r'''(?x) - (?(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*) - }\s*; - ''' % (re.escape(objname), _FUNC_NAME_RE), + r'''(?xs) + (?:{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s* + (?P({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*) + }}\s*); + '''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE), self.code) - if not obj_m: + fields = obj_m and obj_m.group('fields') + if fields is None: raise self.Exception('Could not find object ' + objname) - fields = obj_m.group('fields') # Currently, it only supports function definitions fields_m = re.finditer( r'''(?x) From ae8ba2c31977b68b75221f80c488c0b12385269c Mon Sep 17 00:00:00 2001 From: dirkf Date: Sat, 17 Jun 2023 15:36:39 +0100 Subject: [PATCH 3/4] [YouTube] Fix `KeyError QV` in signature extraction failed * temporarily force missing global definition into sig JS * improve test: thanks https://github.com/yt-dlp/yt-dlp/issues/7327#issuecomment-1595274615 * resolves #32314 --- test/test_youtube_signature.py | 7 ++++++- youtube_dl/extractor/youtube.py | 6 +++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index e7bce9d68..4ba586e53 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -63,6 +63,11 @@ _SIG_TESTS = [ 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12', '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3', + ), + ( + 'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js', + '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', + 'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0', ) ] @@ -231,7 +236,7 @@ def n_sig(jscode, sig_input): make_sig_test = t_factory( - 'signature', signature, re.compile(r'.*-(?P[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$')) + 'signature', signature, re.compile(r'(?s).*(?:-|/player/)(?P[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$')) for test_spec in _SIG_TESTS: make_sig_test(*test_spec) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0411c49f1..0bbce71a3 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1569,8 +1569,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') - jsi = JSInterpreter(jscode) + # temporary (please) hack for player 6ed0d907 #32314 + ah = 'var AH={LR:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c},QV:function(a){a.reverse()},pO:function(a,b){a.splice(0,b)}};' + jsi = JSInterpreter(ah + jscode) + initial_function = jsi.extract_function(funcname) + return lambda s: initial_function([s]) def _decrypt_signature(self, s, video_id, player_url): From 07af47960f3bb262ead02490ce65c8c45c01741e Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 18 Jun 2023 00:52:18 +0100 Subject: [PATCH 4/4] [YouTube] Improve fix for ae8ba2c Thx: https://github.com/yt-dlp/yt-dlp/commit/01aba25 --- youtube_dl/extractor/youtube.py | 4 +--- youtube_dl/jsinterp.py | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0bbce71a3..1855fca7f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1569,9 +1569,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') - # temporary (please) hack for player 6ed0d907 #32314 - ah = 'var AH={LR:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c},QV:function(a){a.reverse()},pO:function(a,b){a.splice(0,b)}};' - jsi = JSInterpreter(ah + jscode) + jsi = JSInterpreter(jscode) initial_function = jsi.extract_function(funcname) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 00f219440..1ba9c3d67 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -940,15 +940,18 @@ class JSInterpreter(object): def extract_object(self, objname): _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')''' obj = {} - obj_m = re.search( - r'''(?xs) - (?:{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s* - (?P({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*) - }}\s*); - '''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE), - self.code) - fields = obj_m and obj_m.group('fields') - if fields is None: + fields = None + for obj_m in re.finditer( + r'''(?xs) + {0}\s*\.\s*{1}|{1}\s*=\s*\{{\s* + (?P({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*) + }}\s*; + '''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE), + self.code): + fields = obj_m.group('fields') + if fields: + break + else: raise self.Exception('Could not find object ' + objname) # Currently, it only supports function definitions fields_m = re.finditer(