From 9112e668a5ea6376017718db9ff13b369d53ad7a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 22 Jun 2023 13:23:31 +0530 Subject: [PATCH 1/3] [YouTube] Improve nsig function name extraction Fixes player b7910ca8, using `,` vs `;` See https://github.com/ytdl-org/youtube-dl/issues/32292#issuecomment-1602231170 Co-authored-by: dirkf --- test/test_youtube_signature.py | 11 +++-------- youtube_dl/extractor/youtube.py | 19 +++++++++++++------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 4ba586e53..5dcabaf95 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -63,11 +63,6 @@ _SIG_TESTS = [ 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12', '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3', - ), - ( - 'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js', - '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - 'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0', ) ] @@ -157,8 +152,8 @@ _NSIG_TESTS = [ 'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ', ), ( - 'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js', - 'E2AQVN6y_zM7uN9w8z', '9A2dbY5GDZrt9A', + 'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js', + '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ', ), ] @@ -236,7 +231,7 @@ def n_sig(jscode, sig_input): make_sig_test = t_factory( - 'signature', signature, re.compile(r'(?s).*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$')) + 'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$')) for test_spec in _SIG_TESTS: make_sig_test(*test_spec) diff --git 
a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1855fca7f..24e2efbd9 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1623,15 +1623,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor): nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx') if not idx: return nfunc + + VAR_RE_TMPL = r'var\s+%s\s*=\s*(?P<name>\[(?P<alias>%s)\])[;,]' + note = 'Initial JS player n function {0} (%s[%s])' % (nfunc, idx) + + def search_function_code(needle, group): + return self._search_regex( + VAR_RE_TMPL % (re.escape(nfunc), needle), jscode, + note.format(group), group=group) + if int_or_none(idx) == 0: - real_nfunc = self._search_regex( - r'var %s\s*=\s*\[([a-zA-Z_$][\w$]*)\];' % (re.escape(nfunc), ), jscode, - 'Initial JS player n function alias ({nfunc}[{idx}])'.format(**locals())) + real_nfunc = search_function_code(r'[a-zA-Z_$][\w$]*', group='alias') if real_nfunc: return real_nfunc - return self._parse_json(self._search_regex( - r'var %s\s*=\s*(\[.+?\]);' % (re.escape(nfunc), ), jscode, - 'Initial JS player n function name ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)] + return self._parse_json( + search_function_code('.+?', group='name'), + nfunc, transform_source=js_to_json)[int(idx)] def _extract_n_function(self, video_id, player_url): player_id = self._extract_player_info(player_url) From ebdc82c58684b4e202fabc046f9a40fc73cccde5 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 22 Jun 2023 17:24:48 +0100 Subject: [PATCH 2/3] [workflows/ci.yml] Replace actions/setup-python for legacy Pythons Thanks MatteoH2O1999: https://github.com/MatteoH2O1999/setup-python --- .github/workflows/ci.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 51abdce1d..9be4eaa89 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,10 +38,12 @@ jobs: steps: - uses: actions/checkout@v3 - name: Set up 
supported Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - if: ${{ matrix.python-impl == 'cpython' && ! contains(fromJSON('["3.3", "3.4"]'), matrix.python-version) }} + # wrap broken actions/setup-python@v4 + uses: ytdl-org/setup-python@v1 with: python-version: ${{ matrix.python-version }} + cache-build: true + allow-build: info - name: Set up Java 8 if: ${{ matrix.python-impl == 'jython' }} uses: actions/setup-java@v2 From fa7f0effbe4e14fcf70e1dc4496371c9862b64b9 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 22 Jun 2023 23:10:04 +0100 Subject: [PATCH 3/3] [YouTube] Avoid crash in author extraction --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 24e2efbd9..9c419c002 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -448,7 +448,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): extract_attributes(self._search_regex( r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')%s\2[^>]*>)''' % re.escape(var_name), - get_element_by_attribute('itemprop', 'author', webpage) or '', + get_element_by_attribute('itemprop', 'author', webpage or '') or '', 'author link', default='')), paths[var_name][0])