Merge 72db217289 into 4d05f84325

[PalcoMP3] Conform to new linter rule
* no space after @ in decorator
2024-06-27 06:36:32 +08:00 · 2024-06-20 20:03:49 +01:00 · 2024-06-20 20:03:49 +01:00 · 2024-06-20 20:03:49 +01:00 · 2024-06-20 20:03:49 +01:00 · 2024-06-20 20:03:49 +01:00
10 changed files with 360 additions and 205 deletions
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@ -577,9 +577,11 @@ class TestJSInterpreter(unittest.TestCase):
    def test_unary_operators(self):
        jsi = JSInterpreter('function f(){return 2  -  - - 2;}')
        self.assertEqual(jsi.call_function('f'), 0)
-        # fails
+        jsi = JSInterpreter('function f(){return 2 + - + - - 2;}')
-        # jsi = JSInterpreter('function f(){return 2 + - + - - 2;}')
+        self.assertEqual(jsi.call_function('f'), 0)
-        # self.assertEqual(jsi.call_function('f'), 0)
+        # https://github.com/ytdl-org/youtube-dl/issues/32815
        jsi = JSInterpreter('function f(){return 0  - 7 * - 6;}')
        self.assertEqual(jsi.call_function('f'), 42)
    """ # fails so far
    def test_packed(self):
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@ -158,6 +158,10 @@ _NSIG_TESTS = [
        'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
        '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
    ),
    (
        'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js',
        '1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg',
    ),
 ]
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -3033,7 +3033,6 @@ class InfoExtractor(object):
            transform_source=transform_source, default=None)
    def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
        # allow passing `transform_source` through to _find_jwplayer_data()
        transform_source = kwargs.pop('transform_source', None)
        kwfind = compat_kwargs({'transform_source': transform_source}) if transform_source else {}
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -413,8 +413,6 @@ from .foxnews import (
    FoxNewsArticleIE,
 )
 from .foxsports import FoxSportsIE
 from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
 from .francetv import (
    FranceTVIE,
    FranceTVSiteIE,
@ -1011,7 +1009,11 @@ from .radiocanada import (
 from .radiode import RadioDeIE
 from .radiojavan import RadioJavanIE
 from .radiobremen import RadioBremenIE
-from .radiofrance import RadioFranceIE
+from .radiofrance import (
    RadioFrancePodcastEpisodeIE,
    RadioFrancePodcastPlaylistIE,
    RadioFranceWebradioIE,
 )
 from .rai import (
    RaiPlayIE,
    RaiPlayLiveIE,
--- a/youtube_dl/extractor/franceculture.py
+++ b/youtube_dl/extractor/franceculture.py
@ -1,73 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
    extract_attributes,
    int_or_none,
 )
 class FranceCultureIE(InfoExtractor):
    """Extractor for programme pages under franceculture.fr/emissions/."""
    _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
        'info_dict': {
            'id': 'rendez-vous-au-pays-des-geeks',
            'display_id': 'rendez-vous-au-pays-des-geeks',
            'ext': 'mp3',
            'title': 'Rendez-vous au pays des geeks',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20140301',
            'timestamp': 1393700400,
            'vcodec': 'none',
        }
    }, {
        # no thumbnail
        'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict from the attributes of the page's player button.

        The media URL comes from the button's `data-url` attribute, or else
        from `data-asset-source`; the remaining fields are read from the
        button attributes or scraped from the page.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # the first <button> carrying a media URL after the page heading
        button_html = self._search_regex(
            r'''(?sx)
                (?:
                    </h1>|
                    <div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
                ).*?
                (<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
            ''',
            webpage, 'video data')
        attrs = extract_attributes(button_html)

        media_url = attrs.get('data-url') or attrs['data-asset-source']
        title = (
            attrs.get('data-asset-title')
            or attrs.get('data-diffusion-title')
            or self._og_search_title(webpage))

        description = self._html_search_regex(
            r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
            webpage, 'description', default=None)
        thumbnail = self._search_regex(
            r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
            webpage, 'thumbnail', default=None)
        uploader = self._html_search_regex(
            r'(?s)<span class="author">(.*?)</span>',
            webpage, 'uploader', default=None)

        ext = determine_ext(media_url.lower())
        # audio-only marker is set for mp3 only; otherwise left undetermined
        vcodec = 'none' if ext == 'mp3' else None
        timestamp = (
            int_or_none(attrs.get('data-start-time'))
            or int_or_none(attrs.get('data-asset-created-date')))

        return {
            'id': display_id,
            'display_id': display_id,
            'url': media_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'ext': ext,
            'vcodec': vcodec,
            'uploader': uploader,
            'timestamp': timestamp,
            'duration': int_or_none(attrs.get('data-duration')),
        }
--- a/youtube_dl/extractor/franceinter.py
+++ b/youtube_dl/extractor/franceinter.py
@ -1,59 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import month_by_name
 class FranceInterIE(InfoExtractor):
    """Extractor for programme pages under franceinter.fr/emissions/."""
    _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)'
    _TEST = {
        'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016',
        'md5': '9e54d7bdb6fdc02a841007f8a975c094',
        'info_dict': {
            'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016',
            'ext': 'mp3',
            'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
            'description': 'md5:401969c5d318c061f86bda1fa359292b',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20160907',
        },
    }

    def _real_extract(self, url):
        """Scrape the audio URL and metadata from the programme page.

        The upload date is read from a "day month-name year" string on the
        page (French month names) and converted to YYYYMMDD; it is None when
        the string cannot be found.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        audio_url = self._search_regex(
            r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            webpage, 'video url', group='url')

        info = {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
        }

        raw_date = self._search_regex(
            r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
            webpage, 'upload date', fatal=False)
        upload_date = None
        if raw_date:
            # "day month year" -> [year, month, day]
            parts = raw_date.split()[::-1]
            # an unrecognised month name becomes '00'
            parts[1] = '%02d' % (month_by_name(parts[1], lang='fr') or 0)
            parts[2] = '%02d' % int(parts[2])
            upload_date = ''.join(parts)

        info['upload_date'] = upload_date
        info['formats'] = [{
            'url': audio_url,
            'vcodec': 'none',
        }]
        return info
--- a/youtube_dl/extractor/palcomp3.py
+++ b/youtube_dl/extractor/palcomp3.py
@ -8,7 +8,7 @@ from ..compat import compat_str
 from ..utils import (
    int_or_none,
    str_or_none,
-    try_get,
+    traverse_obj,
 )
@ -109,7 +109,7 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
    }
    name'''
-    @ classmethod
+    @classmethod
    def suitable(cls, url):
        return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
@ -118,7 +118,8 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
        artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
        def entries():
-            for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []):
+            for music in traverse_obj(artist, (
                    'musics', 'nodes', lambda _, m: m['musicID'])):
                yield self._parse_music(music)
        return self.playlist_result(
@ -137,7 +138,7 @@ class PalcoMP3VideoIE(PalcoMP3BaseIE):
            'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
            'description': 'md5:7043342c09a224598e93546e98e49282',
            'upload_date': '20161107',
-            'uploader_id': 'maiaramaraisaoficial',
+            'uploader_id': '@maiaramaraisaoficial',
            'uploader': 'Maiara e Maraisa',
        }
    }]
--- a/youtube_dl/extractor/radiofrance.py
+++ b/youtube_dl/extractor/radiofrance.py
@ -4,56 +4,284 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    get_element_by_attribute,
    int_or_none,
    parse_iso8601,
    strip_or_none,
    url_or_none
 )
-class RadioFranceIE(InfoExtractor):
+class RadioFranceBaseIE(InfoExtractor):
-    _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
+    _BASE_URL = r'https://www.radiofrance.fr/'
    IE_NAME = 'radiofrance'
-    _TEST = {
+    def extract_api_data(self, api_path, id, html):
-        'url': 'http://maison.radiofrance.fr/radiovisions/one-one',
+        pattern = r'<script [^>]*sveltekit:data-url="https://www\.radiofrance\.fr/api/v[\d.]+/%s[^>]*>(?P<json>.*)</script>' % api_path
-        'md5': 'bdbb28ace95ed0e04faab32ba3160daf',
+        json = self._search_regex(pattern, html, 'API data', flags=re.DOTALL, group='json')
-        'info_dict': {
+
-            'id': 'one-one',
+        if not json:
-            'ext': 'ogg',
+            raise ExtractorError('%s: JSON data not found' % id)
-            'title': 'One to one',
+
-            'description': "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
+        try:
-            'uploader': 'Thomas Hercouët',
+            json = self._parse_json(json, id)
-        },
+            json = self._parse_json(json['body'], id)
            if api_path == 'path':
                return json['content']
            elif api_path == 'stations':
                return json
            else:
                raise ExtractorError('Coding error')
        except KeyError:
            raise ExtractorError('%s: Invalid JSON' % id)
    def get_title(self, api_data, webpage=None):
        """Return the stripped `title` of `api_data`.

        When that is empty and a webpage is given, fall back to the
        `get_element_by_attribute()` lookup and then to the OpenGraph title.
        """
        api_title = strip_or_none(api_data.get('title'))
        if api_title or not webpage:
            return api_title
        return strip_or_none(get_element_by_attribute('h1', None, webpage, False)) or strip_or_none(self._og_search_title(webpage))
    def get_description(self, api_data, webpage=None):
        """Return the stripped `standFirst` of `api_data`, or, when that is
        empty and a webpage is given, the stripped OpenGraph description."""
        stand_first = strip_or_none(api_data.get('standFirst'))
        if stand_first or not webpage:
            return stand_first
        return strip_or_none(self._og_search_description(webpage))
    def get_thumbnail(self, api_data, webpage=None):
        """Return the thumbnail URL from `api_data['visual']['src']`, or, when
        that yields nothing and a webpage is given, the OpenGraph thumbnail."""
        visual = api_data.get('visual')
        image_url = url_or_none(visual.get('src')) if visual else None
        if image_url or not webpage:
            return image_url
        return self._og_search_thumbnail(webpage)
    def get_timestamp(self, api_data, webpage=None):
        """Return `publishedDate` from `api_data` as-is, or, when that is
        empty and a webpage is given, the parsed `article:published_time`
        meta value."""
        published = api_data.get('publishedDate')
        if published or not webpage:
            return published
        meta_time = self._html_search_meta('article:published_time', webpage, 'publication time')
        return parse_iso8601(meta_time)
    def get_brand(self, api_data, webpage=None):
        """Return the stripped `brand` of `api_data`, or, when that is empty
        and a webpage is given, the `site_name` OpenGraph property
        (non-fatal lookup)."""
        station = strip_or_none(api_data.get('brand'))
        if station or not webpage:
            return station
        return self._og_search_property('site_name', webpage, 'Station name', fatal=False)
    def extract_episode(self, episode_id, api_data):
        """Return `(url, duration)` for the first manifestation of an episode.

        `api_data` is the episode's API dict.  `(None, None)` is returned
        when it lists no manifestations.  Otherwise the URL is whatever
        `url_or_none` makes of the first manifestation's `url` (None when the
        key is absent) and the duration is its `duration` passed through
        `int_or_none`.
        """
        manifestations = api_data.get('manifestations')
        if not manifestations:
            return None, None
        first = manifestations[0]
        # read `url` with .get(), like `duration`: a manifestation without a
        # URL is reported to callers as "no URL" instead of raising KeyError
        return url_or_none(first.get('url')), int_or_none(first.get('duration'))
    def get_playlist_entries(self, playlist_url, playlist_id, api_data, direction):
        """Collect the entries of a podcast playlist, following pagination.

        One entry dict is built per item of `api_data['expressions']['items']`.
        Items without a `path`, without a parsable id, or without a media URL
        are reported and skipped.  When the page carries a `pageNumber`, the
        neighbouring pages are fetched with `get_data()` and their entries are
        prepended (previous page) or appended (next page).

        `direction` is 'both', 'prev' or 'next'.  Recursive calls pass a
        single direction, so they only walk away from the starting page.

        Returns the list of entry dicts in page order.
        """
        playlist_data = api_data['expressions']
        entries = []

        # a page without `items` is treated as empty
        for item in playlist_data.get('items') or []:
            episode_path = item.get('path')
            if episode_path is None:
                # format the message here, as the other messages in this
                # method do, so the title actually appears in the warning
                self.report_warning('No path found for episode "%s"' % item.get('title'))
                continue
            episode_id = RadioFrancePodcastEpisodeIE._match_id(self._BASE_URL + episode_path)
            if episode_id is None:
                self.report_warning('Could not parse id of episode from path: "%s"' % episode_path)
                continue
            episode_url, duration = self.extract_episode(episode_id, item)
            if episode_url is None:
                self.to_screen('Episode "%s" is not available' % episode_path)
                continue
            entries.append({
                'id': episode_id,
                'url': episode_url,
                'title': self.get_title(item),
                'description': self.get_description(item),
                'timestamp': self.get_timestamp(item),
                'thumbnail': self.get_thumbnail(item),
                'duration': duration,
            })

        page_number = int_or_none(playlist_data.get('pageNumber'))
        if page_number:
            if direction in ('both', 'prev') and playlist_data.get('prev') is not None:
                _, other_api_data = self.get_data(playlist_url, 'path', playlist_id, page=page_number - 1)
                entries = self.get_playlist_entries(playlist_url, playlist_id, other_api_data, direction='prev') + entries
            if direction in ('both', 'next') and playlist_data.get('next') is not None:
                _, other_api_data = self.get_data(playlist_url, 'path', playlist_id, page=page_number + 1)
                entries = entries + self.get_playlist_entries(playlist_url, playlist_id, other_api_data, direction='next')
        return entries
    def get_data(self, url, api_path, id, page=None):
        """Download `url` and return `(webpage, api_data)`.

        A truthy `page` is sent as the `p` query parameter and mentioned in
        the download note.  `api_data` is what `extract_api_data()` returns
        for `api_path` on the downloaded page.
        """
        if page:
            query, note = {'p': page}, "Downloading page %i" % page
        else:
            query, note = {}, None
        html = self._download_webpage(url, id, query=query, note=note)
        return html, self.extract_api_data(api_path, id, html)
 class RadioFrancePodcastEpisodeIE(RadioFranceBaseIE):
    _VALID_URL = r'https?://www\.radiofrance\.fr/(?:francemusique|franceinter|franceculture|franceinfo|mouv|fip)/podcasts/.+/.+-(?P<id>\d+)$'
    _TESTS = [{
        'note': 'Podcast episode with audio from France Info',
        'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-brief-eco/le-brief-eco-du-lundi-05-septembre-2022-8310713',
        'info_dict': {
            'id': '8310713',
            'ext': 'mp3',
            'url': r're:^https?://.*\.mp3$',
            'title': 'Pour la première fois en vingt ans, l’euro passe sous les 0,99\u00a0dollar',
            'description': str,
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': int,
            'duration': int,
            'upload_date': str
        }
    }, {
        'note': 'Podcast episode from France Musique',
        'url': 'https://www.radiofrance.fr/francemusique/podcasts/allegretto/lever-du-jour-9233228',
        'only_matching': True
    }, {
        'note': 'Podcast episode from FranceInter',
        'url': 'https://www.radiofrance.fr/franceinter/podcasts/rendez-vous-avec-x/un-mysterieux-echange-digne-de-la-guerre-froide-9343281',
        'only_matching': True
    }, {
        'note': 'Podcast episode from France Culture',
        'url': 'https://www.radiofrance.fr/franceculture/podcasts/la-science-cqfd/teotihuacan-la-plus-mysterieuse-des-cites-d-or-9224610',
        'only_matching': True
    }, {
        'note': 'Podcast episode from Le Mouv',
        'url': 'https://www.radiofrance.fr/mouv/podcasts/mouv-dj-la-caution/ncr2a-ne-cherche-rien-d-autre-ailleurs-1197950',
        'only_matching': True
    }, {
        'note': 'Podcast episode from FIP',
        'url': 'https://www.radiofrance.fr/fip/podcasts/certains-l-aiment-fip/hommage-au-cinema-de-vangelis-4734742',
        'only_matching': True
    }]
    def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
+        id = self._match_id(url)
-        video_id = m.group('id')
+        webpage, api_data = self.get_data(url, 'path', id)
-
+        url, duration = self.extract_episode(id, api_data)
-        webpage = self._download_webpage(url, video_id)
+        if url is None:
-        title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
+            msg = 'Podcast file is not available. If the show is too recent, the file may not have been uploaded yet: try again later.'
-        description = self._html_search_regex(
+            raise ExtractorError(msg, expected=True, video_id=id)
            r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
            webpage, 'description', fatal=False)
        uploader = self._html_search_regex(
            r'<div class="credit">&nbsp;&nbsp;&copy;&nbsp;(.*?)</div>',
            webpage, 'uploader', fatal=False)
        formats_str = self._html_search_regex(
            r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
            webpage, 'audio URLs')
        formats = [
            {
                'format_id': fm[0],
                'url': fm[1],
                'vcodec': 'none',
                'preference': i,
            }
            for i, fm in
            enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str))
        ]
        self._sort_formats(formats)
        return {
-            'id': video_id,
+            'id': id,
-            'title': title,
+            'url': url,
-            'formats': formats,
+            'title': self.get_title(api_data, webpage),
-            'description': description,
+            'description': self.get_description(api_data, webpage),
-            'uploader': uploader,
+            'timestamp': self.get_timestamp(api_data, webpage),
            'thumbnail': self.get_thumbnail(api_data, webpage),
            'channel_id': self.get_brand(api_data, webpage),
            'duration': duration,
        }
 class RadioFrancePodcastPlaylistIE(RadioFranceBaseIE):
    """Extractor for a podcast show page on www.radiofrance.fr, returned as a playlist."""
    _VALID_URL = r'https?://www\.radiofrance\.fr/(?:francemusique|franceinter|franceculture|franceinfo|mouv|fip)/podcasts/(?P<id>[^/]+?)(?:[?#].*)?$'
    _TESTS = [{
        'note': 'Podcast show with multiple pages of episodes and some of them are missing',
        'url': 'https://www.radiofrance.fr/franceculture/podcasts/une-semaine-dans-le-monde-10-11?p=2',
        'info_dict': {
            'id': 'une-semaine-dans-le-monde-10-11',
            'title': 'Une semaine dans le monde | 10-11',
            'description': str,
            'timestamp': int
        },
        'playlist_count': 23,
    }]

    def _real_extract(self, url):
        """Gather the episodes of every page of the show, starting from the
        page in `url` and walking in both directions, and return them in
        reversed order as a playlist result."""
        playlist_id = self._match_id(url)
        webpage, api_data = self.get_data(url, 'path', playlist_id)

        collected = self.get_playlist_entries(url, playlist_id, api_data, direction='both')
        collected.reverse()

        playlist = {
            'id': playlist_id,
            '_type': 'playlist',
            'entries': collected,
        }
        # show-level metadata: API data first, webpage as fallback
        playlist['title'] = self.get_title(api_data, webpage)
        playlist['description'] = self.get_description(api_data, webpage)
        playlist['timestamp'] = self.get_timestamp(api_data, webpage)
        playlist['thumbnail'] = self.get_thumbnail(api_data, webpage)
        playlist['channel_id'] = self.get_brand(api_data, webpage)
        return playlist
 class RadioFranceWebradioIE(RadioFranceBaseIE):
    """Extractor for the live `radio-*` webradio pages on www.radiofrance.fr."""
    _VALID_URL = r'https?://www\.radiofrance\.fr/(?:francemusique|franceinter|franceculture|franceinfo|mouv|fip)/(?P<id>radio-[^/]+)$'
    _TESTS = [{
        'note': 'Full list of webradios available at https://www.radiofrance.fr/ecouter-musique',
        'url': 'https://www.radiofrance.fr/fip/radio-metal',
        'info_dict': {
            'id': 'radio-metal',
            'ext': 'aac',
            'title': str,
        },
        'params': {
            'format': 'aac',
            'skip_download': True,
        }
    }]

    def get_livestream_formats(self, id, api_data):
        """Turn `api_data['media']['sources']` into a list of format dicts.

        Sources without a `url` are ignored.  `mp3` and `aac` sources get a
        codec, bitrate and preference (1 and 2); `hls` sources get
        preference 0 and a `manifest_url`; any other format id is kept
        without those extras.

        Raises ExtractorError when no source has a URL.
        """
        formats = []
        for source in api_data['media']['sources']:
            stream_url = source.get('url')
            if not stream_url:
                continue

            format_id = source.get('format')
            fmt = {
                'url': stream_url,
                'format_id': format_id,
                'asr': 48000,
                'vcodec': 'none'
            }
            if format_id in ('mp3', 'aac'):
                fmt['preference'] = 1 if format_id == 'mp3' else 2
                fmt['acodec'] = 'mp3' if format_id == 'mp3' else 'aac'
                fmt['abr'] = source.get('bitrate')
            elif format_id == 'hls':
                fmt['preference'] = 0
                fmt['manifest_url'] = stream_url
            formats.append(fmt)

        if not formats:
            raise ExtractorError('No live streaming URL found')
        return formats

    def _real_extract(self, url):
        """Return the live-stream info dict for the webradio at `url`."""
        station_id = self._match_id(url)
        webpage, api_data = self.get_data(url, 'stations', station_id)

        info = {'id': station_id}
        info['title'] = self.get_title(api_data, webpage)
        info['formats'] = self.get_livestream_formats(station_id, api_data)
        info['thumbnail'] = self.get_thumbnail(api_data, webpage)
        info['channel_id'] = self.get_brand(api_data, webpage)
        info['is_live'] = True
        return info
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@ -14,6 +14,7 @@ from .utils import (
    remove_quotes,
    unified_timestamp,
    variadic,
    write_string,
 )
 from .compat import (
    compat_basestring,
@ -53,15 +54,16 @@ def wraps_op(op):
 # NB In principle NaN cannot be checked by membership.
 # Here all NaN values are actually this one, so _NaN is _NaN,
-# although _NaN != _NaN.
+# although _NaN != _NaN. Ditto Infinity.
 _NaN = float('nan')
 _Infinity = float('inf')
 def _js_bit_op(op):
    def zeroise(x):
-        return 0 if x in (None, JS_Undefined, _NaN) else x
+        return 0 if x in (None, JS_Undefined, _NaN, _Infinity) else x
    @wraps_op(op)
    def wrapped(a, b):
@ -84,7 +86,7 @@ def _js_arith_op(op):
 def _js_div(a, b):
    if JS_Undefined in (a, b) or not (a or b):
        return _NaN
-    return operator.truediv(a or 0, b) if b else float('inf')
+    return operator.truediv(a or 0, b) if b else _Infinity
 def _js_mod(a, b):
@ -220,6 +222,42 @@ class LocalNameSpace(ChainMap):
        return 'LocalNameSpace%s' % (self.maps, )
 class Debugger(object):
    """Optional tracing of interpreted statements; inactive unless ENABLED is set."""
    ENABLED = False

    @staticmethod
    def write(*args, **kwargs):
        """Write one `[debug] JS:` line made of the space-joined `args`.

        The keyword `level` (default 100) sets the indentation: two spaces
        per unit below 100.  Each argument is converted to text and long
        ones are shortened.
        """
        level = kwargs.get('level', 100)

        def truncate_string(s, left, right=0):
            # None and strings that already fit pass through unchanged
            if s is None or len(s) <= left + right:
                return s
            tail = s[-right:] if right else ''
            return s[:left - 3] + '...' + tail

        indent = '  ' * (100 - level)
        text = ' '.join(truncate_string(compat_str(x), 50, 50) for x in args)
        write_string('[debug] JS: {0}{1}\n'.format(indent, text))

    @classmethod
    def wrap_interpreter(cls, f):
        """Return a wrapper around statement interpreter `f`.

        The wrapper forwards its arguments to `f` and returns its
        `(ret, should_ret)` result.  While ENABLED is set it also writes the
        statement before evaluation, the result afterwards, and any
        exception (which is then re-raised).
        """
        def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
            if cls.ENABLED and stmt.strip():
                cls.write(stmt, level=allow_recursion)

            try:
                ret, should_ret = f(self, stmt, local_vars, allow_recursion, *args, **kwargs)
            except Exception as exc:
                if cls.ENABLED:
                    # show the bare message of an ExtractorError
                    shown = exc.orig_msg if isinstance(exc, ExtractorError) else exc
                    cls.write('=> Raises:', shown, '<-|', stmt, level=allow_recursion)
                raise

            if cls.ENABLED and stmt.strip():
                # stay quiet when the statement merely evaluated to itself
                if should_ret or repr(ret) != stmt:
                    marker = ('->', '=>')[should_ret]
                    cls.write(marker, repr(ret), '<-|', stmt, level=allow_recursion)
            return ret, should_ret

        return interpret_statement
 class JSInterpreter(object):
    __named_object_counter = 0
@ -307,8 +345,7 @@ class JSInterpreter(object):
    def __op_chars(cls):
        op_chars = set(';,[')
        for op in cls._all_operators():
-            for c in op[0]:
+            op_chars.update(op[0])
                op_chars.add(c)
        return op_chars
    def _named_object(self, namespace, obj):
@ -326,9 +363,8 @@ class JSInterpreter(object):
        # collections.Counter() is ~10% slower in both 2.7 and 3.9
        counters = dict((k, 0) for k in _MATCHING_PARENS.values())
        start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
-        in_quote, escaping, skipping = None, False, 0
+        in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
-        after_op, in_regex_char_group = True, False
+        skipping = 0
        for idx, char in enumerate(expr):
            paren_delta = 0
            if not in_quote:
@ -382,10 +418,12 @@ class JSInterpreter(object):
        return separated[0][1:].strip(), separated[1].strip()
    @staticmethod
-    def _all_operators():
+    def _all_operators(_cached=[]):
-        return itertools.chain(
+        if not _cached:
            _cached.extend(itertools.chain(
                # Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
-            _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS)
+                _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS))
        return _cached
    def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
        if op in ('||', '&&'):
@ -416,7 +454,7 @@ class JSInterpreter(object):
        except Exception as e:
            if allow_undefined:
                return JS_Undefined
-            raise self.Exception('Cannot get index {idx:.100}'.format(**locals()), expr=repr(obj), cause=e)
+            raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
    def _dump(self, obj, namespace):
        try:
@ -438,6 +476,7 @@ class JSInterpreter(object):
    _FINALLY_RE = re.compile(r'finally\s*\{')
    _SWITCH_RE = re.compile(r'switch\s*\(')
    @Debugger.wrap_interpreter
    def interpret_statement(self, stmt, local_vars, allow_recursion=100):
        if allow_recursion < 0:
            raise self.Exception('Recursion limit reached')
@ -511,7 +550,6 @@ class JSInterpreter(object):
                expr = self._dump(inner, local_vars) + outer
        if expr.startswith('('):
            m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr)
            if m:
                # short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig`
@ -693,7 +731,7 @@ class JSInterpreter(object):
                (?P<op>{_OPERATOR_RE})?
                =(?!=)(?P<expr>.*)$
            )|(?P<return>
-                (?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$
+                (?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
            )|(?P<indexing>
                (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
            )|(?P<attribute>
@ -727,11 +765,12 @@ class JSInterpreter(object):
            raise JS_Break()
        elif expr == 'continue':
            raise JS_Continue()
        elif expr == 'undefined':
            return JS_Undefined, should_return
        elif expr == 'NaN':
            return _NaN, should_return
        elif expr == 'Infinity':
            return _Infinity, should_return
        elif md.get('return'):
            return local_vars[m.group('name')], should_return
@ -760,17 +799,27 @@ class JSInterpreter(object):
            right_expr = separated.pop()
            # handle operators that are both unary and binary, minimal BODMAS
            if op in ('+', '-'):
                # simplify/adjust consecutive instances of these operators
                undone = 0
                while len(separated) > 1 and not separated[-1].strip():
                    undone += 1
                    separated.pop()
                if op == '-' and undone % 2 != 0:
                    right_expr = op + right_expr
                elif op == '+':
                    while len(separated) > 1 and separated[-1].strip() in self.OP_CHARS:
                        right_expr = separated.pop() + right_expr
                # hanging op at end of left => unary + (strip) or - (push right)
                left_val = separated[-1]
                for dm_op in ('*', '%', '/', '**'):
                    bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
                    if len(bodmas) > 1 and not bodmas[-1].strip():
                        expr = op.join(separated) + op + right_expr
                        if len(separated) > 1:
                            separated.pop()
                            right_expr = op.join((left_val, right_expr))
                        else:
                            separated = [op.join((left_val, right_expr))]
                            right_expr = None
                        break
                if right_expr is None:
@ -797,6 +846,8 @@ class JSInterpreter(object):
            def eval_method():
                if (variable, member) == ('console', 'debug'):
                    if Debugger.ENABLED:
                        Debugger.write(self.interpret_expression('[{}]'.format(arg_str), local_vars, allow_recursion))
                    return
                types = {
                    'String': compat_str,
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -2406,7 +2406,7 @@ class ExtractorError(YoutubeDLError):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """
-
+        self.orig_msg = msg
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
Author	SHA1	Message	Date
Olivier Trichet	a05e5a4ae1	Merge `72db217289` into `4d05f84325`	2024-06-27 06:36:32 +08:00
dirkf	4d05f84325	[PalcoMP3] Conform to new linter rule * no space after @ in decorator	2024-06-20 20:03:49 +01:00
dirkf	e0094e63c3	[jsinterp] Various tweaks * treat Infinity like NaN * cache operator list	2024-06-20 20:03:49 +01:00
dirkf	fd8242e3ef	[jsinterp] Fix and improve expression parsing * improve BODMAS (fixes https://github.com/ytdl-org/youtube-dl/issues/32815) * support more weird expressions with multiple unary ops	2024-06-20 20:03:49 +01:00
dirkf	ad01fa6cca	[jsinterp] Add Debugger from yt-dlp * https://github.com/yt-dlp/yt-dlp/commit/8f53dc4 * thx pukkandan	2024-06-20 20:03:49 +01:00
dirkf	2eac0fa379	[utils] Save `orig_msg` in `ExtractorError`	2024-06-20 20:03:49 +01:00
Olivier Trichet	72db217289	[RadioFrance] Extractor for thematic webradios	2022-12-22 14:22:19 -05:00
Olivier Trichet	fc933e686b	[RadioFrance] Refactoring	2022-12-22 13:01:10 -05:00
Olivier Trichet	ea02c40539	[RadioFrance] Extractor for podcast playlists	2022-12-22 13:00:54 -05:00
Olivier Trichet	7270ecf3d6	[RadioFrance] Extractor for podcast of Radio France stations	2022-12-22 13:00:17 -05:00
Olivier Trichet	dade9111f1	[RadioFrance] Remove old Radio France stations extractors These are not working anymore after their respective websites were merged into www.radiofrance.fr.	2022-12-22 13:00:08 -05:00