Compare commits

...

28 Commits

Author SHA1 Message Date
belamenso
81ae40e442
Merge b6bfdb25f2 into 4d05f84325 2024-06-27 06:36:05 +08:00
dirkf
4d05f84325 [PalcoMP3] Conform to new linter rule
* no space after @ in decorator
2024-06-20 20:03:49 +01:00
dirkf
e0094e63c3 [jsinterp] Various tweaks
* treat Infinity like NaN
* cache operator list
2024-06-20 20:03:49 +01:00
dirkf
fd8242e3ef [jsinterp] Fix and improve expression parsing
* improve BODMAS (fixes https://github.com/ytdl-org/youtube-dl/issues/32815)
* support more weird expressions with multiple unary ops
2024-06-20 20:03:49 +01:00
dirkf
ad01fa6cca [jsinterp] Add Debugger from yt-dlp
* https://github.com/yt-dlp/yt-dlp/commit/8f53dc4
* thx pukkandan
2024-06-20 20:03:49 +01:00
dirkf
2eac0fa379 [utils] Save orig_msg in ExtractorError 2024-06-20 20:03:49 +01:00
belamenso
b6bfdb25f2
Merge branch 'ytdl-org:master' into switchtube 2022-09-19 11:05:33 +02:00
pukkandan
5662dad552 [jsinterp] Workaround operator associativity issue
* temporary fix for player 5a3b6271 [1]

1. https://github.com/yt-dlp/yt-dlp/issues/4635#issuecomment-1235384480
2022-09-19 11:05:04 +02:00
dirkf
c342d50ce8 [cache] Add cache validation by program version, based on yt-dlp 2022-09-19 11:05:04 +02:00
dirkf
b35aee2f97 [jsinterp] Handle new YT players 113ca41c, c57c113c
* add NaN
* allow any white-space character for `after_op`
* align with yt-dlp f26af78a8ac11d9d617ed31ea5282cfaa5bcbcfa (charcodeAt and bitwise overflow)
* allow escaping in regex, fixing player c57c113c
2022-09-19 11:05:04 +02:00
dirkf
bd41d33fc5 [options] Document that postprocessing is not forced by --postprocessor-args
Resolves #30307
2022-09-19 11:05:04 +02:00
dirkf
7b4e2290be [compat] Replace deficient ChainMap class in Py3.3 and earlier
* fix version check
2022-09-19 11:05:04 +02:00
dirkf
cdfc866e64 [compat] Replace deficient ChainMap class in Py3.3 and earlier 2022-09-19 11:05:04 +02:00
dirkf
8601874938 [jsinterp] Improve try/catch/finally support 2022-09-19 11:05:04 +02:00
dirkf
660cd68133 [jsinterp] Fix bug in operator precedence
* from 164b03c486
* added tests
2022-09-19 11:05:04 +02:00
dirkf
5dabc11e92 [YouTube] Improve error check for n-sig processing 2022-09-19 11:05:04 +02:00
dirkf
1b907b46cc [core] Avoid processing empty format list after removing bad formats
* also ensure compat encoding of error strings
2022-09-19 11:05:04 +02:00
dirkf
43aebf1313 [utils] Ensure RFC3986 encoding result is unicode 2022-09-19 11:05:04 +02:00
gudata
722e31467b [infoq] Avoid crash if the page has no mp3Form
* proposed fix for issue #31131, aligns with yt-dlp

Co-authored-by: dirkf <fieldhouse@gmx.net>
2022-09-19 11:05:04 +02:00
dirkf
baa75fca86 [uktvplay] Support domain without .uktv 2022-09-19 11:05:04 +02:00
dirkf
9be9fffb25 [jsinterp] Clean up and pull yt-dlp style
* add compat_re_Pattern
* improve compat_collections_chain_map
* use class JS_Undefined
* remove unused code
2022-09-19 11:05:04 +02:00
dirkf
a63bbcbfcf [jsinterp] Handle regexp literals and throw/catch execution (#31182)
* based on f6ca640b12, thanks pukkandan
* adds parse support for regexp flags
2022-09-19 11:05:04 +02:00
dirkf
1862bf568b [jsinterp] Improve JS language support (#31175)
* operator ??
* operator ?.
* operator **
* accurate operator functions
* `undefined` handling
* object literals {a: 1, "b": expr}
* more tests for weird JS comparisons: see https://github.com/ytdl-org/youtube-dl/issues/31173#issuecomment-1217854397.
2022-09-19 11:05:04 +02:00
dirkf
6b98eb46fc [postprocessor] Don't replace existing value with null metadata parsed from title 2022-09-19 11:05:04 +02:00
dirkf
27281eb5bb [jsinterp] Overhaul JSInterp to handle new YT players 4c3f79c5, 324f67b9 (#31170)
* back-port from yt-dlp 8f53dc44a0cc1c2d98c35740b9293462c080f5d0, thanks pukkandan
* also support void, improve <</>> precedence, improve expressions in comma-list
* add more tests
2022-09-19 11:05:04 +02:00
dirkf
c9234b1cce [core] Make --max-downloads ... stop immediately on reaching the limit
Based on and closes #26638.
2022-09-19 11:05:04 +02:00
dirkf
93fdde431c [test, etc] Improve download test logs; also clean up some new flake8 issues (#31153)
* [test] Identify testcase errors better
* [test] Identify download errors better
* [extractor/minds] Linter
* [extractor/aes] Linter
2022-09-19 11:05:04 +02:00
Bartosz Białas
5b21754197 [SwitchTube] Add new extractor 2022-07-31 20:43:16 +02:00
9 changed files with 310 additions and 26 deletions

View File

@ -910,6 +910,9 @@
- **SVTPage** - **SVTPage**
- **SVTPlay**: SVT Play and Öppet arkiv - **SVTPlay**: SVT Play and Öppet arkiv
- **SVTSeries** - **SVTSeries**
- **switchtube**
- **switchtube:channel**
- **switchtube:profile**
- **SWRMediathek** - **SWRMediathek**
- **Syfy** - **Syfy**
- **SztvHu** - **SztvHu**

View File

@ -577,9 +577,11 @@ class TestJSInterpreter(unittest.TestCase):
def test_unary_operators(self): def test_unary_operators(self):
jsi = JSInterpreter('function f(){return 2 - - - 2;}') jsi = JSInterpreter('function f(){return 2 - - - 2;}')
self.assertEqual(jsi.call_function('f'), 0) self.assertEqual(jsi.call_function('f'), 0)
# fails jsi = JSInterpreter('function f(){return 2 + - + - - 2;}')
# jsi = JSInterpreter('function f(){return 2 + - + - - 2;}') self.assertEqual(jsi.call_function('f'), 0)
# self.assertEqual(jsi.call_function('f'), 0) # https://github.com/ytdl-org/youtube-dl/issues/32815
jsi = JSInterpreter('function f(){return 0 - 7 * - 6;}')
self.assertEqual(jsi.call_function('f'), 42)
""" # fails so far """ # fails so far
def test_packed(self): def test_packed(self):

View File

@ -158,6 +158,10 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
'_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ', '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
), ),
(
'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js',
'1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg',
),
] ]

View File

@ -3033,7 +3033,6 @@ class InfoExtractor(object):
transform_source=transform_source, default=None) transform_source=transform_source, default=None)
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
# allow passing `transform_source` through to _find_jwplayer_data() # allow passing `transform_source` through to _find_jwplayer_data()
transform_source = kwargs.pop('transform_source', None) transform_source = kwargs.pop('transform_source', None)
kwfind = compat_kwargs({'transform_source': transform_source}) if transform_source else {} kwfind = compat_kwargs({'transform_source': transform_source}) if transform_source else {}

View File

@ -1235,6 +1235,11 @@ from .svt import (
SVTPlayIE, SVTPlayIE,
SVTSeriesIE, SVTSeriesIE,
) )
from .switchtube import (
SwitchTubeIE,
SwitchTubeProfileIE,
SwitchTubeChannelIE,
)
from .swrmediathek import SWRMediathekIE from .swrmediathek import SWRMediathekIE
from .syfy import SyfyIE from .syfy import SyfyIE
from .sztvhu import SztvHuIE from .sztvhu import SztvHuIE

View File

@ -8,7 +8,7 @@ from ..compat import compat_str
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
str_or_none, str_or_none,
try_get, traverse_obj,
) )
@ -109,7 +109,7 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
} }
name''' name'''
@ classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url) return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
@ -118,7 +118,8 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist'] artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
def entries(): def entries():
for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []): for music in traverse_obj(artist, (
'musics', 'nodes', lambda _, m: m['musicID'])):
yield self._parse_music(music) yield self._parse_music(music)
return self.playlist_result( return self.playlist_result(
@ -137,7 +138,7 @@ class PalcoMP3VideoIE(PalcoMP3BaseIE):
'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande', 'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
'description': 'md5:7043342c09a224598e93546e98e49282', 'description': 'md5:7043342c09a224598e93546e98e49282',
'upload_date': '20161107', 'upload_date': '20161107',
'uploader_id': 'maiaramaraisaoficial', 'uploader_id': '@maiaramaraisaoficial',
'uploader': 'Maiara e Maraisa', 'uploader': 'Maiara e Maraisa',
} }
}] }]

View File

@ -0,0 +1,219 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
extract_attributes,
float_or_none,
get_element_by_attribute,
int_or_none,
parse_iso8601,
strip_or_none,
urljoin,
)
class SwitchTubeIE(InfoExtractor):
    """Extractor for a single video page on tube.switch.ch.

    All metadata is scraped from the HTML of the video page; the media
    formats and thumbnail come from the page's HTML5 media element.
    """
    _VALID_URL = r'https?://tube\.switch\.ch/videos/(?P<id>[\da-zA-Z]+)'
    IE_NAME = 'switchtube'
    _TESTS = [{
        'url': 'https://tube.switch.ch/videos/0T1XfaIFSX',
        'info_dict': {
            'id': '0T1XfaIFSX',
            'title': '2016_ASE_sqC03-Entretien',
            'channel': 'ASE Assistant-e socio-éducatif-ve CFC',
            'channel_url': 'https://tube.switch.ch/channels/bsaer76yoL',
            'channel_id': 'bsaer76yoL',
            'ext': 'mp4',
            'description': None,
            'thumbnail': r're:^https?://tube.switch.ch/image/representations/[\w-]+$',
            'license': 'All rights reserved',
            'creator': 'Jean-Marc Pouly from Eidgenössische Hochschule für Berufsbildung',
            'uploader': 'Jean-Marc Pouly from Eidgenössische Hochschule für Berufsbildung',
            'uploader_url': 'https://tube.switch.ch/profiles/42481',
            'uploader_id': '42481',
            'upload_date': '20220309',
            'timestamp': 1646839068,
        }
    }, {
        'url': 'https://tube.switch.ch/videos/0cf3886d',
        'info_dict': {
            'id': '0cf3886d',
            'ext': 'mp4',
            'title': 'Introduction: Mini-Batches in On- and Off-Policy Deep Reinforcement Learning',
            'license': 'All rights reserved',
            'description': 'One of the challenges in Deep Reinforcement Learning is to decorrelate the data. How this is possible with replay buffers is explained here.',
            'thumbnail': r're:^https?://tube.switch.ch/image/representations/[\w-]+$',
            'channel': 'CS-456 Artificial Neural Networks',
            'channel_url': 'https://tube.switch.ch/channels/1deb03e0',
            'channel_id': '1deb03e0',
            'timestamp': 1590733406,
            'upload_date': '20200529',
            'creator': 'Wulfram Gerstner from École polytechnique fédérale de Lausanne (EPFL)',
            'uploader': 'Wulfram Gerstner from École polytechnique fédérale de Lausanne (EPFL)',
            'uploader_url': 'https://tube.switch.ch/profiles/94248',
            'uploader_id': '94248',
        }
    }]

    def _real_extract(self, url):
        """Download the video page for `url` and return its info dict.

        Only the title is mandatory (its regex has no default); every other
        metadata field is optional and is left as None or unset when the
        corresponding markup is not found.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title>', webpage, 'title')

        info = {
            'id': video_id,
            'title': title,
            'is_live': False,
        }

        info['view_count'] = int_or_none(self._html_search_regex(
            r'(?s)<p\b[^>]*>\s*(\d+)\s+views?\s*</p>', webpage,
            'view_count', default=None))

        info['license'] = self._html_search_regex(
            r'''(?s)<span\b[^>]*?\bproperty\s*=\s*["'](?:\b[cd]c:license\b\s*){2,}[^>]+>(.+)</span>''',
            webpage, 'license', default=None)

        info['description'] = strip_or_none(clean_html(
            get_element_by_attribute('property', 'dc:description', webpage)))

        info['duration'] = float_or_none(
            self._search_regex(
                r'''\bdata-duration\s*=\s*["']([\d.]+)''',
                webpage, 'duration', default=None))

        # The first capture holds `<channel id>" ...>Channel name`; splitting
        # once on the end of the opening tag yields the (id, name) pair.
        channel_groups = self._search_regex(
            r'''(?s)<a\b[^>]+?\bhref\s*=\s*["']/channels/(.{4,}?)</a>''',
            webpage, 'channel groups', default='')
        channel_groups = re.split(r'''(?s)["'][^>]*>\s*''', channel_groups, 1)
        if len(channel_groups) == 2:
            for key, value in zip(('channel_id', 'channel'), channel_groups):
                info[key] = strip_or_none(value)
            # .get() rather than a subscript: never raise KeyError for this
            # optional field, whatever the surrounding control flow
            if info.get('channel_id'):
                info['channel_url'] = 'https://tube.switch.ch/channels/' + info['channel_id']

        def outer_elements_by_attribute(attr, value, html, tag=None, escape_value=True, include_inner=False):
            """Generate the opening tags of elements whose `attr` starts with `value`.

            `tag` restricts the element name (any name when None); `value` is
            regex-escaped unless `escape_value` is false.  With
            `include_inner`, yield (opening tag, inner content) tuples
            instead of just the opening tag string.
            """
            pattern = r'''(?s)(?P<element><(%s)\b[^>]+?\b%s\s*=\s*("|')%s\b[^>]+>)(?P<inner>.*?)</\2>''' % (
                re.escape(tag) if tag is not None else r'\w+',
                attr,
                re.escape(value) if escape_value else value)
            for m in re.finditer(pattern, html):
                yield m.group('element', 'inner') if include_inner else m.group('element')

        # Several dc:date spans may exist; the one classed dt-published
        # carries the publication time in its `content` attribute.
        for dt in outer_elements_by_attribute('property', 'dc:date', webpage, tag='span'):
            dt = extract_attributes(dt)
            if dt.get('class') == 'dt-published':
                info['timestamp'] = parse_iso8601(dt.get('content'))
                break

        creator_groups = self._search_regex(
            r'''(?s)<span\b[^>]+?\bclass\s*=\s*("|')(?:(?!\1).)*\bp-author\b(?:(?!\1).)*\1\s*property\s*=\s*["']dc:creator\b[^>]+>\s*(.*?<span\b[^>]+?\bclass\s*=\s*["']p-name\b.*</span>).*?</span>''',
            webpage, 'creator groups', default='', group=2)
        creator_groups = re.match(
            r'''(?s)<a\b[^>]+?\bhref\s*=\s*("|')/profiles/(?P<profile_id>.+?)\1[^>]*>\s*<span\b[^>]+>\s*(?P<creator_name>.+?)\s*</span>[,\s]*<span\b[^>]+\bclass\s*=\s*["']p-organization-name\b[^>]+>\s*(?P<organization_name>.+?)\s*</span>''',
            creator_groups)
        if creator_groups:
            creator_groups = creator_groups.groupdict()
            info['uploader'] = info['creator'] = ' from '.join(
                (creator_groups['creator_name'], creator_groups['organization_name']))
            info['uploader_id'] = creator_groups['profile_id']
            info['uploader_url'] = 'https://tube.switch.ch/profiles/' + info['uploader_id']

        # The media-entry list is empty when the page has no HTML5 media
        # element; indexing [0] unconditionally would raise a bare
        # IndexError.  Fall back to an empty format list so that the
        # failure is reported by _sort_formats() instead
        # (NOTE(review): presumably as the usual "no formats" extractor
        # error - confirm against InfoExtractor._sort_formats).
        media_entries = self._parse_html5_media_entries(url, webpage, video_id)
        media = media_entries[0] if media_entries else {}
        info['thumbnail'] = media.get('thumbnail')
        info['formats'] = media.get('formats') or []
        self._sort_formats(info['formats'])

        return info
class SwitchTubeProfileIE(InfoExtractor):
    """Playlist extractor for the videos listed on a tube.switch.ch profile page."""
    _VALID_URL = r'https?://tube\.switch\.ch/profiles/(?P<id>[\da-zA-Z]+)'
    IE_NAME = 'switchtube:profile'
    _TESTS = [{
        'url': 'https://tube.switch.ch/profiles/94248',
        'info_dict': {
            'id': '94248',
            'title': 'Wulfram Gerstner',
            'description': None,
        },
        'playlist_mincount': 94,
    }]

    @classmethod
    def suitable(cls, url):
        # single-video URLs are left to SwitchTubeIE
        if SwitchTubeIE.suitable(url):
            return False
        return super(SwitchTubeProfileIE, cls).suitable(url)

    def _real_extract(self, url):
        """Walk the profile's pages via their "Next" links and collect the videos."""
        profile_id = self._match_id(url)
        webpage = self._download_webpage(url, profile_id)

        playlist_title = self._html_search_regex(
            r'(?s)<title\b[^>]*>(.+?)</title>', webpage, 'title', default=None)
        playlist_description = self._html_search_regex(
            r'''(?s)<div\b[^>]+class\s*=\s*("|')p-summary formatted\1[^>]+property\s*=\s*("|')dc:description\2[^>]*>\s*(.+?)\s*</div>''',
            webpage, 'description', default=None, group=3)

        video_link_re = r'''(?s)<a\b[^>]+\bhref\s*=\s*("|')(/videos/((?:(?!\1).)+?))\1[^>]*>\s*<div\b[^>]+\bclass\s*=\s*["']title\b[^>]+>(.+?)</div>'''
        next_link_re = r'''<a\b[^>]+?\bhref\s*=\s*("|')(?P<path>/profiles/%s\?(?:(?!\1).)+)\1[^>]*>\s*Next\s*</a>''' % (profile_id,)

        results = []
        following_url = None
        for page_num in itertools.count(1):
            # the first page was already fetched above; later pages are
            # reached through the link found on the previous page
            if following_url:
                webpage = self._download_webpage(
                    following_url, profile_id,
                    note='Downloading page %d' % (page_num, ))

            for found in re.findall(video_link_re, webpage):
                # found is (quote, path, video id, title markup)
                path, vid = found[1], found[2]
                target = urljoin(url, path)
                if not target:
                    continue
                results.append(self.url_result(
                    target, ie=SwitchTubeIE.ie_key(), video_id=vid))

            next_path = self._search_regex(
                next_link_re, webpage, 'next page', group='path', default=None)
            following_url = urljoin(url, next_path) if next_path else None
            if not following_url:
                break

        return self.playlist_result(
            results, profile_id, playlist_title, playlist_description)
class SwitchTubeChannelIE(InfoExtractor):
    """Playlist extractor for the videos listed on a tube.switch.ch channel."""
    _VALID_URL = r'https?://tube\.switch\.ch/channels/(?P<id>[\da-zA-Z]+)'
    IE_NAME = 'switchtube:channel'
    _TESTS = [{
        'url': 'https://tube.switch.ch/channels/1deb03e0',
        'info_dict': {
            'id': '1deb03e0',
            'title': 'CS-456 Artificial Neural Networks',
            'description': 'Class on Artificial Neural Networks and Reinforcement Learning designed for EPFL master students in CS and related disciplines.'
        },
        'playlist_mincount': 94,
    }]

    @classmethod
    def suitable(cls, url):
        # single-video URLs are left to SwitchTubeIE
        if SwitchTubeIE.suitable(url):
            return False
        return super(SwitchTubeChannelIE, cls).suitable(url)

    def _real_extract(self, url):
        """Request numbered channel pages until one yields no video links."""
        channel_id = self._match_id(url)
        video_link_re = r'''(?s)<a\b[^>]+\bhref\s*=\s*("|')(/videos/((?:(?!\1).)+?))\1[^>]*>'''

        collected = []
        for page_idx in itertools.count(0):
            listing_url = urljoin(
                url, '/channels/%s?order=episodes&page=%d' % (channel_id, page_idx))
            webpage = self._download_webpage(
                listing_url, channel_id,
                note='Downloading page %d' % (page_idx + 1, ))

            if page_idx == 0:
                # playlist-level metadata is taken from the first page only
                channel_title = self._html_search_regex(
                    r'(?s)<title\b[^>]*>(.+?)</title>', webpage, 'title', default=None)
                description = self._html_search_regex(
                    r'''(?s)<div\b[^>]+class\s*=\s*("|')description formatted\1[^>]*>\s*<p>\s*(.+?)\s*</p>\s*</div>''',
                    webpage, 'description', default=None, group=2)

            page_results = []
            for found in re.findall(video_link_re, webpage):
                # found is (quote, path, video id)
                target = urljoin(url, found[1])
                if target:
                    page_results.append(self.url_result(
                        target, ie=SwitchTubeIE.ie_key(), video_id=found[2]))

            # a page without any usable video link marks the end of the listing
            if not page_results:
                break
            collected.extend(page_results)

        return self.playlist_result(
            collected, channel_id, channel_title, description)

View File

@ -14,6 +14,7 @@ from .utils import (
remove_quotes, remove_quotes,
unified_timestamp, unified_timestamp,
variadic, variadic,
write_string,
) )
from .compat import ( from .compat import (
compat_basestring, compat_basestring,
@ -53,15 +54,16 @@ def wraps_op(op):
# NB In principle NaN cannot be checked by membership. # NB In principle NaN cannot be checked by membership.
# Here all NaN values are actually this one, so _NaN is _NaN, # Here all NaN values are actually this one, so _NaN is _NaN,
# although _NaN != _NaN. # although _NaN != _NaN. Ditto Infinity.
_NaN = float('nan') _NaN = float('nan')
_Infinity = float('inf')
def _js_bit_op(op): def _js_bit_op(op):
def zeroise(x): def zeroise(x):
return 0 if x in (None, JS_Undefined, _NaN) else x return 0 if x in (None, JS_Undefined, _NaN, _Infinity) else x
@wraps_op(op) @wraps_op(op)
def wrapped(a, b): def wrapped(a, b):
@ -84,7 +86,7 @@ def _js_arith_op(op):
def _js_div(a, b): def _js_div(a, b):
if JS_Undefined in (a, b) or not (a or b): if JS_Undefined in (a, b) or not (a or b):
return _NaN return _NaN
return operator.truediv(a or 0, b) if b else float('inf') return operator.truediv(a or 0, b) if b else _Infinity
def _js_mod(a, b): def _js_mod(a, b):
@ -220,6 +222,42 @@ class LocalNameSpace(ChainMap):
return 'LocalNameSpace%s' % (self.maps, ) return 'LocalNameSpace%s' % (self.maps, )
class Debugger(object):
    """Optional tracing of interpreted JS statements.

    Nothing is written by the interpreter wrapper unless ENABLED is set.
    """
    ENABLED = False

    @staticmethod
    def write(*args, **kwargs):
        """Emit one '[debug] JS:' line built from `args`.

        The keyword argument `level` (default 100) sets the indentation:
        the lower the level, the deeper the line is indented.
        """
        level = kwargs.get('level', 100)

        def shorten(text, head, tail=0):
            # keep short (or missing) text untouched; otherwise keep the
            # start and, if requested, the end, joined by an ellipsis
            if text is None or len(text) <= head + tail:
                return text
            ending = text[-tail:] if tail else ''
            return '...'.join((text[:head - 3], ending))

        indent = ' ' * (100 - level)
        body = ' '.join(shorten(compat_str(item), 50, 50) for item in args)
        write_string('[debug] JS: {0}{1}\n'.format(indent, body))

    @classmethod
    def wrap_interpreter(cls, f):
        """Decorate statement interpreter `f` so that its calls are traced.

        The wrapper returns exactly what `f` returns and re-raises whatever
        `f` raises; tracing output is produced only while ENABLED is true.
        """
        def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
            if cls.ENABLED and stmt.strip():
                cls.write(stmt, level=allow_recursion)
            try:
                ret, should_ret = f(self, stmt, local_vars, allow_recursion, *args, **kwargs)
            except Exception as e:
                if cls.ENABLED:
                    # report the bare message of an ExtractorError
                    shown = e.orig_msg if isinstance(e, ExtractorError) else e
                    cls.write('=> Raises:', shown, '<-|', stmt, level=allow_recursion)
                raise
            if cls.ENABLED and stmt.strip():
                # skip the trace when a non-returning statement merely
                # evaluated to its own text
                if should_ret or repr(ret) != stmt:
                    marker = ['->', '=>'][should_ret]
                    cls.write(marker, repr(ret), '<-|', stmt, level=allow_recursion)
            return ret, should_ret

        return interpret_statement
class JSInterpreter(object): class JSInterpreter(object):
__named_object_counter = 0 __named_object_counter = 0
@ -307,8 +345,7 @@ class JSInterpreter(object):
def __op_chars(cls): def __op_chars(cls):
op_chars = set(';,[') op_chars = set(';,[')
for op in cls._all_operators(): for op in cls._all_operators():
for c in op[0]: op_chars.update(op[0])
op_chars.add(c)
return op_chars return op_chars
def _named_object(self, namespace, obj): def _named_object(self, namespace, obj):
@ -326,9 +363,8 @@ class JSInterpreter(object):
# collections.Counter() is ~10% slower in both 2.7 and 3.9 # collections.Counter() is ~10% slower in both 2.7 and 3.9
counters = dict((k, 0) for k in _MATCHING_PARENS.values()) counters = dict((k, 0) for k in _MATCHING_PARENS.values())
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1 start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
in_quote, escaping, skipping = None, False, 0 in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
after_op, in_regex_char_group = True, False skipping = 0
for idx, char in enumerate(expr): for idx, char in enumerate(expr):
paren_delta = 0 paren_delta = 0
if not in_quote: if not in_quote:
@ -382,10 +418,12 @@ class JSInterpreter(object):
return separated[0][1:].strip(), separated[1].strip() return separated[0][1:].strip(), separated[1].strip()
@staticmethod @staticmethod
def _all_operators(): def _all_operators(_cached=[]):
return itertools.chain( if not _cached:
# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence _cached.extend(itertools.chain(
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS) # Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS))
return _cached
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion): def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
if op in ('||', '&&'): if op in ('||', '&&'):
@ -416,7 +454,7 @@ class JSInterpreter(object):
except Exception as e: except Exception as e:
if allow_undefined: if allow_undefined:
return JS_Undefined return JS_Undefined
raise self.Exception('Cannot get index {idx:.100}'.format(**locals()), expr=repr(obj), cause=e) raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
def _dump(self, obj, namespace): def _dump(self, obj, namespace):
try: try:
@ -438,6 +476,7 @@ class JSInterpreter(object):
_FINALLY_RE = re.compile(r'finally\s*\{') _FINALLY_RE = re.compile(r'finally\s*\{')
_SWITCH_RE = re.compile(r'switch\s*\(') _SWITCH_RE = re.compile(r'switch\s*\(')
@Debugger.wrap_interpreter
def interpret_statement(self, stmt, local_vars, allow_recursion=100): def interpret_statement(self, stmt, local_vars, allow_recursion=100):
if allow_recursion < 0: if allow_recursion < 0:
raise self.Exception('Recursion limit reached') raise self.Exception('Recursion limit reached')
@ -511,7 +550,6 @@ class JSInterpreter(object):
expr = self._dump(inner, local_vars) + outer expr = self._dump(inner, local_vars) + outer
if expr.startswith('('): if expr.startswith('('):
m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr) m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr)
if m: if m:
# short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig` # short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig`
@ -693,7 +731,7 @@ class JSInterpreter(object):
(?P<op>{_OPERATOR_RE})? (?P<op>{_OPERATOR_RE})?
=(?!=)(?P<expr>.*)$ =(?!=)(?P<expr>.*)$
)|(?P<return> )|(?P<return>
(?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$ (?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
)|(?P<indexing> )|(?P<indexing>
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$ (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
)|(?P<attribute> )|(?P<attribute>
@ -727,11 +765,12 @@ class JSInterpreter(object):
raise JS_Break() raise JS_Break()
elif expr == 'continue': elif expr == 'continue':
raise JS_Continue() raise JS_Continue()
elif expr == 'undefined': elif expr == 'undefined':
return JS_Undefined, should_return return JS_Undefined, should_return
elif expr == 'NaN': elif expr == 'NaN':
return _NaN, should_return return _NaN, should_return
elif expr == 'Infinity':
return _Infinity, should_return
elif md.get('return'): elif md.get('return'):
return local_vars[m.group('name')], should_return return local_vars[m.group('name')], should_return
@ -760,18 +799,28 @@ class JSInterpreter(object):
right_expr = separated.pop() right_expr = separated.pop()
# handle operators that are both unary and binary, minimal BODMAS # handle operators that are both unary and binary, minimal BODMAS
if op in ('+', '-'): if op in ('+', '-'):
# simplify/adjust consecutive instances of these operators
undone = 0 undone = 0
while len(separated) > 1 and not separated[-1].strip(): while len(separated) > 1 and not separated[-1].strip():
undone += 1 undone += 1
separated.pop() separated.pop()
if op == '-' and undone % 2 != 0: if op == '-' and undone % 2 != 0:
right_expr = op + right_expr right_expr = op + right_expr
elif op == '+':
while len(separated) > 1 and separated[-1].strip() in self.OP_CHARS:
right_expr = separated.pop() + right_expr
# hanging op at end of left => unary + (strip) or - (push right)
left_val = separated[-1] left_val = separated[-1]
for dm_op in ('*', '%', '/', '**'): for dm_op in ('*', '%', '/', '**'):
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim)) bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
if len(bodmas) > 1 and not bodmas[-1].strip(): if len(bodmas) > 1 and not bodmas[-1].strip():
expr = op.join(separated) + op + right_expr expr = op.join(separated) + op + right_expr
right_expr = None if len(separated) > 1:
separated.pop()
right_expr = op.join((left_val, right_expr))
else:
separated = [op.join((left_val, right_expr))]
right_expr = None
break break
if right_expr is None: if right_expr is None:
continue continue
@ -797,6 +846,8 @@ class JSInterpreter(object):
def eval_method(): def eval_method():
if (variable, member) == ('console', 'debug'): if (variable, member) == ('console', 'debug'):
if Debugger.ENABLED:
Debugger.write(self.interpret_expression('[{}]'.format(arg_str), local_vars, allow_recursion))
return return
types = { types = {
'String': compat_str, 'String': compat_str,

View File

@ -2406,7 +2406,7 @@ class ExtractorError(YoutubeDLError):
""" tb, if given, is the original traceback (so that it can be printed out). """ tb, if given, is the original traceback (so that it can be printed out).
If expected is set, this is a normal error message and most likely not a bug in youtube-dl. If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
""" """
self.orig_msg = msg
if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
expected = True expected = True
if video_id is not None: if video_id is not None: