Compare commits

...

9 Commits

Author SHA1 Message Date

dirkf  2f65a7bb7d  Merge 66ee6aa2da into 4d05f84325  2024-06-27 06:36:47 +08:00

dirkf  4d05f84325  [PalcoMP3] Conform to new linter rule  2024-06-20 20:03:49 +01:00
       * no space after @ in decorator

dirkf  e0094e63c3  [jsinterp] Various tweaks  2024-06-20 20:03:49 +01:00
       * treat Infinity like NaN
       * cache operator list

dirkf  fd8242e3ef  [jsinterp] Fix and improve expression parsing  2024-06-20 20:03:49 +01:00
       * improve BODMAS (fixes https://github.com/ytdl-org/youtube-dl/issues/32815)
       * support more weird expressions with multiple unary ops

dirkf  ad01fa6cca  [jsinterp] Add Debugger from yt-dlp  2024-06-20 20:03:49 +01:00
       * https://github.com/yt-dlp/yt-dlp/commit/8f53dc4
       * thx pukkandan

dirkf  2eac0fa379  [utils] Save orig_msg in ExtractorError  2024-06-20 20:03:49 +01:00

dirkf  66ee6aa2da  [TRT] Add extractor for TRT (Turkish TV/radio) catch-up and live streams  2022-05-06 03:34:21 +01:00

dirkf  48c366cd89  [common] Avoid erasing extracted ld+json data with None values  2022-05-05 23:11:17 +01:00

dirkf  2e8984e8c2  [common] Extract series name from ld+json types such as TVSeries  2022-05-05 23:11:17 +01:00
8 changed files with 427 additions and 31 deletions

test/test_jsinterp.py

@@ -577,9 +577,11 @@ class TestJSInterpreter(unittest.TestCase):
     def test_unary_operators(self):
         jsi = JSInterpreter('function f(){return 2 - - - 2;}')
         self.assertEqual(jsi.call_function('f'), 0)
-        # fails
-        # jsi = JSInterpreter('function f(){return 2 + - + - - 2;}')
-        # self.assertEqual(jsi.call_function('f'), 0)
+        jsi = JSInterpreter('function f(){return 2 + - + - - 2;}')
+        self.assertEqual(jsi.call_function('f'), 0)
+        # https://github.com/ytdl-org/youtube-dl/issues/32815
+        jsi = JSInterpreter('function f(){return 0 - 7 * - 6;}')
+        self.assertEqual(jsi.call_function('f'), 42)

     """ # fails so far
     def test_packed(self):
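For a quick local check of the new behaviour, the same expressions can be evaluated outside the test suite. A minimal sketch, assuming a youtube-dl source checkout on the import path (the expressions and expected values are taken from the tests above):

# Evaluate the unary-operator expressions covered by the new tests.
from youtube_dl.jsinterp import JSInterpreter

for source, expected in (
    ('function f(){return 2 - - - 2;}', 0),      # odd number of '-' still subtracts
    ('function f(){return 2 + - + - - 2;}', 0),  # mixed '+'/'-' chain collapses to '-'
    ('function f(){return 0 - 7 * - 6;}', 42),   # unary '-' binds to the right operand of '*'
):
    jsi = JSInterpreter(source)
    print(source, '->', jsi.call_function('f'), '(expected %d)' % expected)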

test/test_youtube_signature.py

@@ -158,6 +158,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
         '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
     ),
+    (
+        'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js',
+        '1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg',
+    ),
 ]

youtube_dl/extractor/common.py

@@ -1370,6 +1370,10 @@ class InfoExtractor(object):
         if isinstance(json_ld, dict):
             json_ld = [json_ld]

+        def valued_dict(items):
+            """Return dict from dict or iterable of pairs omitting None values"""
+            return dict((k, v) for k, v in (items.items() if isinstance(items, dict) else items) if v is not None)
+
         INTERACTION_TYPE_MAP = {
             'CommentAction': 'comment',
             'AgreeAction': 'like',
@@ -1461,19 +1465,25 @@
                 part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
                 if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
                     info['series'] = unescapeHTML(part_of_series.get('name'))
-            elif item_type == 'Movie':
+            elif item_type in ('TVSeries', 'Series', 'CreativeWorkSeries'):
+                series_name = unescapeHTML(e.get('name'))
                 info.update({
+                    'series': series_name,
+                })
+            elif item_type == 'Movie':
+                # here and in the next, don't erase existing value with None
+                info.update(valued_dict({
                     'title': unescapeHTML(e.get('name')),
                     'description': unescapeHTML(e.get('description')),
                     'duration': parse_duration(e.get('duration')),
                     'timestamp': unified_timestamp(e.get('dateCreated')),
-                })
+                }))
             elif item_type in ('Article', 'NewsArticle'):
-                info.update({
+                info.update(valued_dict({
                     'timestamp': parse_iso8601(e.get('datePublished')),
                     'title': unescapeHTML(e.get('headline')),
                     'description': unescapeHTML(e.get('articleBody')),
-                })
+                }))
             elif item_type == 'VideoObject':
                 extract_video_object(e)
                 if expected_type is None:
@@ -1487,7 +1497,7 @@
                     continue
                 else:
                     break
-        return dict((k, v) for k, v in info.items() if v is not None)
+        return valued_dict(info)

     def _search_nextjs_data(self, webpage, video_id, **kw):
         # ..., *, transform_source=None, fatal=True, default=NO_DEFAULT
@@ -3033,7 +3043,6 @@
             transform_source=transform_source, default=None)

     def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
         # allow passing `transform_source` through to _find_jwplayer_data()
-
         transform_source = kwargs.pop('transform_source', None)
         kwfind = compat_kwargs({'transform_source': transform_source}) if transform_source else {}
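The new valued_dict helper exists so that a later, sparser ld+json block cannot erase fields already collected from an earlier one, and _json_ld now returns its result through the same filter. A standalone sketch of that merge rule (plain Python, outside InfoExtractor; the sample values are illustrative only):

# Sketch of the None-dropping merge used by _json_ld above.
def valued_dict(items):
    """Return dict from dict or iterable of pairs omitting None values"""
    return dict((k, v) for k, v in (items.items() if isinstance(items, dict) else items) if v is not None)

info = {'title': 'Looking for Eric', 'timestamp': 1648771200}
# A second metadata block with no usable title must not clobber the one already found.
info.update(valued_dict({'title': None, 'description': 'a later, partial ld+json block'}))
assert info['title'] == 'Looking for Eric' and 'description' in info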

youtube_dl/extractor/extractors.py

@@ -1325,6 +1325,10 @@ from .trovo import (
     TrovoIE,
     TrovoVodIE,
 )
+from .trt import (
+    TRTIE,
+    TRTLiveIE,
+)
 from .trunews import TruNewsIE
 from .trutv import TruTVIE
 from .tube8 import Tube8IE

youtube_dl/extractor/palcomp3.py

@@ -8,7 +8,7 @@ from ..compat import compat_str
 from ..utils import (
     int_or_none,
     str_or_none,
-    try_get,
+    traverse_obj,
 )
@@ -109,7 +109,7 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
     }
     name'''

-    @ classmethod
+    @classmethod
     def suitable(cls, url):
         return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
@@ -118,7 +118,8 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
         artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']

         def entries():
-            for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []):
+            for music in traverse_obj(artist, (
+                    'musics', 'nodes', lambda _, m: m['musicID'])):
                 yield self._parse_music(music)

         return self.playlist_result(
@@ -137,7 +138,7 @@ class PalcoMP3VideoIE(PalcoMP3BaseIE):
             'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
             'description': 'md5:7043342c09a224598e93546e98e49282',
             'upload_date': '20161107',
-            'uploader_id': 'maiaramaraisaoficial',
+            'uploader_id': '@maiaramaraisaoficial',
             'uploader': 'Maiara e Maraisa',
         }
     }]
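The try_get call above is replaced by traverse_obj, which walks nested dicts/lists and can filter list items with a callable. A small sketch of the pattern used in entries(), assuming a youtube-dl version that ships traverse_obj in youtube_dl/utils.py (the sample data is made up):

from youtube_dl.utils import traverse_obj

artist = {'musics': {'nodes': [
    {'musicID': 1, 'title': 'first'},
    {'musicID': None, 'title': 'dropped'},
    {'musicID': 2, 'title': 'second'},
]}}

# Follow artist['musics']['nodes'] and keep only the nodes for which the callable is truthy.
nodes = traverse_obj(artist, ('musics', 'nodes', lambda _, m: m['musicID']))
print([m['title'] for m in nodes])  # ['first', 'second']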

youtube_dl/extractor/trt.py (new file, 325 lines)

@@ -0,0 +1,325 @@
# coding: utf-8
from __future__ import unicode_literals

import json
import re
import time

from .common import InfoExtractor
from ..compat import (
    compat_urlparse,
)
from ..utils import (
    determine_ext,
    dict_get,
    merge_dicts,
    parse_iso8601,
    strip_or_none,
    try_get,
    url_or_none,
    urljoin,
)
class TRTIE(InfoExtractor):
    IE_DESC = 'TRT (Turkish State TV) programs and series'
    _VALID_URL = r'''(?x)
        (?P<list>https?://(?:www\.)trtizle\.com/
            (?:diziler|programlar|belgesel|filmler|cocuk|trtarsiv|engelsiz)/
            (?P<show>[\w-]+))(?:/(?P<id>[\w-]+))?'''
    _TESTS = [{
        'url': 'https://www.trtizle.com/belgesel/dunya-tarihinin-donum-noktalari/dunya-tarihinin-donum-noktalari-1-bolum-125583',
        'md5': 'c46dc0b9b53ad372c4ac6b3982805f05',
        'info_dict': {
            'id': 'dunya-tarihinin-donum-noktalari-1-bolum-125583',
            'ext': 'mp4',
            'title': 'Dünya Tarihinin Dönüm Noktaları 1.Bölüm',
            'description': 'Bedelini insanların ödeyeceği bir imparatorluk çekişmesinde Persler, Yunanlara karşı...',
            'timestamp': 1617148800,
            'upload_date': '20210331',
            'thumbnail': r're:https?://.+\.jpe?g',
            'duration': float,
            'series': 'Dünya Tarihinin Dönüm Noktaları',
        },
        'params': {
            # adaptive download
            'skip_download': True,
        }
    }, {
        'url': 'https://www.trtizle.com/belgesel/dunya-tarihinin-donum-noktalari',
        'info_dict': {
            'id': 'dunya-tarihinin-donum-noktalari',
            'title': 'Dünya Tarihinin Dönüm Noktaları',
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.trtizle.com/diziler/yol-ayrimi/yol-ayrimi-1-bolum-5774583',
        'md5': '67ada6b2020b5dd0d3e24646b2725676',
        'info_dict': {
            'id': 'yol-ayrimi-1-bolum-5774583',
            'ext': 'mp4',
            'title': 'Yol Ayrımı 1.Bölüm',
            'description': 'Seyrisefain balosunda, herkes bir haberin akıbetini beklemektedir…',
            'timestamp': 1623888000,
            'upload_date': '20210617',
            'thumbnail': r're:https?://.+\.jpe?g',
            'duration': float,
            'series': 'Yol Ayrımı',
        },
        'params': {
            # adaptive download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.trtizle.com/diziler/yol-ayrimi/',
        'info_dict': {
            'id': 'yol-ayrimi',
            'title': 'Yol Ayrımı',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://www.trtizle.com/programlar/sade-saz/sade-saz-1-bolum-7646201',
        'md5': '8f416e64379ea4d1d3ea0a65dc922f5c',
        'info_dict': {
            'id': 'sade-saz-1-bolum-7646201',
            'ext': 'mp4',
            'title': 'Sade Saz 1.Bölüm',
            'description': 'Sade Sazın ilk bölümünün konuğu, tanbur icracısı K. Alper Uzkur.',
            'timestamp': 1641772800,
            'upload_date': '20220110',
            'thumbnail': r're:https?://.+\.jpe?g',
            'duration': float,
            'series': 'Sade Saz',
        },
        'params': {
            # adaptive download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.trtizle.com/programlar/sade-saz',
        'info_dict': {
            'id': 'sade-saz',
            'title': 'Sade Saz',
        },
        'playlist_mincount': 6,
    }, {
        'url': 'https://www.trtizle.com/filmler/looking-for-eric/looking-for-eric-8414201',
        'md5': '833d61e4a10606d71b3903295cfa3c63',
        'info_dict': {
            'id': 'looking-for-eric-8414201',
            'ext': 'mp4',
            'title': 'Looking for Eric',
            'description': 'Postacı Eric\'in hayatı krize sürüklenirken gerçek ve hayal birbirine karışır...',
            'upload_date': '20220401',
            'timestamp': 1648771200,
            'thumbnail': r're:https?://.+\.jpe?g',
            'duration': float,
        },
        'params': {
            # adaptive download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.trtizle.com/cocuk/kaptan-pengu-ve-arkadaslari/kaptan-pengu-ve-arkadaslari-okul-aciliyor-6034815',
        'md5': '551c479d1a6bc7c538356907d4ea5d19',
        'info_dict': {
            'id': 'kaptan-pengu-ve-arkadaslari-okul-aciliyor-6034815',
            'ext': 'mp4',
            'title': 'Kaptan Pengu ve Arkadaşları 1.Bölüm',
            'description': 'Hayvanlar Konseyi\'nden Kaptan Pengu\'ya bir mektup vardır...',
            'timestamp': 1626134400,
            'upload_date': '20210713',
            'thumbnail': r're:https?://.+\.jpe?g',
            'duration': float,
            'series': 'Kaptan Pengu ve Arkadaşları',
        },
        'params': {
            # adaptive download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.trtizle.com/cocuk/kaptan-pengu-ve-arkadaslari',
        'info_dict': {
            'id': 'kaptan-pengu-ve-arkadaslari',
            'title': 'Kaptan Pengu ve Arkadaşları',
        },
        'playlist_mincount': 41,
    },
    ]
    def _extract_formats(self, fmt_url, video_id):
        formats = []
        ext = determine_ext(fmt_url)
        if ext == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                fmt_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))
        elif ext == 'mpd':
            formats.extend(self._extract_mpd_formats(
                fmt_url, video_id, mpd_id='dash', fatal=False))
        else:
            formats.append({
                'url': fmt_url,
            })
        return formats

    def _extract_list(self, playlist_id, url):
        webpage = self._download_webpage(url, playlist_id)
        LIST_RE = (
            r'''<a\s[^>]*?\b%s\s*=\s*['"](%s(?:(?<=/)|/)[\w-]+)'''
            % ('data-path' if 'data-path' in webpage else 'href',
               re.escape(compat_urlparse.urlparse(url).path), ))

        def entries():
            for item_url in re.finditer(LIST_RE, webpage):
                item_url = urljoin(url, item_url.group(1))
                yield self._extract_video(self._match_id(item_url), item_url)

        series = self._search_json_ld(webpage, playlist_id, default={}, expected_type='TVSeries')
        return self.playlist_result(entries(), playlist_id, series.get('series'))
    def _extract_video(self, video_id, url):
        webpage = self._download_webpage(url, video_id)
        result = self._search_json_ld(webpage, video_id, default={})
        result['id'] = video_id
        if 'title' not in result:
            result['title'] = (
                self._html_search_meta(('title', 'og:title', 'twitter:title'), webpage)
                or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title\b', webpage, 'title'))
        fmt_url = result.get('url')
        formats = []
        if fmt_url:
            del result['url']
            formats = self._extract_formats(fmt_url, video_id)
        self._sort_formats(formats)
        result['formats'] = formats
        return merge_dicts(
            result, {
                'description': self._html_search_meta(('description', 'og:description'), webpage, 'description'),
                'thumbnail': url_or_none(self._og_search_thumbnail(webpage)),
            })

    def _real_extract(self, url):
        show_id, video_id, playlist_url = re.match(self._VALID_URL, url).group('show', 'id', 'list')
        # TODO: adapt --yes/no-playlist to make this work properly
        # if not video_id or self._downloader.params.get('noplaylist') is False:
        if not video_id:
            return self._extract_list(show_id, playlist_url)
        return self._extract_video(video_id, url)
class TRTLiveIE(TRTIE):
    IE_DESC = 'TRT (Turkish State TV and radio) live channels'
    _VALID_URL = r'https?://(?:www\.)?trtizle\.com/canli/(?:tv/trt-|radyo/(?:radyo-)?)(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.trtizle.com/canli/tv/trt-world',
        'info_dict': {
            'id': 'trtworld',
            'ext': 'mp4',
            'title': r're:TRT WORLD .+',
            'description': 'TRT World',
            'is_live': True,
        },
        'params': {
            # adaptive download
            'skip_download': True,
        }
    },
    ]

    def _real_extract(self, url):
        chan_id = self._match_id(url)
        webpage = self._download_webpage(url, chan_id)
        chan_id = self._search_regex(
            r'\blivePlayer\s*\.\s*openPlayer\s*\([^)]*?\btrt\.com\.tr/trtportal/(?:[^/]+/)+thumbnails/([\w-]+)\.(?:jp|png)',
            webpage, 'slug', fatal=False) or chan_id
        chan_url = self._search_regex(
            r'''\blivePlayerConfig\s*\.\s*baseEpgUrl\s*=\s*(?P<q>'|")(?P<url>https?://(?:(?!(?P=q)).)+)(?P=q)''',
            webpage, 'player config', group='url')
        chan_url = '%s%s.json' % (chan_url, chan_id)
        def maybe_xml2json(src):
            """Turn unexpected XML returned from an API URL into JSON"""
            m = re.match(r'''^\s*<\?xml\b(?:[^/>]*?\bencoding\s*=\s*['"](?P<enc>[\w-]+))?[^/>]*\?>\s*(?P<xml><.+>)$''', src)
            if m:
                # Thanks https://stackoverflow.com/a/63556250 for inspiration
                ATTR_RE = (
                    r"""(?s)(?P<avr>\S+?)(?:\s*=\s*(?P<q>['"])(?P<avl>.*?)(?<!\\)(?P=q))?"""
                )

                def elt_value(attr_str, val_dict):
                    v = {}
                    attrs = dict((j.group("avr"), j.groupdict(True).get("avl"))
                                 for j in re.finditer(ATTR_RE, attr_str.strip()))
                    if attrs:
                        v['@attributes'] = attrs
                    v['@values'] = val_dict
                    return v

                def xml2dict(xml_str):
                    elts = re.findall(
                        r"(?s)<(?P<var>\S+)(?P<attr>[^/>]*)(?:(?:>(?P<val>.*?)</(?P=var)>)|(?:/>))",
                        xml_str,
                    )
                    if elts:
                        elts = [{i[0]: elt_value(i[1], xml2dict(i[2]))} for i in elts]
                        if len(elts) == 1:
                            return elts[0]
                        return elts
                    return xml_str

                try:
                    return json.dumps(xml2dict(m.group('xml').encode(m.group('enc') or 'utf-8')))
                except Exception:
                    pass
            return src
        chan_info = self._download_json(
            chan_url, chan_id, fatal=False,
            note='Downloading player EPG JSON',
            query={'_': int(time.time() * 1000)},
            expected_status=403,
            # errors are returned as XML
            transform_source=maybe_xml2json)
        if not isinstance(chan_info, dict) or 'Error' in chan_info:
            chan_info = self._download_json(
                'https://trtizle-api.cdn.wp.trt.com.tr/trttv/v3/livestream',
                chan_id, fatal=False,
                note='Downloading livestream API JSON',
                query={'path': compat_urlparse.urlparse(url).path}) or {}
        title = chan_info['channel']['title']
        current = try_get(chan_info, lambda x: x['current'], dict) or {}
        if current.get('geo_block'):
            self._downloader.report_warning(
                '[%s] %s' % (self.IE_NAME, 'Stream is geo-blocked'))
        chan_info = chan_info['channel']
        fmt_url = dict_get(chan_info, ('url', 'noneDvrUrl'))
        formats = []
        if fmt_url:
            formats = self._extract_formats(fmt_url, chan_id)
        self._sort_formats(formats)
        start_end = [parse_iso8601(current.get(x)) for x in ('starttime', 'endtime')]
        if None in start_end:
            start_end = None
        return {
            'id': chan_id,
            'title': self._live_title(current.get('title') or title),
            'is_live': True,
            'formats': formats,
            'description': strip_or_none(chan_info.get('description')),
            'thumbnail': next((url_or_none(chan_info.get(x))
                               for x in ('thumbnail', 'thumbnailYoutubeUrl', 'square_logo', 'livestreamLogoUrl')),
                              None),
            'timestamp': start_end and start_end[0],
            'duration': start_end and (start_end[1] - time.time()),
        }
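To exercise the new extractor from a checkout without downloading media, something like the sketch below should work (the URL is the first of the _TESTS above); the download tests can also be run in the usual way, e.g. python test/test_download.py TestDownload.test_TRT:

# Run the new TRT extractor and print a couple of extracted fields.
import youtube_dl

with youtube_dl.YoutubeDL({'skip_download': True}) as ydl:
    info = ydl.extract_info(
        'https://www.trtizle.com/belgesel/dunya-tarihinin-donum-noktalari/dunya-tarihinin-donum-noktalari-1-bolum-125583',
        download=False)
print(info.get('title'), '/', info.get('series'))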

youtube_dl/jsinterp.py

@@ -14,6 +14,7 @@ from .utils import (
     remove_quotes,
     unified_timestamp,
     variadic,
+    write_string,
 )
 from .compat import (
     compat_basestring,
@@ -53,15 +54,16 @@ def wraps_op(op):

 # NB In principle NaN cannot be checked by membership.
 # Here all NaN values are actually this one, so _NaN is _NaN,
-# although _NaN != _NaN.
+# although _NaN != _NaN. Ditto Infinity.
 _NaN = float('nan')
+_Infinity = float('inf')


 def _js_bit_op(op):

     def zeroise(x):
-        return 0 if x in (None, JS_Undefined, _NaN) else x
+        return 0 if x in (None, JS_Undefined, _NaN, _Infinity) else x

     @wraps_op(op)
     def wrapped(a, b):
@@ -84,7 +86,7 @@ def _js_arith_op(op):
 def _js_div(a, b):
     if JS_Undefined in (a, b) or not (a or b):
         return _NaN
-    return operator.truediv(a or 0, b) if b else float('inf')
+    return operator.truediv(a or 0, b) if b else _Infinity


 def _js_mod(a, b):
@@ -220,6 +222,42 @@ class LocalNameSpace(ChainMap):
         return 'LocalNameSpace%s' % (self.maps, )


+class Debugger(object):
+    ENABLED = False
+
+    @staticmethod
+    def write(*args, **kwargs):
+        level = kwargs.get('level', 100)
+
+        def truncate_string(s, left, right=0):
+            if s is None or len(s) <= left + right:
+                return s
+            return '...'.join((s[:left - 3], s[-right:] if right else ''))
+
+        write_string('[debug] JS: {0}{1}\n'.format(
+            ' ' * (100 - level),
+            ' '.join(truncate_string(compat_str(x), 50, 50) for x in args)))
+
+    @classmethod
+    def wrap_interpreter(cls, f):
+        def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
+            if cls.ENABLED and stmt.strip():
+                cls.write(stmt, level=allow_recursion)
+            try:
+                ret, should_ret = f(self, stmt, local_vars, allow_recursion, *args, **kwargs)
+            except Exception as e:
+                if cls.ENABLED:
+                    if isinstance(e, ExtractorError):
+                        e = e.orig_msg
+                    cls.write('=> Raises:', e, '<-|', stmt, level=allow_recursion)
+                raise
+            if cls.ENABLED and stmt.strip():
+                if should_ret or not repr(ret) == stmt:
+                    cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
+            return ret, should_ret
+        return interpret_statement
+
+
 class JSInterpreter(object):
     __named_object_counter = 0
@@ -307,8 +345,7 @@
     def __op_chars(cls):
         op_chars = set(';,[')
         for op in cls._all_operators():
-            for c in op[0]:
-                op_chars.add(c)
+            op_chars.update(op[0])
         return op_chars

     def _named_object(self, namespace, obj):
@@ -326,9 +363,8 @@
         # collections.Counter() is ~10% slower in both 2.7 and 3.9
         counters = dict((k, 0) for k in _MATCHING_PARENS.values())
         start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
-        in_quote, escaping, skipping = None, False, 0
-        after_op, in_regex_char_group = True, False
-
+        in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
+        skipping = 0
         for idx, char in enumerate(expr):
             paren_delta = 0
             if not in_quote:
@@ -382,10 +418,12 @@
         return separated[0][1:].strip(), separated[1].strip()

     @staticmethod
-    def _all_operators():
-        return itertools.chain(
-            # Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
-            _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS)
+    def _all_operators(_cached=[]):
+        if not _cached:
+            _cached.extend(itertools.chain(
+                # Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
+                _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS))
+        return _cached

     def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
         if op in ('||', '&&'):
@@ -416,7 +454,7 @@
         except Exception as e:
             if allow_undefined:
                 return JS_Undefined
-            raise self.Exception('Cannot get index {idx:.100}'.format(**locals()), expr=repr(obj), cause=e)
+            raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)

     def _dump(self, obj, namespace):
         try:
@@ -438,6 +476,7 @@
     _FINALLY_RE = re.compile(r'finally\s*\{')
     _SWITCH_RE = re.compile(r'switch\s*\(')

+    @Debugger.wrap_interpreter
     def interpret_statement(self, stmt, local_vars, allow_recursion=100):
         if allow_recursion < 0:
             raise self.Exception('Recursion limit reached')
@@ -511,7 +550,6 @@
             expr = self._dump(inner, local_vars) + outer

         if expr.startswith('('):
-
             m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr)
             if m:
                 # short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig`
@@ -693,7 +731,7 @@
                 (?P<op>{_OPERATOR_RE})?
                 =(?!=)(?P<expr>.*)$
             )|(?P<return>
-                (?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$
+                (?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
             )|(?P<indexing>
                 (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
             )|(?P<attribute>
@@ -727,11 +765,12 @@
             raise JS_Break()
         elif expr == 'continue':
             raise JS_Continue()
-
         elif expr == 'undefined':
             return JS_Undefined, should_return
         elif expr == 'NaN':
             return _NaN, should_return
+        elif expr == 'Infinity':
+            return _Infinity, should_return

         elif md.get('return'):
             return local_vars[m.group('name')], should_return
@@ -760,17 +799,27 @@
             right_expr = separated.pop()
             # handle operators that are both unary and binary, minimal BODMAS
             if op in ('+', '-'):
+                # simplify/adjust consecutive instances of these operators
                 undone = 0
                 while len(separated) > 1 and not separated[-1].strip():
                     undone += 1
                     separated.pop()
                 if op == '-' and undone % 2 != 0:
                     right_expr = op + right_expr
+                elif op == '+':
+                    while len(separated) > 1 and separated[-1].strip() in self.OP_CHARS:
+                        right_expr = separated.pop() + right_expr
+                # hanging op at end of left => unary + (strip) or - (push right)
                 left_val = separated[-1]
                 for dm_op in ('*', '%', '/', '**'):
                     bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
                     if len(bodmas) > 1 and not bodmas[-1].strip():
                         expr = op.join(separated) + op + right_expr
+                        if len(separated) > 1:
+                            separated.pop()
+                            right_expr = op.join((left_val, right_expr))
+                        else:
+                            separated = [op.join((left_val, right_expr))]
                         right_expr = None
                         break
             if right_expr is None:
@@ -797,6 +846,8 @@
         def eval_method():
             if (variable, member) == ('console', 'debug'):
+                if Debugger.ENABLED:
+                    Debugger.write(self.interpret_expression('[{}]'.format(arg_str), local_vars, allow_recursion))
                 return

             types = {
                 'String': compat_str,
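The Debugger added in this changeset is off by default; setting Debugger.ENABLED makes the wrapped interpret_statement trace every statement (and console.debug output) through write_string to stderr. A minimal sketch, assuming a checkout that includes this change:

# Trace JS interpretation with the newly added Debugger.
from youtube_dl import jsinterp

jsinterp.Debugger.ENABLED = True
jsi = jsinterp.JSInterpreter('function f(x){return x + 1;}')
print(jsi.call_function('f', 41))  # the statement trace goes to stderr; the call returns 42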

youtube_dl/utils.py

@@ -2406,7 +2406,7 @@ class ExtractorError(YoutubeDLError):
         """ tb, if given, is the original traceback (so that it can be printed out).
         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
         """
-
+        self.orig_msg = msg
         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
             expected = True
         if video_id is not None:
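Saving orig_msg keeps the undecorated message available (the jsinterp Debugger above reads e.orig_msg when reporting exceptions), while the formatted message may still gain a video-id prefix and, for unexpected errors, the bug-report boilerplate. A short sketch of the difference, with a made-up message and video id, assuming this change is applied:

from youtube_dl.utils import ExtractorError

try:
    raise ExtractorError('Unsupported JS expression', expected=True, video_id='abc123')
except ExtractorError as e:
    print(e.orig_msg)  # Unsupported JS expression
    print(str(e))      # abc123: Unsupported JS expression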