Compare commits
11 Commits
5fb09daf08
...
a05e5a4ae1
Author | SHA1 | Date | |
---|---|---|---|
|
a05e5a4ae1 | ||
|
4d05f84325 | ||
|
e0094e63c3 | ||
|
fd8242e3ef | ||
|
ad01fa6cca | ||
|
2eac0fa379 | ||
|
72db217289 | ||
|
fc933e686b | ||
|
ea02c40539 | ||
|
7270ecf3d6 | ||
|
dade9111f1 |
@ -577,9 +577,11 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
def test_unary_operators(self):
|
def test_unary_operators(self):
|
||||||
jsi = JSInterpreter('function f(){return 2 - - - 2;}')
|
jsi = JSInterpreter('function f(){return 2 - - - 2;}')
|
||||||
self.assertEqual(jsi.call_function('f'), 0)
|
self.assertEqual(jsi.call_function('f'), 0)
|
||||||
# fails
|
jsi = JSInterpreter('function f(){return 2 + - + - - 2;}')
|
||||||
# jsi = JSInterpreter('function f(){return 2 + - + - - 2;}')
|
self.assertEqual(jsi.call_function('f'), 0)
|
||||||
# self.assertEqual(jsi.call_function('f'), 0)
|
# https://github.com/ytdl-org/youtube-dl/issues/32815
|
||||||
|
jsi = JSInterpreter('function f(){return 0 - 7 * - 6;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 42)
|
||||||
|
|
||||||
""" # fails so far
|
""" # fails so far
|
||||||
def test_packed(self):
|
def test_packed(self):
|
||||||
|
@ -158,6 +158,10 @@ _NSIG_TESTS = [
|
|||||||
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
|
||||||
'_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
|
'_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js',
|
||||||
|
'1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg',
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@ -3033,7 +3033,6 @@ class InfoExtractor(object):
|
|||||||
transform_source=transform_source, default=None)
|
transform_source=transform_source, default=None)
|
||||||
|
|
||||||
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
||||||
|
|
||||||
# allow passing `transform_source` through to _find_jwplayer_data()
|
# allow passing `transform_source` through to _find_jwplayer_data()
|
||||||
transform_source = kwargs.pop('transform_source', None)
|
transform_source = kwargs.pop('transform_source', None)
|
||||||
kwfind = compat_kwargs({'transform_source': transform_source}) if transform_source else {}
|
kwfind = compat_kwargs({'transform_source': transform_source}) if transform_source else {}
|
||||||
|
@ -413,8 +413,6 @@ from .foxnews import (
|
|||||||
FoxNewsArticleIE,
|
FoxNewsArticleIE,
|
||||||
)
|
)
|
||||||
from .foxsports import FoxSportsIE
|
from .foxsports import FoxSportsIE
|
||||||
from .franceculture import FranceCultureIE
|
|
||||||
from .franceinter import FranceInterIE
|
|
||||||
from .francetv import (
|
from .francetv import (
|
||||||
FranceTVIE,
|
FranceTVIE,
|
||||||
FranceTVSiteIE,
|
FranceTVSiteIE,
|
||||||
@ -1011,7 +1009,11 @@ from .radiocanada import (
|
|||||||
from .radiode import RadioDeIE
|
from .radiode import RadioDeIE
|
||||||
from .radiojavan import RadioJavanIE
|
from .radiojavan import RadioJavanIE
|
||||||
from .radiobremen import RadioBremenIE
|
from .radiobremen import RadioBremenIE
|
||||||
from .radiofrance import RadioFranceIE
|
from .radiofrance import (
|
||||||
|
RadioFrancePodcastEpisodeIE,
|
||||||
|
RadioFrancePodcastPlaylistIE,
|
||||||
|
RadioFranceWebradioIE,
|
||||||
|
)
|
||||||
from .rai import (
|
from .rai import (
|
||||||
RaiPlayIE,
|
RaiPlayIE,
|
||||||
RaiPlayLiveIE,
|
RaiPlayLiveIE,
|
||||||
|
@ -1,73 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
determine_ext,
|
|
||||||
extract_attributes,
|
|
||||||
int_or_none,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class FranceCultureIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'rendez-vous-au-pays-des-geeks',
|
|
||||||
'display_id': 'rendez-vous-au-pays-des-geeks',
|
|
||||||
'ext': 'mp3',
|
|
||||||
'title': 'Rendez-vous au pays des geeks',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'upload_date': '20140301',
|
|
||||||
'timestamp': 1393700400,
|
|
||||||
'vcodec': 'none',
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
# no thumbnail
|
|
||||||
'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
video_data = extract_attributes(self._search_regex(
|
|
||||||
r'''(?sx)
|
|
||||||
(?:
|
|
||||||
</h1>|
|
|
||||||
<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
|
|
||||||
).*?
|
|
||||||
(<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
|
|
||||||
''',
|
|
||||||
webpage, 'video data'))
|
|
||||||
|
|
||||||
video_url = video_data.get('data-url') or video_data['data-asset-source']
|
|
||||||
title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)
|
|
||||||
|
|
||||||
description = self._html_search_regex(
|
|
||||||
r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
|
|
||||||
webpage, 'description', default=None)
|
|
||||||
thumbnail = self._search_regex(
|
|
||||||
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
|
|
||||||
webpage, 'thumbnail', default=None)
|
|
||||||
uploader = self._html_search_regex(
|
|
||||||
r'(?s)<span class="author">(.*?)</span>',
|
|
||||||
webpage, 'uploader', default=None)
|
|
||||||
ext = determine_ext(video_url.lower())
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': display_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'url': video_url,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'ext': ext,
|
|
||||||
'vcodec': 'none' if ext == 'mp3' else None,
|
|
||||||
'uploader': uploader,
|
|
||||||
'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
|
|
||||||
'duration': int_or_none(video_data.get('data-duration')),
|
|
||||||
}
|
|
@ -1,59 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import month_by_name
|
|
||||||
|
|
||||||
|
|
||||||
class FranceInterIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016',
|
|
||||||
'md5': '9e54d7bdb6fdc02a841007f8a975c094',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016',
|
|
||||||
'ext': 'mp3',
|
|
||||||
'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
|
|
||||||
'description': 'md5:401969c5d318c061f86bda1fa359292b',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
|
||||||
'upload_date': '20160907',
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
video_url = self._search_regex(
|
|
||||||
r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
|
||||||
webpage, 'video url', group='url')
|
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
|
||||||
description = self._og_search_description(webpage)
|
|
||||||
thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
|
|
||||||
|
|
||||||
upload_date_str = self._search_regex(
|
|
||||||
r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
|
||||||
webpage, 'upload date', fatal=False)
|
|
||||||
if upload_date_str:
|
|
||||||
upload_date_list = upload_date_str.split()
|
|
||||||
upload_date_list.reverse()
|
|
||||||
upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0)
|
|
||||||
upload_date_list[2] = '%02d' % int(upload_date_list[2])
|
|
||||||
upload_date = ''.join(upload_date_list)
|
|
||||||
else:
|
|
||||||
upload_date = None
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'upload_date': upload_date,
|
|
||||||
'formats': [{
|
|
||||||
'url': video_url,
|
|
||||||
'vcodec': 'none',
|
|
||||||
}],
|
|
||||||
}
|
|
@ -8,7 +8,7 @@ from ..compat import compat_str
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
try_get,
|
traverse_obj,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -109,7 +109,7 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
|
|||||||
}
|
}
|
||||||
name'''
|
name'''
|
||||||
|
|
||||||
@ classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
|
return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
|
||||||
|
|
||||||
@ -118,7 +118,8 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
|
|||||||
artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
|
artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
|
||||||
|
|
||||||
def entries():
|
def entries():
|
||||||
for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []):
|
for music in traverse_obj(artist, (
|
||||||
|
'musics', 'nodes', lambda _, m: m['musicID'])):
|
||||||
yield self._parse_music(music)
|
yield self._parse_music(music)
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
@ -137,7 +138,7 @@ class PalcoMP3VideoIE(PalcoMP3BaseIE):
|
|||||||
'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
|
'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
|
||||||
'description': 'md5:7043342c09a224598e93546e98e49282',
|
'description': 'md5:7043342c09a224598e93546e98e49282',
|
||||||
'upload_date': '20161107',
|
'upload_date': '20161107',
|
||||||
'uploader_id': 'maiaramaraisaoficial',
|
'uploader_id': '@maiaramaraisaoficial',
|
||||||
'uploader': 'Maiara e Maraisa',
|
'uploader': 'Maiara e Maraisa',
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
|
@ -4,56 +4,284 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
get_element_by_attribute,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
strip_or_none,
|
||||||
|
url_or_none
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class RadioFranceIE(InfoExtractor):
|
class RadioFranceBaseIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
|
_BASE_URL = r'https://www.radiofrance.fr/'
|
||||||
IE_NAME = 'radiofrance'
|
|
||||||
|
|
||||||
_TEST = {
|
def extract_api_data(self, api_path, id, html):
|
||||||
'url': 'http://maison.radiofrance.fr/radiovisions/one-one',
|
pattern = r'<script [^>]*sveltekit:data-url="https://www\.radiofrance\.fr/api/v[\d.]+/%s[^>]*>(?P<json>.*)</script>' % api_path
|
||||||
'md5': 'bdbb28ace95ed0e04faab32ba3160daf',
|
json = self._search_regex(pattern, html, 'API data', flags=re.DOTALL, group='json')
|
||||||
'info_dict': {
|
|
||||||
'id': 'one-one',
|
if not json:
|
||||||
'ext': 'ogg',
|
raise ExtractorError('%s: JSON data not found' % id)
|
||||||
'title': 'One to one',
|
|
||||||
'description': "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
|
try:
|
||||||
'uploader': 'Thomas Hercouët',
|
json = self._parse_json(json, id)
|
||||||
},
|
json = self._parse_json(json['body'], id)
|
||||||
|
|
||||||
|
if api_path == 'path':
|
||||||
|
return json['content']
|
||||||
|
elif api_path == 'stations':
|
||||||
|
return json
|
||||||
|
else:
|
||||||
|
raise ExtractorError('Coding error')
|
||||||
|
except KeyError:
|
||||||
|
raise ExtractorError('%s: Invalid JSON' % id)
|
||||||
|
|
||||||
|
def get_title(self, api_data, webpage=None):
|
||||||
|
title = strip_or_none(api_data.get('title'))
|
||||||
|
if not title and webpage:
|
||||||
|
title = strip_or_none(get_element_by_attribute('h1', None, webpage, False)) or strip_or_none(self._og_search_title(webpage))
|
||||||
|
return title
|
||||||
|
|
||||||
|
def get_description(self, api_data, webpage=None):
|
||||||
|
description = strip_or_none(api_data.get('standFirst'))
|
||||||
|
if not description and webpage:
|
||||||
|
description = strip_or_none(self._og_search_description(webpage))
|
||||||
|
return description
|
||||||
|
|
||||||
|
def get_thumbnail(self, api_data, webpage=None):
|
||||||
|
thumbnail = None
|
||||||
|
visual = api_data.get('visual')
|
||||||
|
if visual:
|
||||||
|
thumbnail = url_or_none(visual.get('src'))
|
||||||
|
if not thumbnail and webpage:
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
return thumbnail
|
||||||
|
|
||||||
|
def get_timestamp(self, api_data, webpage=None):
|
||||||
|
timestamp = api_data.get('publishedDate')
|
||||||
|
if not timestamp and webpage:
|
||||||
|
timestamp = parse_iso8601(self._html_search_meta('article:published_time', webpage, 'publication time', ))
|
||||||
|
return timestamp
|
||||||
|
|
||||||
|
def get_brand(self, api_data, webpage=None):
|
||||||
|
brand = strip_or_none(api_data.get('brand'))
|
||||||
|
if not brand and webpage:
|
||||||
|
brand = self._og_search_property('site_name', webpage, 'Station name', fatal=False)
|
||||||
|
return brand
|
||||||
|
|
||||||
|
def extract_episode(self, episode_id, api_data):
|
||||||
|
manifestations = api_data.get('manifestations')
|
||||||
|
if manifestations is None or len(manifestations) == 0:
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
url = url_or_none(manifestations[0]['url'])
|
||||||
|
duration = int_or_none(manifestations[0].get('duration'))
|
||||||
|
return url, duration
|
||||||
|
|
||||||
|
def get_playlist_entries(self, playlist_url, playlist_id, api_data, direction):
|
||||||
|
playlist_data = api_data['expressions']
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
items = playlist_data.get('items')
|
||||||
|
for item in items:
|
||||||
|
episode_path = item.get('path')
|
||||||
|
if episode_path is None:
|
||||||
|
self.report_warning('No path found for episode "%s"', item.get('title'))
|
||||||
|
continue
|
||||||
|
episode_id = RadioFrancePodcastEpisodeIE._match_id(self._BASE_URL + episode_path)
|
||||||
|
if episode_id is None:
|
||||||
|
self.report_warning('Could not parse id of episode from path: "%s"' % episode_path)
|
||||||
|
continue
|
||||||
|
episode_url, duration = self.extract_episode(episode_id, item)
|
||||||
|
if episode_url is None:
|
||||||
|
self.to_screen('Episode "%s" is not available' % episode_path)
|
||||||
|
continue
|
||||||
|
entry = {
|
||||||
|
'id': episode_id,
|
||||||
|
'url': episode_url,
|
||||||
|
'title': self.get_title(item),
|
||||||
|
'description': self.get_description(item),
|
||||||
|
'timestamp': self.get_timestamp(item),
|
||||||
|
'thumbnail': self.get_thumbnail(item),
|
||||||
|
'duration': duration,
|
||||||
}
|
}
|
||||||
|
entries.append(entry)
|
||||||
|
|
||||||
|
page_number = int_or_none(playlist_data.get('pageNumber'))
|
||||||
|
if page_number:
|
||||||
|
if direction in ['both', 'prev'] and playlist_data.get('prev') is not None:
|
||||||
|
webpage, other_api_data = self.get_data(playlist_url, 'path', playlist_id, page=page_number - 1)
|
||||||
|
entries = self.get_playlist_entries(playlist_url, playlist_id, other_api_data, direction='prev') + entries
|
||||||
|
if direction in ['both', 'next'] and playlist_data.get('next') is not None:
|
||||||
|
webpage, other_api_data = self.get_data(playlist_url, 'path', playlist_id, page=page_number + 1)
|
||||||
|
entries = entries + self.get_playlist_entries(playlist_url, playlist_id, other_api_data, direction='next')
|
||||||
|
|
||||||
|
return entries
|
||||||
|
|
||||||
|
def get_data(self, url, api_path, id, page=None):
|
||||||
|
query = {}
|
||||||
|
note = None
|
||||||
|
if page:
|
||||||
|
query['p'] = page
|
||||||
|
note = "Downloading page %i" % page
|
||||||
|
webpage = self._download_webpage(url, id, query=query, note=note)
|
||||||
|
api_data = self.extract_api_data(api_path, id, webpage)
|
||||||
|
return webpage, api_data
|
||||||
|
|
||||||
|
|
||||||
|
class RadioFrancePodcastEpisodeIE(RadioFranceBaseIE):
|
||||||
|
_VALID_URL = r'https?://www\.radiofrance\.fr/(?:francemusique|franceinter|franceculture|franceinfo|mouv|fip)/podcasts/.+/.+-(?P<id>\d+)$'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Podcast episode with audio from France Info',
|
||||||
|
'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-brief-eco/le-brief-eco-du-lundi-05-septembre-2022-8310713',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '8310713',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'url': r're:^https?://.*\.mp3$',
|
||||||
|
'title': 'Pour la première fois en vingt ans, l’euro passe sous les 0,99\u00a0dollar',
|
||||||
|
'description': str,
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'timestamp': int,
|
||||||
|
'duration': int,
|
||||||
|
'upload_date': str
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'note': 'Podcast episode from France Musique',
|
||||||
|
'url': 'https://www.radiofrance.fr/francemusique/podcasts/allegretto/lever-du-jour-9233228',
|
||||||
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'note': 'Podcast episode from FranceInter',
|
||||||
|
'url': 'https://www.radiofrance.fr/franceinter/podcasts/rendez-vous-avec-x/un-mysterieux-echange-digne-de-la-guerre-froide-9343281',
|
||||||
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'note': 'Podcast episode from France Culture',
|
||||||
|
'url': 'https://www.radiofrance.fr/franceculture/podcasts/la-science-cqfd/teotihuacan-la-plus-mysterieuse-des-cites-d-or-9224610',
|
||||||
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'note': 'Podcast episode from Le Mouv',
|
||||||
|
'url': 'https://www.radiofrance.fr/mouv/podcasts/mouv-dj-la-caution/ncr2a-ne-cherche-rien-d-autre-ailleurs-1197950',
|
||||||
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'note': 'Podcast episode from FIP',
|
||||||
|
'url': 'https://www.radiofrance.fr/fip/podcasts/certains-l-aiment-fip/hommage-au-cinema-de-vangelis-4734742',
|
||||||
|
'only_matching': True
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
id = self._match_id(url)
|
||||||
video_id = m.group('id')
|
webpage, api_data = self.get_data(url, 'path', id)
|
||||||
|
url, duration = self.extract_episode(id, api_data)
|
||||||
webpage = self._download_webpage(url, video_id)
|
if url is None:
|
||||||
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
|
msg = 'Podcast file is not available. If the show is too recent, the file may not have been uploaded yet: try again later.'
|
||||||
description = self._html_search_regex(
|
raise ExtractorError(msg, expected=True, video_id=id)
|
||||||
r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
|
|
||||||
webpage, 'description', fatal=False)
|
|
||||||
uploader = self._html_search_regex(
|
|
||||||
r'<div class="credit"> © (.*?)</div>',
|
|
||||||
webpage, 'uploader', fatal=False)
|
|
||||||
|
|
||||||
formats_str = self._html_search_regex(
|
|
||||||
r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
|
|
||||||
webpage, 'audio URLs')
|
|
||||||
formats = [
|
|
||||||
{
|
|
||||||
'format_id': fm[0],
|
|
||||||
'url': fm[1],
|
|
||||||
'vcodec': 'none',
|
|
||||||
'preference': i,
|
|
||||||
}
|
|
||||||
for i, fm in
|
|
||||||
enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str))
|
|
||||||
]
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': id,
|
||||||
'title': title,
|
'url': url,
|
||||||
'formats': formats,
|
'title': self.get_title(api_data, webpage),
|
||||||
'description': description,
|
'description': self.get_description(api_data, webpage),
|
||||||
'uploader': uploader,
|
'timestamp': self.get_timestamp(api_data, webpage),
|
||||||
|
'thumbnail': self.get_thumbnail(api_data, webpage),
|
||||||
|
'channel_id': self.get_brand(api_data, webpage),
|
||||||
|
'duration': duration,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class RadioFrancePodcastPlaylistIE(RadioFranceBaseIE):
|
||||||
|
_VALID_URL = r'https?://www\.radiofrance\.fr/(?:francemusique|franceinter|franceculture|franceinfo|mouv|fip)/podcasts/(?P<id>[^/]+?)(?:[?#].*)?$'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Podcast show with multiple pages of episodes and some of them are missing',
|
||||||
|
'url': 'https://www.radiofrance.fr/franceculture/podcasts/une-semaine-dans-le-monde-10-11?p=2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'une-semaine-dans-le-monde-10-11',
|
||||||
|
'title': 'Une semaine dans le monde | 10-11',
|
||||||
|
'description': str,
|
||||||
|
'timestamp': int
|
||||||
|
},
|
||||||
|
'playlist_count': 23,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
id = self._match_id(url)
|
||||||
|
webpage, api_data = self.get_data(url, 'path', id)
|
||||||
|
|
||||||
|
entries = self.get_playlist_entries(url, id, api_data, direction='both')
|
||||||
|
entries.reverse()
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': id,
|
||||||
|
'_type': 'playlist',
|
||||||
|
'entries': entries,
|
||||||
|
'title': self.get_title(api_data, webpage),
|
||||||
|
'description': self.get_description(api_data, webpage),
|
||||||
|
'timestamp': self.get_timestamp(api_data, webpage),
|
||||||
|
'thumbnail': self.get_thumbnail(api_data, webpage),
|
||||||
|
'channel_id': self.get_brand(api_data, webpage),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class RadioFranceWebradioIE(RadioFranceBaseIE):
|
||||||
|
_VALID_URL = r'https?://www\.radiofrance\.fr/(?:francemusique|franceinter|franceculture|franceinfo|mouv|fip)/(?P<id>radio-[^/]+)$'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Full list of webradios available at https://www.radiofrance.fr/ecouter-musique',
|
||||||
|
'url': 'https://www.radiofrance.fr/fip/radio-metal',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'radio-metal',
|
||||||
|
'ext': 'aac',
|
||||||
|
'title': str,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'aac',
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def get_livestream_formats(self, id, api_data):
|
||||||
|
sources = api_data['media']['sources']
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for source in sources:
|
||||||
|
url = source.get('url')
|
||||||
|
if not url:
|
||||||
|
continue
|
||||||
|
|
||||||
|
format_id = source.get('format')
|
||||||
|
format = {
|
||||||
|
'url': url,
|
||||||
|
'format_id': format_id,
|
||||||
|
'asr': 48000,
|
||||||
|
'vcodec': 'none'
|
||||||
|
}
|
||||||
|
if format_id == 'mp3':
|
||||||
|
format['preference'] = 1
|
||||||
|
format['acodec'] = 'mp3'
|
||||||
|
format['abr'] = source.get('bitrate')
|
||||||
|
elif format_id == 'aac':
|
||||||
|
format['preference'] = 2
|
||||||
|
format['acodec'] = 'aac'
|
||||||
|
format['abr'] = source.get('bitrate')
|
||||||
|
elif format_id == 'hls':
|
||||||
|
format['preference'] = 0
|
||||||
|
format['manifest_url'] = url
|
||||||
|
formats.append(format)
|
||||||
|
|
||||||
|
if len(formats) == 0:
|
||||||
|
raise ExtractorError('No live streaming URL found')
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
id = self._match_id(url)
|
||||||
|
webpage, api_data = self.get_data(url, 'stations', id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': id,
|
||||||
|
'title': self.get_title(api_data, webpage),
|
||||||
|
'formats': self.get_livestream_formats(id, api_data),
|
||||||
|
'thumbnail': self.get_thumbnail(api_data, webpage),
|
||||||
|
'channel_id': self.get_brand(api_data, webpage),
|
||||||
|
'is_live': True
|
||||||
}
|
}
|
||||||
|
@ -14,6 +14,7 @@ from .utils import (
|
|||||||
remove_quotes,
|
remove_quotes,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
variadic,
|
variadic,
|
||||||
|
write_string,
|
||||||
)
|
)
|
||||||
from .compat import (
|
from .compat import (
|
||||||
compat_basestring,
|
compat_basestring,
|
||||||
@ -53,15 +54,16 @@ def wraps_op(op):
|
|||||||
|
|
||||||
# NB In principle NaN cannot be checked by membership.
|
# NB In principle NaN cannot be checked by membership.
|
||||||
# Here all NaN values are actually this one, so _NaN is _NaN,
|
# Here all NaN values are actually this one, so _NaN is _NaN,
|
||||||
# although _NaN != _NaN.
|
# although _NaN != _NaN. Ditto Infinity.
|
||||||
|
|
||||||
_NaN = float('nan')
|
_NaN = float('nan')
|
||||||
|
_Infinity = float('inf')
|
||||||
|
|
||||||
|
|
||||||
def _js_bit_op(op):
|
def _js_bit_op(op):
|
||||||
|
|
||||||
def zeroise(x):
|
def zeroise(x):
|
||||||
return 0 if x in (None, JS_Undefined, _NaN) else x
|
return 0 if x in (None, JS_Undefined, _NaN, _Infinity) else x
|
||||||
|
|
||||||
@wraps_op(op)
|
@wraps_op(op)
|
||||||
def wrapped(a, b):
|
def wrapped(a, b):
|
||||||
@ -84,7 +86,7 @@ def _js_arith_op(op):
|
|||||||
def _js_div(a, b):
|
def _js_div(a, b):
|
||||||
if JS_Undefined in (a, b) or not (a or b):
|
if JS_Undefined in (a, b) or not (a or b):
|
||||||
return _NaN
|
return _NaN
|
||||||
return operator.truediv(a or 0, b) if b else float('inf')
|
return operator.truediv(a or 0, b) if b else _Infinity
|
||||||
|
|
||||||
|
|
||||||
def _js_mod(a, b):
|
def _js_mod(a, b):
|
||||||
@ -220,6 +222,42 @@ class LocalNameSpace(ChainMap):
|
|||||||
return 'LocalNameSpace%s' % (self.maps, )
|
return 'LocalNameSpace%s' % (self.maps, )
|
||||||
|
|
||||||
|
|
||||||
|
class Debugger(object):
|
||||||
|
ENABLED = False
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def write(*args, **kwargs):
|
||||||
|
level = kwargs.get('level', 100)
|
||||||
|
|
||||||
|
def truncate_string(s, left, right=0):
|
||||||
|
if s is None or len(s) <= left + right:
|
||||||
|
return s
|
||||||
|
return '...'.join((s[:left - 3], s[-right:] if right else ''))
|
||||||
|
|
||||||
|
write_string('[debug] JS: {0}{1}\n'.format(
|
||||||
|
' ' * (100 - level),
|
||||||
|
' '.join(truncate_string(compat_str(x), 50, 50) for x in args)))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def wrap_interpreter(cls, f):
|
||||||
|
def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
|
||||||
|
if cls.ENABLED and stmt.strip():
|
||||||
|
cls.write(stmt, level=allow_recursion)
|
||||||
|
try:
|
||||||
|
ret, should_ret = f(self, stmt, local_vars, allow_recursion, *args, **kwargs)
|
||||||
|
except Exception as e:
|
||||||
|
if cls.ENABLED:
|
||||||
|
if isinstance(e, ExtractorError):
|
||||||
|
e = e.orig_msg
|
||||||
|
cls.write('=> Raises:', e, '<-|', stmt, level=allow_recursion)
|
||||||
|
raise
|
||||||
|
if cls.ENABLED and stmt.strip():
|
||||||
|
if should_ret or not repr(ret) == stmt:
|
||||||
|
cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
|
||||||
|
return ret, should_ret
|
||||||
|
return interpret_statement
|
||||||
|
|
||||||
|
|
||||||
class JSInterpreter(object):
|
class JSInterpreter(object):
|
||||||
__named_object_counter = 0
|
__named_object_counter = 0
|
||||||
|
|
||||||
@ -307,8 +345,7 @@ class JSInterpreter(object):
|
|||||||
def __op_chars(cls):
|
def __op_chars(cls):
|
||||||
op_chars = set(';,[')
|
op_chars = set(';,[')
|
||||||
for op in cls._all_operators():
|
for op in cls._all_operators():
|
||||||
for c in op[0]:
|
op_chars.update(op[0])
|
||||||
op_chars.add(c)
|
|
||||||
return op_chars
|
return op_chars
|
||||||
|
|
||||||
def _named_object(self, namespace, obj):
|
def _named_object(self, namespace, obj):
|
||||||
@ -326,9 +363,8 @@ class JSInterpreter(object):
|
|||||||
# collections.Counter() is ~10% slower in both 2.7 and 3.9
|
# collections.Counter() is ~10% slower in both 2.7 and 3.9
|
||||||
counters = dict((k, 0) for k in _MATCHING_PARENS.values())
|
counters = dict((k, 0) for k in _MATCHING_PARENS.values())
|
||||||
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
|
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
|
||||||
in_quote, escaping, skipping = None, False, 0
|
in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
|
||||||
after_op, in_regex_char_group = True, False
|
skipping = 0
|
||||||
|
|
||||||
for idx, char in enumerate(expr):
|
for idx, char in enumerate(expr):
|
||||||
paren_delta = 0
|
paren_delta = 0
|
||||||
if not in_quote:
|
if not in_quote:
|
||||||
@ -382,10 +418,12 @@ class JSInterpreter(object):
|
|||||||
return separated[0][1:].strip(), separated[1].strip()
|
return separated[0][1:].strip(), separated[1].strip()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _all_operators():
|
def _all_operators(_cached=[]):
|
||||||
return itertools.chain(
|
if not _cached:
|
||||||
|
_cached.extend(itertools.chain(
|
||||||
# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
|
# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
|
||||||
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS)
|
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS))
|
||||||
|
return _cached
|
||||||
|
|
||||||
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
|
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
|
||||||
if op in ('||', '&&'):
|
if op in ('||', '&&'):
|
||||||
@ -416,7 +454,7 @@ class JSInterpreter(object):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
if allow_undefined:
|
if allow_undefined:
|
||||||
return JS_Undefined
|
return JS_Undefined
|
||||||
raise self.Exception('Cannot get index {idx:.100}'.format(**locals()), expr=repr(obj), cause=e)
|
raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
|
||||||
|
|
||||||
def _dump(self, obj, namespace):
|
def _dump(self, obj, namespace):
|
||||||
try:
|
try:
|
||||||
@ -438,6 +476,7 @@ class JSInterpreter(object):
|
|||||||
_FINALLY_RE = re.compile(r'finally\s*\{')
|
_FINALLY_RE = re.compile(r'finally\s*\{')
|
||||||
_SWITCH_RE = re.compile(r'switch\s*\(')
|
_SWITCH_RE = re.compile(r'switch\s*\(')
|
||||||
|
|
||||||
|
@Debugger.wrap_interpreter
|
||||||
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
||||||
if allow_recursion < 0:
|
if allow_recursion < 0:
|
||||||
raise self.Exception('Recursion limit reached')
|
raise self.Exception('Recursion limit reached')
|
||||||
@ -511,7 +550,6 @@ class JSInterpreter(object):
|
|||||||
expr = self._dump(inner, local_vars) + outer
|
expr = self._dump(inner, local_vars) + outer
|
||||||
|
|
||||||
if expr.startswith('('):
|
if expr.startswith('('):
|
||||||
|
|
||||||
m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr)
|
m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr)
|
||||||
if m:
|
if m:
|
||||||
# short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig`
|
# short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig`
|
||||||
@ -693,7 +731,7 @@ class JSInterpreter(object):
|
|||||||
(?P<op>{_OPERATOR_RE})?
|
(?P<op>{_OPERATOR_RE})?
|
||||||
=(?!=)(?P<expr>.*)$
|
=(?!=)(?P<expr>.*)$
|
||||||
)|(?P<return>
|
)|(?P<return>
|
||||||
(?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$
|
(?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
|
||||||
)|(?P<indexing>
|
)|(?P<indexing>
|
||||||
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
|
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
|
||||||
)|(?P<attribute>
|
)|(?P<attribute>
|
||||||
@ -727,11 +765,12 @@ class JSInterpreter(object):
|
|||||||
raise JS_Break()
|
raise JS_Break()
|
||||||
elif expr == 'continue':
|
elif expr == 'continue':
|
||||||
raise JS_Continue()
|
raise JS_Continue()
|
||||||
|
|
||||||
elif expr == 'undefined':
|
elif expr == 'undefined':
|
||||||
return JS_Undefined, should_return
|
return JS_Undefined, should_return
|
||||||
elif expr == 'NaN':
|
elif expr == 'NaN':
|
||||||
return _NaN, should_return
|
return _NaN, should_return
|
||||||
|
elif expr == 'Infinity':
|
||||||
|
return _Infinity, should_return
|
||||||
|
|
||||||
elif md.get('return'):
|
elif md.get('return'):
|
||||||
return local_vars[m.group('name')], should_return
|
return local_vars[m.group('name')], should_return
|
||||||
@ -760,17 +799,27 @@ class JSInterpreter(object):
|
|||||||
right_expr = separated.pop()
|
right_expr = separated.pop()
|
||||||
# handle operators that are both unary and binary, minimal BODMAS
|
# handle operators that are both unary and binary, minimal BODMAS
|
||||||
if op in ('+', '-'):
|
if op in ('+', '-'):
|
||||||
|
# simplify/adjust consecutive instances of these operators
|
||||||
undone = 0
|
undone = 0
|
||||||
while len(separated) > 1 and not separated[-1].strip():
|
while len(separated) > 1 and not separated[-1].strip():
|
||||||
undone += 1
|
undone += 1
|
||||||
separated.pop()
|
separated.pop()
|
||||||
if op == '-' and undone % 2 != 0:
|
if op == '-' and undone % 2 != 0:
|
||||||
right_expr = op + right_expr
|
right_expr = op + right_expr
|
||||||
|
elif op == '+':
|
||||||
|
while len(separated) > 1 and separated[-1].strip() in self.OP_CHARS:
|
||||||
|
right_expr = separated.pop() + right_expr
|
||||||
|
# hanging op at end of left => unary + (strip) or - (push right)
|
||||||
left_val = separated[-1]
|
left_val = separated[-1]
|
||||||
for dm_op in ('*', '%', '/', '**'):
|
for dm_op in ('*', '%', '/', '**'):
|
||||||
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
|
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
|
||||||
if len(bodmas) > 1 and not bodmas[-1].strip():
|
if len(bodmas) > 1 and not bodmas[-1].strip():
|
||||||
expr = op.join(separated) + op + right_expr
|
expr = op.join(separated) + op + right_expr
|
||||||
|
if len(separated) > 1:
|
||||||
|
separated.pop()
|
||||||
|
right_expr = op.join((left_val, right_expr))
|
||||||
|
else:
|
||||||
|
separated = [op.join((left_val, right_expr))]
|
||||||
right_expr = None
|
right_expr = None
|
||||||
break
|
break
|
||||||
if right_expr is None:
|
if right_expr is None:
|
||||||
@ -797,6 +846,8 @@ class JSInterpreter(object):
|
|||||||
|
|
||||||
def eval_method():
|
def eval_method():
|
||||||
if (variable, member) == ('console', 'debug'):
|
if (variable, member) == ('console', 'debug'):
|
||||||
|
if Debugger.ENABLED:
|
||||||
|
Debugger.write(self.interpret_expression('[{}]'.format(arg_str), local_vars, allow_recursion))
|
||||||
return
|
return
|
||||||
types = {
|
types = {
|
||||||
'String': compat_str,
|
'String': compat_str,
|
||||||
|
@ -2406,7 +2406,7 @@ class ExtractorError(YoutubeDLError):
|
|||||||
""" tb, if given, is the original traceback (so that it can be printed out).
|
""" tb, if given, is the original traceback (so that it can be printed out).
|
||||||
If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
|
If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
|
||||||
"""
|
"""
|
||||||
|
self.orig_msg = msg
|
||||||
if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
|
if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
|
||||||
expected = True
|
expected = True
|
||||||
if video_id is not None:
|
if video_id is not None:
|
||||||
|
Loading…
Reference in New Issue
Block a user