diff --git a/test/helper.py b/test/helper.py index fc55c6b46..5b7e3dfe2 100644 --- a/test/helper.py +++ b/test/helper.py @@ -142,7 +142,7 @@ def expect_value(self, got, expected, field): self.assertTrue( contains_str in got, 'field %s (value: %r) should contain %r' % (field, got, contains_str)) - elif isinstance(expected, compat_str) and re.match(r'^lambda \w+:', expected): + elif isinstance(expected, compat_str) and re.match(r'lambda \w+:', expected): fn = eval(expected) suite = expected.split(':', 1)[1].strip() self.assertTrue( diff --git a/test/test_utils.py b/test/test_utils.py index fdae1f744..102420fcb 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -62,13 +62,14 @@ from youtube_dl.utils import ( OnDemandPagedList, orderedSet, parse_age_limit, + parse_bitrate, parse_duration, parse_filesize, parse_codecs, parse_count, parse_iso8601, parse_resolution, - parse_bitrate, + parse_qs, pkcs1pad, prepend_extension, read_batch_urls, @@ -125,7 +126,6 @@ from youtube_dl.compat import ( compat_setenv, compat_str, compat_urlparse, - compat_parse_qs, ) @@ -683,38 +683,36 @@ class TestUtil(unittest.TestCase): self.assertTrue(isinstance(data, bytes)) def test_update_url_query(self): - def query_dict(url): - return compat_parse_qs(compat_urlparse.urlparse(url).query) - self.assertEqual(query_dict(update_url_query( + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})), - query_dict('http://example.com/path?quality=HD&format=mp4')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?quality=HD&format=mp4')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})), - query_dict('http://example.com/path?system=LINUX&system=WINDOWS')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?system=LINUX&system=WINDOWS')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', 
{'fields': 'id,formats,subtitles'})), - query_dict('http://example.com/path?fields=id,formats,subtitles')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?fields=id,formats,subtitles')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})), - query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path?manifest=f4m', {'manifest': []})), - query_dict('http://example.com/path')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})), - query_dict('http://example.com/path?system=LINUX')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?system=LINUX')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'fields': b'id,formats,subtitles'})), - query_dict('http://example.com/path?fields=id,formats,subtitles')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?fields=id,formats,subtitles')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'width': 1080, 'height': 720})), - query_dict('http://example.com/path?width=1080&height=720')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?width=1080&height=720')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'bitrate': 5020.43})), - query_dict('http://example.com/path?bitrate=5020.43')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?bitrate=5020.43')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'test': 
'第二行тест'})), - query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82')) + parse_qs('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82')) def test_multipart_encode(self): self.assertEqual( diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index afb4ee33d..91e691776 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -96,7 +96,7 @@ class FileDownloader(object): return None return int(float(remaining) / rate) start, now = (start_or_rate, now_or_remaining) - total, current = args + total, current = args[:2] if total is None: return None if now is None: diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 28a49b9e8..3cad87420 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -294,7 +294,7 @@ class HttpFD(FileDownloader): # Progress message speed = self.calc_speed(start, now, byte_counter - ctx.resume_len) - eta = self.calc_eta(speed, ctx.data_len and (ctx.data_len - ctx.resume_len)) + eta = self.calc_eta(speed, ctx.data_len and (ctx.data_len - byte_counter)) self._hook_progress({ 'status': 'downloading', diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9d335c96a..59c6fbc2c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1087,7 +1087,10 @@ from .rutube import ( from .rutv import RUTVIE from .ruutu import RuutuIE from .ruv import RuvIE -from .s4c import S4CIE +from .s4c import ( + S4CIE, + S4CSeriesIE, +) from .safari import ( SafariIE, SafariApiIE, diff --git a/youtube_dl/extractor/s4c.py b/youtube_dl/extractor/s4c.py index 21d40c2d3..b152e6680 100644 --- a/youtube_dl/extractor/s4c.py +++ b/youtube_dl/extractor/s4c.py @@ -2,6 +2,8 @@ from __future__ import unicode_literals +from functools import partial as partial_f + from .common import InfoExtractor from ..utils import ( float_or_none, @@ -9,6 
+11,7 @@ from ..utils import ( T, traverse_obj, txt_or_none, + url_or_none, ) @@ -21,7 +24,8 @@ class S4CIE(InfoExtractor): 'ext': 'mp4', 'title': 'Y Swn', 'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0', - 'duration': 5340 + 'duration': 5340, + 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg', }, }, { 'url': 'https://www.s4c.cymru/clic/programme/856636948', @@ -31,6 +35,7 @@ class S4CIE(InfoExtractor): 'title': 'Am Dro', 'duration': 2880, 'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe', + 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg', }, }] @@ -43,7 +48,7 @@ class S4CIE(InfoExtractor): 'programme_id': video_id, }, fatal=False) - filename = self._download_json( + player_config = self._download_json( 'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={ 'programme_id': video_id, 'signed': '0', @@ -51,7 +56,8 @@ class S4CIE(InfoExtractor): 'mode': 'od', 'appId': 'clic', 'streamName': '', - }, note='Downloading player config JSON')['filename'] + }, note='Downloading player config JSON') + m3u8_url = self._download_json( 'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={ 'mode': 'od', @@ -59,18 +65,60 @@ class S4CIE(InfoExtractor): 'region': 'WW', 'extra': 'false', 'thirdParty': 'false', - 'filename': filename, + 'filename': player_config['filename'], }, note='Downloading streaming urls JSON')['hls'] - # ... 
self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls') - formats, subtitles = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native'), {} + formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native') + self._sort_formats(formats) + + subtitles = {} + for sub in traverse_obj(player_config, ('subtitles', lambda _, v: url_or_none(v['0']))): + subtitles.setdefault(sub.get('3', 'en'), []).append({ + 'url': sub['0'], + 'name': sub.get('1'), + }) return merge_dicts({ 'id': video_id, 'formats': formats, 'subtitles': subtitles, + 'thumbnail': url_or_none(player_config.get('poster')), }, traverse_obj(details, ('full_prog_details', 0, { 'title': (('programme_title', 'series_title'), T(txt_or_none)), 'description': ('full_billing', T(txt_or_none)), - 'duration': ('duration', T(lambda x: float_or_none(x, invscale=60))), + 'duration': ('duration', T(partial_f(float_or_none, invscale=60))), }), get_all=False), rev=True) + + +class S4CSeriesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/series/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.s4c.cymru/clic/series/864982911', + 'playlist_mincount': 6, + 'info_dict': { + 'id': '864982911', + 'title': 'Iaith ar Daith', + }, + }, { + 'url': 'https://www.s4c.cymru/clic/series/866852587', + 'playlist_mincount': 8, + 'info_dict': { + 'id': '866852587', + 'title': 'FFIT Cymru', + }, + }] + + def _real_extract(self, url): + series_id = self._match_id(url) + series_details = self._download_json( + 'https://www.s4c.cymru/df/series_details', series_id, query={ + 'lang': 'e', + 'series_id': series_id, + 'show_prog_in_series': 'Y' + }, note='Downloading series details JSON') + + return self.playlist_result( + (self.url_result('https://www.s4c.cymru/clic/programme/' + episode_id, S4CIE, episode_id) + for episode_id in traverse_obj(series_details, ('other_progs_in_series', Ellipsis, 'id'))), + 
playlist_id=series_id, playlist_title=traverse_obj( + series_details, ('full_prog_details', 0, 'series_title', T(txt_or_none)))) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 81ff78807..443d2609c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -4248,6 +4248,7 @@ def update_url(url, **kwargs): url: compat_str or parsed URL tuple if query_update is in kwargs, update query with its value instead of replacing (overrides any `query`) + NB: query_update expects parse_qs() format: [key: value_list, ...] returns: compat_str """ if not kwargs: