From 3918da628d1848ed3ec0fe98c242b5c25defd6b1 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 24 Feb 2022 23:52:47 +0000 Subject: [PATCH] Extract further fields; implement some review comments --- youtube_dl/extractor/teleportal.py | 83 ++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/teleportal.py b/youtube_dl/extractor/teleportal.py index 6c976e7f6..3becc2648 100644 --- a/youtube_dl/extractor/teleportal.py +++ b/youtube_dl/extractor/teleportal.py @@ -2,39 +2,88 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ( + clean_html, + float_or_none, + parse_iso8601, + str_or_none, + str_to_int, + try_get, + url_or_none, +) class TeleportalIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?teleportal\.ua(/ua)?/(?P<id>[0-9a-z-/]+)' + _VALID_URL = r'https?://(?:www\.)?teleportal\.ua/(?:ua/)?(?P<id>[0-9a-z-]+(?:/[0-9a-z-]+)*)' _TEST = { 'url': 'https://teleportal.ua/ua/show/stb/master-cheff/bitva-sezonov/vypusk-3', - 'md5': '07bd056c45b515fa9cc0202b8403df41', + # no permanent check on file contents as HLS may vary 'info_dict': { 'id': 'show/stb/master-cheff/bitva-sezonov/vypusk-3', 'ext': 'mp4', 'title': 'МастерШеф. Битва сезонів 3 випуск: найогидніший випуск сезону!', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': r're:^

Не пропустіть.*', - } + 'display_id': '2618466', + 'description': 'md5:4179bcc3a12edfa2f655888cd741ac09', + 'timestamp': 1644102480, + 'upload_date': '20220205', + 'thumbnail': r're:^https?://.+\.jpg$', + 'release_timestamp': 1643994000, + 'duration': 11254.0, + 'series_id': '20632', + 'series': 'МастерШеф. Битва сезонів 3 випуск: найогидніший випуск сезону!', + 'season': 'Битва сезонів', + 'episode': 'Найогидніший випуск сезону!', + 'episode_num': 3, + 'categories': ['Шоу'], + }, + 'params': { + 'hls_prefer_native': True, + # 'skip_download': True, + }, } def _real_extract(self, url): video_id = self._match_id(url) - backend_url = 'https://tp-back.starlight.digital/ua/{}'.format(video_id) - metadata = self._download_json(backend_url, video_id) - api_metadata = self._download_json('https://vcms-api2.starlight.digital/player-api/{}?referer=https://teleportal.ua/&lang=ua'.format(metadata['hash']), video_id) + backend_url = 'https://tp-back.starlight.digital/ua/' + video_id + series_metadata = self._download_json(backend_url, video_id) or {} + title = series_metadata['title'] + _hash = series_metadata.get('hash', '') + api_url = 'https://vcms-api2.starlight.digital/player-api/' + _hash + api_metadata = self._download_json( + api_url, video_id, + query={ + 'referer': 'https://teleportal.ua/', + 'lang': 'ua', + } + ) + video_info = api_metadata['video'][0] + formats = [] + for media in ('mediaHlsNoAdv', 'mediaHls'): + media = url_or_none(try_get(video_info, lambda x: x[media])) + if not media: + continue + formats.extend(self._extract_m3u8_formats(media, video_id, 'mp4', fatal=False)) + break + self._sort_formats(formats) - try: - thumbnail = api_metadata['video'][0]['poster'] - except (KeyError, IndexError): - thumbnail = None + thumbnail = url_or_none(video_info.get('poster')) + category = series_metadata.get('typeTitle') return { 'id': video_id, - 'title': metadata.get('title'), - 'description': metadata.get('description'), - 'real_id': metadata.get('id'), - 'hash': 
metadata.get('hash'), + 'title': title, + 'formats': formats, + 'description': clean_html(series_metadata.get('description')) or series_metadata.get('seoDescription'), + 'display_id': str_or_none(video_info.get('vcmsId')), + 'hash': _hash, 'thumbnail': thumbnail, - 'formats': self._extract_m3u8_formats(api_metadata['video'][0]['mediaHls'], video_id, 'mp4'), + 'timestamp': parse_iso8601(video_info.get('time_upload_video'), delimiter=' '), + 'release_timestamp': parse_iso8601(video_info.get('publishDate'), delimiter=' '), + 'duration': float_or_none(video_info.get('duration')), + 'series_id': str_or_none(series_metadata.get('id')), + 'series': series_metadata.get('title'), + 'season': video_info.get('seasonName') or series_metadata.get('seasonGallery', {}).get('title'), + 'episode': video_info.get('name'), + 'episode_num': str_to_int(series_metadata.get('seriesTitle')), + 'categories': [category] if category else None, }