From 911e22f9705c74cc4945c92824a6be8b47776017 Mon Sep 17 00:00:00 2001 From: Antoni Segura Puimedon Date: Mon, 31 Oct 2022 09:25:37 +0100 Subject: [PATCH 1/3] ccma fix date parsing This PR fixes the date to the way ccma provides it as well as fixing some broken tests closes #30961 Signed-off-by: Antoni Segura Puimedon --- youtube_dl/extractor/ccma.py | 41 ++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py index e6ae49352..7533f96e7 100644 --- a/youtube_dl/extractor/ccma.py +++ b/youtube_dl/extractor/ccma.py @@ -1,16 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -import calendar -import datetime import re from .common import InfoExtractor from ..utils import ( clean_html, - extract_timezone, int_or_none, parse_duration, + parse_iso8601, parse_resolution, try_get, url_or_none, @@ -25,7 +23,7 @@ class CCMAIE(InfoExtractor): 'info_dict': { 'id': '5630208', 'ext': 'mp4', - 'title': 'L\'espot de La Marató de TV3', + 'title': "L'espot de La Marató 2016: Ictus i les lesions medul·lars i cerebrals traumàtiques", 'description': 'md5:f12987f320e2f6e988e9908e4fe97765', 'timestamp': 1478608140, 'upload_date': '20161108', @@ -39,8 +37,8 @@ class CCMAIE(InfoExtractor): 'ext': 'mp3', 'title': 'El Consell de Savis analitza el derbi', 'description': 'md5:e2a3648145f3241cb9c6b4b624033e53', - 'upload_date': '20170512', - 'timestamp': 1494622500, + 'upload_date': '20161217', + 'timestamp': 1482011700, 'vcodec': 'none', 'categories': ['Esports'], } @@ -50,14 +48,29 @@ class CCMAIE(InfoExtractor): 'info_dict': { 'id': '6031387', 'ext': 'mp4', - 'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)', + 'title': 'Josep Talleda, l\'"Espereu-me" (part 1)', 'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60', - 'timestamp': 1582577700, + 'timestamp': 1582577919, 'upload_date': '20200224', - 'subtitles': 'mincount:4', + 'subtitles': 'mincount:1', 'age_limit': 
16, 'series': 'Crims', } + }, { + 'url': 'https://www.ccma.cat/tv3/sx3/la-tria-final/video/6178889/', + 'md5': '835aecbda55ba7b70d147081cf4b61f2', + 'info_dict': { + 'id': '6178889', + 'ext': 'mp4', + 'title': 'La tria final (T1 - Capítol 4)', + 'description': 'md5:7a66ef9802af0e44ce54304bb9b61eba', + 'timestamp': 1665687816, + 'episode_number': 4, + 'upload_date': '20221013', + 'categories': ['Ficció'], + 'age_limit': 12, + 'series': 'Guardians de la nit: Kimetsu no Yaiba', + } }] def _real_extract(self, url): @@ -95,15 +108,7 @@ class CCMAIE(InfoExtractor): durada = informacio.get('durada') or {} duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text')) tematica = try_get(informacio, lambda x: x['tematica']['text']) - - timestamp = None - data_utc = try_get(informacio, lambda x: x['data_emissio']['utc']) - try: - timezone, data_utc = extract_timezone(data_utc) - timestamp = calendar.timegm((datetime.datetime.strptime( - data_utc, '%Y-%d-%mT%H:%M:%S') - timezone).timetuple()) - except TypeError: - pass + timestamp = parse_iso8601(try_get(informacio, lambda x: x['data_emissio']['utc'])) subtitles = {} subtitols = media.get('subtitols') or [] From cb2d26c472cc5afac2198b53168086a9e2778f5b Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 31 Oct 2022 13:39:55 +0000 Subject: [PATCH 2/3] Defend against non-string date value. 
--- youtube_dl/extractor/ccma.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py index 7533f96e7..0dd0bd1cb 100644 --- a/youtube_dl/extractor/ccma.py +++ b/youtube_dl/extractor/ccma.py @@ -4,6 +4,8 @@ from __future__ import unicode_literals import re from .common import InfoExtractor + +from .compat import compat_str from ..utils import ( clean_html, int_or_none, @@ -108,7 +110,7 @@ class CCMAIE(InfoExtractor): durada = informacio.get('durada') or {} duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text')) tematica = try_get(informacio, lambda x: x['tematica']['text']) - timestamp = parse_iso8601(try_get(informacio, lambda x: x['data_emissio']['utc'])) + timestamp = parse_iso8601(try_get(informacio, lambda x: x['data_emissio']['utc'], compat_str)) subtitles = {} subtitols = media.get('subtitols') or [] From 9b828daa1a5bea7fe1dde12cb5963d223b800e94 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 31 Oct 2022 13:42:54 +0000 Subject: [PATCH 3/3] Correct ..compat --- youtube_dl/extractor/ccma.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py index 0dd0bd1cb..a37104ef0 100644 --- a/youtube_dl/extractor/ccma.py +++ b/youtube_dl/extractor/ccma.py @@ -5,7 +5,7 @@ import re from .common import InfoExtractor -from .compat import compat_str +from ..compat import compat_str from ..utils import ( clean_html, int_or_none,