Improve metadata extraction

This commit is contained in:
df 2021-09-13 14:53:58 +01:00
parent 91557e752c
commit 666a963046

View File

@ -3,10 +3,13 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
bool_or_none,
extract_attributes,
int_or_none,
smuggle_url,
try_get,
unified_timestamp,
url_or_none,
)
@ -24,17 +27,20 @@ class SlidesLiveIE(InfoExtractor):
'description': 'Watch full version of this video at https://slideslive.com/38902413.',
'uploader': 'SlidesLive Videos - A',
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
'timestamp': 1597615266,
'timestamp': 1618809663,
'upload_date': '20170925',
}
}, {
# video_service_name = yoda
'url': 'https://slideslive.com/38935785',
'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a',
'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a', # d735b130beb40013a839de1c58a74689
'info_dict': {
'id': 'RMraDYN5ozA_',
'id': 'F31OTzeGyDK_',
'display_id': '38935785',
'ext': 'mp4',
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
'upload_date': '20210220',
'timestamp': 1613785940,
},
'params': {
'format': 'bestvideo',
@ -82,12 +88,23 @@ class SlidesLiveIE(InfoExtractor):
})
info = {
'id': video_id,
'thumbnail': video_data.get('thumbnail'),
'thumbnail': video_data.get(
'thumbnail',
self._html_search_meta(('thumbnailUrl', 'thumbnailURL'), webpage)),
'is_live': bool_or_none(video_data.get('is_live')),
'subtitles': subtitles,
'timestamp': (
int_or_none(video_data.get('updated_at'))
or unified_timestamp(
self._html_search_meta('uploadDate', webpage))),
'creator': self._og_search_property('author', webpage, fatal=False),
}
title = (
video_data.get('title')
or self._html_search_meta('name', webpage, display_name='meta title')
or self._og_search_title(webpage, fatal=False))
if service_name in ('url', 'yoda'):
info['title'] = video_data['title']
info['title'] = title or video_data['title']
if service_name == 'url':
info['url'] = service_id
else:
@ -103,6 +120,7 @@ class SlidesLiveIE(InfoExtractor):
self._sort_formats(formats)
info.update({
'id': service_id,
'display_id': video_id,
'formats': formats,
})
else:
@ -110,7 +128,7 @@ class SlidesLiveIE(InfoExtractor):
'_type': 'url_transparent',
'url': service_id,
'ie_key': service_name.capitalize(),
'title': video_data.get('title'),
'title': title,
})
if service_name == 'vimeo':
info['url'] = smuggle_url(