Improve metadata extraction
This commit is contained in:
parent
91557e752c
commit
666a963046
@ -3,10 +3,13 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
bool_or_none,
|
bool_or_none,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
|
int_or_none,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
try_get,
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -24,17 +27,20 @@ class SlidesLiveIE(InfoExtractor):
|
|||||||
'description': 'Watch full version of this video at https://slideslive.com/38902413.',
|
'description': 'Watch full version of this video at https://slideslive.com/38902413.',
|
||||||
'uploader': 'SlidesLive Videos - A',
|
'uploader': 'SlidesLive Videos - A',
|
||||||
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
|
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
|
||||||
'timestamp': 1597615266,
|
'timestamp': 1618809663,
|
||||||
'upload_date': '20170925',
|
'upload_date': '20170925',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# video_service_name = yoda
|
# video_service_name = yoda
|
||||||
'url': 'https://slideslive.com/38935785',
|
'url': 'https://slideslive.com/38935785',
|
||||||
'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a',
|
'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a', # d735b130beb40013a839de1c58a74689
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'RMraDYN5ozA_',
|
'id': 'F31OTzeGyDK_',
|
||||||
|
'display_id': '38935785',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
|
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
|
||||||
|
'upload_date': '20210220',
|
||||||
|
'timestamp': 1613785940,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'format': 'bestvideo',
|
'format': 'bestvideo',
|
||||||
@ -82,12 +88,23 @@ class SlidesLiveIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'thumbnail': video_data.get('thumbnail'),
|
'thumbnail': video_data.get(
|
||||||
|
'thumbnail',
|
||||||
|
self._html_search_meta(('thumbnailUrl', 'thumbnailURL'), webpage)),
|
||||||
'is_live': bool_or_none(video_data.get('is_live')),
|
'is_live': bool_or_none(video_data.get('is_live')),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'timestamp': (
|
||||||
|
int_or_none(video_data.get('updated_at'))
|
||||||
|
or unified_timestamp(
|
||||||
|
self._html_search_meta('uploadDate', webpage))),
|
||||||
|
'creator': self._og_search_property('author', webpage, fatal=False),
|
||||||
}
|
}
|
||||||
|
title = (
|
||||||
|
video_data.get('title')
|
||||||
|
or self._html_search_meta('name', webpage, display_name='meta title')
|
||||||
|
or self._og_search_title(webpage, fatal=False))
|
||||||
if service_name in ('url', 'yoda'):
|
if service_name in ('url', 'yoda'):
|
||||||
info['title'] = video_data['title']
|
info['title'] = title or video_data['title']
|
||||||
if service_name == 'url':
|
if service_name == 'url':
|
||||||
info['url'] = service_id
|
info['url'] = service_id
|
||||||
else:
|
else:
|
||||||
@ -103,6 +120,7 @@ class SlidesLiveIE(InfoExtractor):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
info.update({
|
info.update({
|
||||||
'id': service_id,
|
'id': service_id,
|
||||||
|
'display_id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
@ -110,7 +128,7 @@ class SlidesLiveIE(InfoExtractor):
|
|||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': service_id,
|
'url': service_id,
|
||||||
'ie_key': service_name.capitalize(),
|
'ie_key': service_name.capitalize(),
|
||||||
'title': video_data.get('title'),
|
'title': title,
|
||||||
})
|
})
|
||||||
if service_name == 'vimeo':
|
if service_name == 'vimeo':
|
||||||
info['url'] = smuggle_url(
|
info['url'] = smuggle_url(
|
||||||
|
Loading…
Reference in New Issue
Block a user