[extractor/common] Expose fragments interface for dashsegments formats
This commit is contained in:
parent
a0d5077c8d
commit
b4c1d6e800
@ -1551,21 +1551,12 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def extract_multisegment_info(element, ms_parent_info):
|
def extract_multisegment_info(element, ms_parent_info):
|
||||||
ms_info = ms_parent_info.copy()
|
ms_info = ms_parent_info.copy()
|
||||||
segment_list = element.find(_add_ns('SegmentList'))
|
|
||||||
if segment_list is not None:
|
# As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
|
||||||
segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
|
# common attributes and elements. We will only extract those relevant
|
||||||
if segment_urls_e:
|
# for us.
|
||||||
ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
|
def extract_common(source):
|
||||||
initialization = segment_list.find(_add_ns('Initialization'))
|
segment_timeline = source.find(_add_ns('SegmentTimeline'))
|
||||||
if initialization is not None:
|
|
||||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
|
||||||
else:
|
|
||||||
segment_template = element.find(_add_ns('SegmentTemplate'))
|
|
||||||
if segment_template is not None:
|
|
||||||
start_number = segment_template.get('startNumber')
|
|
||||||
if start_number:
|
|
||||||
ms_info['start_number'] = int(start_number)
|
|
||||||
segment_timeline = segment_template.find(_add_ns('SegmentTimeline'))
|
|
||||||
if segment_timeline is not None:
|
if segment_timeline is not None:
|
||||||
s_e = segment_timeline.findall(_add_ns('S'))
|
s_e = segment_timeline.findall(_add_ns('S'))
|
||||||
if s_e:
|
if s_e:
|
||||||
@ -1580,13 +1571,32 @@ class InfoExtractor(object):
|
|||||||
'd': int(s.attrib['d']),
|
'd': int(s.attrib['d']),
|
||||||
'r': r,
|
'r': r,
|
||||||
})
|
})
|
||||||
else:
|
start_number = source.get('startNumber')
|
||||||
timescale = segment_template.get('timescale')
|
if start_number:
|
||||||
|
ms_info['start_number'] = int(start_number)
|
||||||
|
timescale = source.get('timescale')
|
||||||
if timescale:
|
if timescale:
|
||||||
ms_info['timescale'] = int(timescale)
|
ms_info['timescale'] = int(timescale)
|
||||||
segment_duration = segment_template.get('duration')
|
segment_duration = source.get('duration')
|
||||||
if segment_duration:
|
if segment_duration:
|
||||||
ms_info['segment_duration'] = int(segment_duration)
|
ms_info['segment_duration'] = int(segment_duration)
|
||||||
|
|
||||||
|
def extract_Initialization(source):
    """Record the initialization URL declared under *source*, if any.

    Looks up the namespaced ``Initialization`` child of *source* and, when
    one exists, stores its ``sourceURL`` attribute in the enclosing scope's
    ``ms_info`` under the ``'initialization_url'`` key. Nothing is written
    when there is no such child.

    NOTE(review): ``sourceURL`` is read with a plain subscript, so an
    ``Initialization`` child without that attribute will raise from the
    attribute lookup -- confirm callers never hit that case.
    """
    init_element = source.find(_add_ns('Initialization'))
    if init_element is None:
        # No Initialization child: leave ms_info untouched.
        return
    ms_info['initialization_url'] = init_element.attrib['sourceURL']
|
||||||
|
|
||||||
|
segment_list = element.find(_add_ns('SegmentList'))
|
||||||
|
if segment_list is not None:
|
||||||
|
extract_common(segment_list)
|
||||||
|
extract_Initialization(segment_list)
|
||||||
|
segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
|
||||||
|
if segment_urls_e:
|
||||||
|
ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
|
||||||
|
else:
|
||||||
|
segment_template = element.find(_add_ns('SegmentTemplate'))
|
||||||
|
if segment_template is not None:
|
||||||
|
extract_common(segment_template)
|
||||||
media_template = segment_template.get('media')
|
media_template = segment_template.get('media')
|
||||||
if media_template:
|
if media_template:
|
||||||
ms_info['media_template'] = media_template
|
ms_info['media_template'] = media_template
|
||||||
@ -1594,11 +1604,14 @@ class InfoExtractor(object):
|
|||||||
if initialization:
|
if initialization:
|
||||||
ms_info['initialization_url'] = initialization
|
ms_info['initialization_url'] = initialization
|
||||||
else:
|
else:
|
||||||
initialization = segment_template.find(_add_ns('Initialization'))
|
extract_Initialization(segment_template)
|
||||||
if initialization is not None:
|
|
||||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
|
||||||
return ms_info
|
return ms_info
|
||||||
|
|
||||||
|
def combine_url(base_url, target_url):
    """Return *target_url* resolved against *base_url*.

    A *target_url* that already starts with ``http://`` or ``https://`` is
    returned unchanged. Anything else is appended to *base_url*, with
    exactly one ``/`` inserted between them unless *base_url* already ends
    with a slash.
    """
    is_absolute = re.match(r'^https?://', target_url)
    if not is_absolute:
        # Only add a separator when the base does not already provide one.
        separator = '' if base_url.endswith('/') else '/'
        return '%s%s%s' % (base_url, separator, target_url)
    return target_url
|
||||||
|
|
||||||
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
||||||
formats = []
|
formats = []
|
||||||
for period in mpd_doc.findall(_add_ns('Period')):
|
for period in mpd_doc.findall(_add_ns('Period')):
|
||||||
@ -1655,9 +1668,7 @@ class InfoExtractor(object):
|
|||||||
}
|
}
|
||||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||||
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
||||||
if 'total_number' not in representation_ms_info and 'segment_duration':
|
|
||||||
segment_duration = float(representation_ms_info['segment_duration']) / float(representation_ms_info['timescale'])
|
|
||||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
|
||||||
media_template = representation_ms_info['media_template']
|
media_template = representation_ms_info['media_template']
|
||||||
media_template = media_template.replace('$RepresentationID$', representation_id)
|
media_template = media_template.replace('$RepresentationID$', representation_id)
|
||||||
media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
|
media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
|
||||||
@ -1666,7 +1677,11 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
||||||
# can't be used at the same time
|
# can't be used at the same time
|
||||||
if '%(Number' in media_template:
|
if '%(Number' in media_template and 's' not in representation_ms_info:
|
||||||
|
segment_duration = None
|
||||||
|
if 'total_number' not in representation_ms_info and 'segment_duration':
|
||||||
|
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
|
||||||
|
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||||
representation_ms_info['segment_urls'] = [
|
representation_ms_info['segment_urls'] = [
|
||||||
media_template % {
|
media_template % {
|
||||||
'Number': segment_number,
|
'Number': segment_number,
|
||||||
@ -1675,28 +1690,65 @@ class InfoExtractor(object):
|
|||||||
for segment_number in range(
|
for segment_number in range(
|
||||||
representation_ms_info['start_number'],
|
representation_ms_info['start_number'],
|
||||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||||
|
representation_ms_info['fragments'] = [{
|
||||||
|
'url': media_template % {
|
||||||
|
'Number': segment_number,
|
||||||
|
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||||
|
},
|
||||||
|
'duration': segment_duration,
|
||||||
|
} for segment_number in range(
|
||||||
|
representation_ms_info['start_number'],
|
||||||
|
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||||
else:
|
else:
|
||||||
|
# $Number*$ or $Time$ in media template with S list available
|
||||||
|
# Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
|
||||||
|
# Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
|
||||||
representation_ms_info['segment_urls'] = []
|
representation_ms_info['segment_urls'] = []
|
||||||
|
representation_ms_info['fragments'] = []
|
||||||
segment_time = 0
|
segment_time = 0
|
||||||
|
segment_d = None
|
||||||
|
segment_number = representation_ms_info['start_number']
|
||||||
|
|
||||||
def add_segment_url():
|
def add_segment_url():
|
||||||
representation_ms_info['segment_urls'].append(
|
segment_url = media_template % {
|
||||||
media_template % {
|
|
||||||
'Time': segment_time,
|
'Time': segment_time,
|
||||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||||
|
'Number': segment_number,
|
||||||
}
|
}
|
||||||
)
|
representation_ms_info['segment_urls'].append(segment_url)
|
||||||
|
representation_ms_info['fragments'].append({
|
||||||
|
'url': segment_url,
|
||||||
|
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
|
||||||
|
})
|
||||||
|
|
||||||
for num, s in enumerate(representation_ms_info['s']):
|
for num, s in enumerate(representation_ms_info['s']):
|
||||||
segment_time = s.get('t') or segment_time
|
segment_time = s.get('t') or segment_time
|
||||||
|
segment_d = s['d']
|
||||||
add_segment_url()
|
add_segment_url()
|
||||||
|
segment_number += 1
|
||||||
for r in range(s.get('r', 0)):
|
for r in range(s.get('r', 0)):
|
||||||
segment_time += s['d']
|
segment_time += segment_d
|
||||||
add_segment_url()
|
add_segment_url()
|
||||||
segment_time += s['d']
|
segment_number += 1
|
||||||
|
segment_time += segment_d
|
||||||
|
elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
|
||||||
|
# No media template
|
||||||
|
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
|
||||||
|
# or any YouTube dashsegments video
|
||||||
|
fragments = []
|
||||||
|
s_num = 0
|
||||||
|
for segment_url in representation_ms_info['segment_urls']:
|
||||||
|
s = representation_ms_info['s'][s_num]
|
||||||
|
for r in range(s.get('r', 0) + 1):
|
||||||
|
fragments.append({
|
||||||
|
'url': segment_url,
|
||||||
|
'duration': float_or_none(s['d'], representation_ms_info['timescale']),
|
||||||
|
})
|
||||||
|
representation_ms_info['fragments'] = fragments
|
||||||
if 'segment_urls' in representation_ms_info:
|
if 'segment_urls' in representation_ms_info:
|
||||||
f.update({
|
f.update({
|
||||||
'segment_urls': representation_ms_info['segment_urls'],
|
'segment_urls': representation_ms_info['segment_urls'],
|
||||||
|
'fragments': [],
|
||||||
'protocol': 'http_dash_segments',
|
'protocol': 'http_dash_segments',
|
||||||
})
|
})
|
||||||
if 'initialization_url' in representation_ms_info:
|
if 'initialization_url' in representation_ms_info:
|
||||||
@ -1706,6 +1758,10 @@ class InfoExtractor(object):
|
|||||||
})
|
})
|
||||||
if not f.get('url'):
|
if not f.get('url'):
|
||||||
f['url'] = initialization_url
|
f['url'] = initialization_url
|
||||||
|
f['fragments'].append({'url': initialization_url})
|
||||||
|
f['fragments'].extend(representation_ms_info['fragments'])
|
||||||
|
for fragment in f['fragments']:
|
||||||
|
fragment['url'] = combine_url(base_url, fragment['url'])
|
||||||
try:
|
try:
|
||||||
existing_format = next(
|
existing_format = next(
|
||||||
fo for fo in formats
|
fo for fo in formats
|
||||||
|
Loading…
Reference in New Issue
Block a user