Compare commits

...

6 Commits

Author SHA1 Message Date
Josef Bohórquez
3432f0a00a
Merge c85000591b into 37cea84f77 2024-07-02 14:28:59 -04:00
dirkf
37cea84f77 [core,utils] Support unpublicised --no-check-extensions 2024-07-02 15:38:50 +01:00
dirkf
4652109643 [core,utils] Implement unsafe file extension mitigation
* from https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4, thx grub4k
2024-07-02 15:38:50 +01:00
dirkf
3c466186a8 [utils] Back-port Namespace and MEDIA_EXTENSIONS from yt-dlp
Thx pukkandan
* Namespace: https://github.com/yt-dlp/yt-dlp/commit/591bb9d355
* MEDIA_EXTENSIONS: https://github.com/yt-dlp/yt-dlp/commit/8dc5930511
2024-07-02 15:38:50 +01:00
Josef Bohórquez
c85000591b Update youtube.py
Some improvements were made to the login function.
2024-06-11 23:23:06 -05:00
Josef Bohórquez
1b599af1db Some improvements were added to Utils.py and YoutubeDL.py 2024-06-11 23:12:53 -05:00
6 changed files with 283 additions and 164 deletions

View File

@ -14,9 +14,11 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import io
import itertools
import json
import types
import xml.etree.ElementTree
from youtube_dl.utils import (
_UnsafeExtensionError,
age_restricted,
args_to_str,
base_url,
@ -270,6 +272,27 @@ class TestUtil(unittest.TestCase):
expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
'%s/expanded' % compat_getenv('HOME'))
_uncommon_extensions = [
('exe', 'abc.exe.ext'),
('de', 'abc.de.ext'),
('../.mp4', None),
('..\\.mp4', None),
]
def assertUnsafeExtension(self, ext=None):
assert_raises = self.assertRaises(_UnsafeExtensionError)
assert_raises.ext = ext
orig_exit = assert_raises.__exit__
def my_exit(self_, exc_type, exc_val, exc_tb):
did_raise = orig_exit(exc_type, exc_val, exc_tb)
if did_raise and assert_raises.ext is not None:
self.assertEqual(assert_raises.ext, assert_raises.exception.extension, 'Unsafe extension not as unexpected')
return did_raise
assert_raises.__exit__ = types.MethodType(my_exit, assert_raises)
return assert_raises
def test_prepend_extension(self):
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
@ -278,6 +301,19 @@ class TestUtil(unittest.TestCase):
self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp')
self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext')
# Test uncommon extensions
self.assertEqual(prepend_extension('abc.ext', 'bin'), 'abc.bin.ext')
for ext, result in self._uncommon_extensions:
with self.assertUnsafeExtension(ext):
prepend_extension('abc', ext)
if result:
self.assertEqual(prepend_extension('abc.ext', ext, 'ext'), result)
else:
with self.assertUnsafeExtension(ext):
prepend_extension('abc.ext', ext, 'ext')
with self.assertUnsafeExtension(ext):
prepend_extension('abc.unexpected_ext', ext, 'ext')
def test_replace_extension(self):
self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
@ -286,6 +322,16 @@ class TestUtil(unittest.TestCase):
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
# Test uncommon extensions
self.assertEqual(replace_extension('abc.ext', 'bin'), 'abc.unknown_video')
for ext, _ in self._uncommon_extensions:
with self.assertUnsafeExtension(ext):
replace_extension('abc', ext)
with self.assertUnsafeExtension(ext):
replace_extension('abc.ext', ext, 'ext')
with self.assertUnsafeExtension(ext):
replace_extension('abc.unexpected_ext', ext, 'ext')
def test_subtitles_filename(self):
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')

View File

@ -7,6 +7,7 @@ import collections
import copy
import datetime
import errno
import functools
import io
import itertools
import json
@ -53,6 +54,7 @@ from .compat import (
compat_urllib_request_DataHandler,
)
from .utils import (
_UnsafeExtensionError,
age_restricted,
args_to_str,
bug_reports_message,
@ -129,6 +131,20 @@ if compat_os_name == 'nt':
import ctypes
def _catch_unsafe_file_extension(func):
@functools.wraps(func)
def wrapper(self, *args, **kwargs):
try:
return func(self, *args, **kwargs)
except _UnsafeExtensionError as error:
self.report_error(
'{0} found; to avoid damaging your system, this value is disallowed.'
' If you believe this is an error{1}').format(
error.message, bug_reports_message(','))
return wrapper
class YoutubeDL(object):
"""YoutubeDL class.
@ -1925,6 +1941,7 @@ class YoutubeDL(object):
if self.params.get('forcejson', False):
self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
@_catch_unsafe_file_extension
def process_info(self, info_dict):
"""Process a single resolved IE result."""
@ -2371,60 +2388,38 @@ class YoutubeDL(object):
return res
def _format_note(self, fdict):
res = ''
if fdict.get('ext') in ['f4f', 'f4m']:
res += '(unsupported) '
note_parts = []
if fdict.get('ext') in ('f4f', 'f4m'):
note_parts.append('(unsupported)')
if fdict.get('language'):
if res:
res += ' '
res += '[%s] ' % fdict['language']
if fdict.get('format_note') is not None:
res += fdict['format_note'] + ' '
note_parts.append(f'[{fdict["language"]}]')
if fdict.get('format_note'):
note_parts.append(fdict['format_note'])
if fdict.get('tbr') is not None:
res += '%4dk ' % fdict['tbr']
note_parts.append('%4dk' % fdict['tbr'])
if fdict.get('container') is not None:
if res:
res += ', '
res += '%s container' % fdict['container']
if (fdict.get('vcodec') is not None
and fdict.get('vcodec') != 'none'):
if res:
res += ', '
res += fdict['vcodec']
if fdict.get('vbr') is not None:
res += '@'
elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
res += 'video@'
note_parts.append('%s container' % fdict['container'])
if fdict.get('vcodec') not in (None, 'none'):
note_parts.append(fdict['vcodec'] + ('@' if fdict.get('vbr') else ''))
elif fdict.get('vbr') is not None:
note_parts.append('video@')
if fdict.get('vbr') is not None:
res += '%4dk' % fdict['vbr']
note_parts.append('%4dk' % fdict['vbr'])
if fdict.get('fps') is not None:
if res:
res += ', '
res += '%sfps' % fdict['fps']
note_parts.append('%sfps' % fdict['fps'])
if fdict.get('acodec') is not None:
if res:
res += ', '
if fdict['acodec'] == 'none':
res += 'video only'
else:
res += '%-5s' % fdict['acodec']
note_parts.append('video only' if fdict['acodec'] == 'none' else '%-5s' % fdict['acodec'])
elif fdict.get('abr') is not None:
if res:
res += ', '
res += 'audio'
note_parts.append('audio')
if fdict.get('abr') is not None:
res += '@%3dk' % fdict['abr']
note_parts.append('@%3dk' % fdict['abr'])
if fdict.get('asr') is not None:
res += ' (%5dHz)' % fdict['asr']
note_parts.append('(%5dHz)' % fdict['asr'])
if fdict.get('filesize') is not None:
if res:
res += ', '
res += format_bytes(fdict['filesize'])
note_parts.append(format_bytes(fdict['filesize']))
elif fdict.get('filesize_approx') is not None:
if res:
res += ', '
res += '~' + format_bytes(fdict['filesize_approx'])
return res
note_parts.append('~' + format_bytes(fdict['filesize_approx']))
return ' '.join(note_parts)
def list_formats(self, info_dict):
formats = info_dict.get('formats', [info_dict])

View File

@ -21,6 +21,7 @@ from .compat import (
workaround_optparse_bug9161,
)
from .utils import (
_UnsafeExtensionError,
DateRange,
decodeOption,
DEFAULT_OUTTMPL,
@ -173,6 +174,9 @@ def _real_main(argv=None):
if opts.ap_mso and opts.ap_mso not in MSO_INFO:
parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
if opts.no_check_extensions:
_UnsafeExtensionError.lenient = True
def parse_retries(retries):
if retries in ('inf', 'infinite'):
parsed_retries = float('inf')

View File

@ -87,6 +87,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
"""
username, password = self._get_login_info()
# No authentication to be performed
if username is None:
if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
@ -129,21 +130,20 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
self._downloader.report_warning(message)
lookup_req = [
username,
None, [], None, 'US', None, None, 2, False, True,
[
None, None,
[2, 1, None, 1,
'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
None, [], 4],
1, [None, None, []], None, None, None, True
],
username, None, [], None, 'US', None, None, 2, False, True,
[None, None, [2, 1, None, 1,
'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
None, [], 4], 1, [None, None, []], None, None, None, True],
username,
]
lookup_results = req(
self._LOOKUP_URL, lookup_req,
'Looking up account info', 'Unable to look up account info')
# --- Cambio 1: Extracción de función para mejorar la legibilidad ---
def perform_lookup(req):
return self._download_json(
self._LOOKUP_URL, req,
'Looking up account info', 'Unable to look up account info')
lookup_results = perform_lookup(lookup_req)
if lookup_results is False:
return False
@ -154,12 +154,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return False
challenge_req = [
user_hash,
None, 1, None, [1, None, None, None, [password, None, True]],
[
None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
1, [None, None, []], None, None, None, True
]]
user_hash, None, 1, None, [1, None, None, None, [password, None, True]],
[None, None, [2, 1, None, 1,
'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
None, [], 4], 1, [None, None, []], None, None, None, True]]
challenge_results = req(
self._CHALLENGE_URL, challenge_req,

View File

@ -533,6 +533,10 @@ def parseOpts(overrideArguments=None):
'--no-check-certificate',
action='store_true', dest='no_check_certificate', default=False,
help='Suppress HTTPS certificate validation')
workarounds.add_option(
'--no-check-extensions',
action='store_true', dest='no_check_extensions', default=False,
help='Suppress file extension validation')
workarounds.add_option(
'--prefer-insecure',
'--prefer-unsecure', action='store_true', dest='prefer_insecure',

View File

@ -1717,21 +1717,6 @@ TIMEZONE_NAMES = {
'PST': -8, 'PDT': -7 # Pacific
}
KNOWN_EXTENSIONS = (
'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
'flv', 'f4v', 'f4a', 'f4b',
'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
'mkv', 'mka', 'mk3d',
'avi', 'divx',
'mov',
'asf', 'wmv', 'wma',
'3gp', '3g2',
'mp3',
'flac',
'ape',
'wav',
'f4f', 'f4m', 'm3u8', 'smil')
# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
@ -3959,19 +3944,22 @@ def parse_duration(s):
return duration
def prepend_extension(filename, ext, expected_real_ext=None):
def _change_extension(prepend, filename, ext, expected_real_ext=None):
name, real_ext = os.path.splitext(filename)
return (
'{0}.{1}{2}'.format(name, ext, real_ext)
if not expected_real_ext or real_ext[1:] == expected_real_ext
else '{0}.{1}'.format(filename, ext))
sanitize_extension = _UnsafeExtensionError.sanitize_extension
if not expected_real_ext or real_ext.partition('.')[0::2] == ('', expected_real_ext):
filename = name
if prepend and real_ext:
sanitize_extension(ext, prepend=prepend)
return ''.join((filename, '.', ext, real_ext))
# Mitigate path traversal and file impersonation attacks
return '.'.join((filename, sanitize_extension(ext)))
def replace_extension(filename, ext, expected_real_ext=None):
name, real_ext = os.path.splitext(filename)
return '{0}.{1}'.format(
name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
ext)
prepend_extension = functools.partial(_change_extension, True)
replace_extension = functools.partial(_change_extension, False)
def check_executable(exe, args=[]):
@ -6002,114 +5990,63 @@ def parse_m3u8_attributes(attrib):
def urshift(val, n):
return val >> n if val >= 0 else (val + 0x100000000) >> n
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
# Reference: https://www.w3.org/TR/PNG/
header = png_data[8:]
if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a':
raise IOError('Not a valid PNG file.')
int_map = {1: '>B', 2: '>H', 4: '>I'}
unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
def unpack_integer(data):
return compat_struct_unpack(f'>{int_map[len(data)]}', data)[0]
int_map = {1: 'B', 2: 'H', 4: 'I'}
header = png_data[8:]
chunks = []
while header:
length = unpack_integer(header[:4])
header = header[4:]
chunk_type, chunk_data, header = header[4:8], header[8:8 + length], header[8 + length + 4:]
chunks.append({'type': chunk_type, 'data': chunk_data})
chunk_type = header[:4]
header = header[4:]
if not (ihdr := next((c["data"] for c in chunks if c["type"] == b'IHDR'), None)):
raise IOError("Unable to read PNG header.")
chunk_data = header[:length]
header = header[length:]
header = header[4:] # Skip CRC
chunks.append({
'type': chunk_type,
'length': length,
'data': chunk_data
})
ihdr = chunks[0]['data']
width = unpack_integer(ihdr[:4])
height = unpack_integer(ihdr[4:8])
idat = b''
for chunk in chunks:
if chunk['type'] == b'IDAT':
idat += chunk['data']
width, height = unpack_integer(ihdr[:4]), unpack_integer(ihdr[4:8])
idat = b''.join(c['data'] for c in chunks if c['type'] == b'IDAT')
if not idat:
raise IOError('Unable to read PNG data.')
decompressed_data = bytearray(zlib.decompress(idat))
stride = width * 3
pixels = []
pixels = [[] for _ in range(height)]
def _get_pixel(idx):
x = idx % stride
y = idx // stride
return pixels[y][x]
def _get_pixel(x, y):
return pixels[y][x] if x >= 0 and y >= 0 else 0
for y in range(height):
basePos = y * (1 + stride)
filter_type = decompressed_data[basePos]
current_row = []
pixels.append(current_row)
filter_type = decompressed_data[y * (1 + stride)]
for x in range(stride):
color = decompressed_data[1 + basePos + x]
basex = y * stride + x
left = 0
up = 0
color = decompressed_data[1 + y * (1 + stride) + x]
left, up = _get_pixel(x - 3, y), _get_pixel(x, y - 1)
if x > 2:
left = _get_pixel(basex - 3)
if y > 0:
up = _get_pixel(basex - stride)
if filter_type == 1: # Sub
if filter_type == 1: # Sub
color = (color + left) & 0xff
elif filter_type == 2: # Up
elif filter_type == 2: # Up
color = (color + up) & 0xff
elif filter_type == 3: # Average
elif filter_type == 3: # Average
color = (color + ((left + up) >> 1)) & 0xff
elif filter_type == 4: # Paeth
a = left
b = up
c = 0
if x > 2 and y > 0:
c = _get_pixel(basex - stride - 3)
elif filter_type == 4: # Paeth
a, b, c = left, up, _get_pixel(x - 3, y - 1)
p = a + b - c
pa, pb, pc = abs(p - a), abs(p - b), abs(p - c)
color = (color + (a if pa <= pb and pa <= pc else b if pb <= pc else c)) & 0xff
pa = abs(p - a)
pb = abs(p - b)
pc = abs(p - c)
if pa <= pb and pa <= pc:
color = (color + a) & 0xff
elif pb <= pc:
color = (color + b) & 0xff
else:
color = (color + c) & 0xff
current_row.append(color)
pixels[y].append(color)
return width, height, pixels
def write_xattr(path, key, value):
# This mess below finds the best xattr tool for the job
try:
@ -6561,3 +6498,138 @@ def join_nonempty(*values, **kwargs):
if from_dict is not None:
values = (traverse_obj(from_dict, variadic(v)) for v in values)
return delim.join(map(compat_str, filter(None, values)))
class Namespace(object):
"""Immutable namespace"""
def __init__(self, **kw_attr):
self.__dict__.update(kw_attr)
def __iter__(self):
return iter(self.__dict__.values())
@property
def items_(self):
return self.__dict__.items()
MEDIA_EXTENSIONS = Namespace(
common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
thumbnails=('jpg', 'png', 'webp'),
# storyboards=('mhtml', ),
subtitles=('srt', 'vtt', 'ass', 'lrc', 'ttml'),
manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
)
MEDIA_EXTENSIONS.video = MEDIA_EXTENSIONS.common_video + MEDIA_EXTENSIONS.video
MEDIA_EXTENSIONS.audio = MEDIA_EXTENSIONS.common_audio + MEDIA_EXTENSIONS.audio
KNOWN_EXTENSIONS = (
MEDIA_EXTENSIONS.video + MEDIA_EXTENSIONS.audio
+ MEDIA_EXTENSIONS.manifests
)
class _UnsafeExtensionError(Exception):
"""
Mitigation exception for unwanted file overwrite/path traversal
Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j
"""
_ALLOWED_EXTENSIONS = frozenset(itertools.chain(
( # internal
'description',
'json',
'meta',
'orig',
'part',
'temp',
'uncut',
'unknown_video',
'ytdl',
),
# video
MEDIA_EXTENSIONS.video, (
'avif',
'ismv',
'm2ts',
'm4s',
'mng',
'mpeg',
'qt',
'swf',
'ts',
'vp9',
'wvm',
),
# audio
MEDIA_EXTENSIONS.audio, (
'isma',
'mid',
'mpga',
'ra',
),
# image
MEDIA_EXTENSIONS.thumbnails, (
'bmp',
'gif',
'ico',
'heic',
'jng',
'jpeg',
'jxl',
'svg',
'tif',
'wbmp',
),
# subtitle
MEDIA_EXTENSIONS.subtitles, (
'dfxp',
'fs',
'ismt',
'sami',
'scc',
'ssa',
'tt',
),
# others
MEDIA_EXTENSIONS.manifests,
(
# not used in yt-dl
# *MEDIA_EXTENSIONS.storyboards,
# 'desktop',
# 'ism',
# 'm3u',
# 'sbv',
# 'swp',
# 'url',
# 'webloc',
# 'xml',
)))
def __init__(self, extension):
super(_UnsafeExtensionError, self).__init__('unsafe file extension: {0!r}'.format(extension))
self.extension = extension
# support --no-check-extensions
lenient = False
@classmethod
def sanitize_extension(cls, extension, **kwargs):
# ... /, *, prepend=False
prepend = kwargs.get('prepend', False)
if '/' in extension or '\\' in extension:
raise cls(extension)
if not prepend:
last = extension.rpartition('.')[-1]
if last == 'bin':
extension = last = 'unknown_video'
if not (cls.lenient or last.lower() in cls._ALLOWED_EXTENSIONS):
raise cls(extension)
return extension