2013-06-18 15:14:21 -05:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
2014-01-04 18:52:03 -06:00
from __future__ import absolute_import , unicode_literals
2013-06-18 15:14:21 -05:00
2013-12-09 15:00:42 -06:00
import collections
2014-03-13 09:30:25 -05:00
import datetime
2013-10-05 21:27:09 -05:00
import errno
2013-06-18 15:14:21 -05:00
import io
2014-12-06 07:02:19 -06:00
import itertools
2013-11-19 23:18:24 -06:00
import json
2014-03-29 23:02:41 -05:00
import locale
2015-01-22 17:04:05 -06:00
import operator
2013-06-18 15:14:21 -05:00
import os
2013-11-22 12:57:52 -06:00
import platform
2013-06-18 15:14:21 -05:00
import re
import shutil
2013-11-22 12:57:52 -06:00
import subprocess
2013-06-18 15:14:21 -05:00
import socket
import sys
import time
import traceback
2013-11-17 04:39:52 -06:00
if os . name == ' nt ' :
import ctypes
2014-11-02 04:23:40 -06:00
from . compat import (
2015-02-01 04:30:56 -06:00
compat_basestring ,
2013-11-22 12:57:52 -06:00
compat_cookiejar ,
2014-09-30 10:27:53 -05:00
compat_expanduser ,
2013-11-17 09:47:52 -06:00
compat_http_client ,
2014-12-14 18:06:25 -06:00
compat_kwargs ,
2013-11-17 09:47:52 -06:00
compat_str ,
compat_urllib_error ,
compat_urllib_request ,
2014-11-02 04:23:40 -06:00
)
from . utils import (
2014-09-13 08:59:16 -05:00
escape_url ,
2013-11-17 09:47:52 -06:00
ContentTooShortError ,
date_from_str ,
DateRange ,
2014-04-30 03:02:03 -05:00
DEFAULT_OUTTMPL ,
2013-11-17 09:47:52 -06:00
determine_ext ,
DownloadError ,
encodeFilename ,
ExtractorError ,
2013-11-24 20:12:26 -06:00
format_bytes ,
2013-12-15 21:15:10 -06:00
formatSeconds ,
2013-12-09 11:29:07 -06:00
get_term_width ,
2013-11-17 09:47:52 -06:00
locked_file ,
2013-11-22 12:57:52 -06:00
make_HTTPS_handler ,
2013-11-17 09:47:52 -06:00
MaxDownloadsReached ,
2014-01-20 04:36:47 -06:00
PagedList ,
2015-01-22 17:04:05 -06:00
parse_filesize ,
2013-11-17 09:47:52 -06:00
PostProcessingError ,
2013-11-22 12:57:52 -06:00
platform_name ,
2013-11-17 09:47:52 -06:00
preferredencoding ,
2015-01-24 19:38:47 -06:00
render_table ,
2013-11-17 09:47:52 -06:00
SameFileError ,
sanitize_filename ,
2015-01-24 11:52:26 -06:00
std_headers ,
2013-11-17 09:47:52 -06:00
subtitles_filename ,
takewhile_inclusive ,
UnavailableVideoError ,
2013-12-16 21:13:36 -06:00
url_basename ,
2015-01-10 14:02:27 -06:00
version_tuple ,
2013-11-17 09:47:52 -06:00
write_json_file ,
write_string ,
2013-11-22 12:57:52 -06:00
YoutubeDLHandler ,
2014-01-04 06:13:51 -06:00
prepend_extension ,
2014-11-23 03:49:19 -06:00
args_to_str ,
2015-01-07 00:20:20 -06:00
age_restricted ,
2013-11-17 09:47:52 -06:00
)
2014-09-03 05:41:05 -05:00
from . cache import Cache
2013-06-27 16:51:06 -05:00
from . extractor import get_info_extractor , gen_extractors
2013-09-23 10:59:27 -05:00
from . downloader import get_suitable_downloader
2014-11-02 03:55:36 -06:00
from . downloader . rtmp import rtmpdump_version
2014-12-14 18:06:25 -06:00
from . postprocessor import (
2015-01-23 11:39:12 -06:00
FFmpegFixupM4aPP ,
2015-01-09 22:45:51 -06:00
FFmpegFixupStretchedPP ,
2014-12-14 18:06:25 -06:00
FFmpegMergerPP ,
FFmpegPostProcessor ,
get_postprocessor ,
)
2013-11-22 12:57:52 -06:00
from . version import __version__
2013-06-18 15:14:21 -05:00
class YoutubeDL ( object ) :
""" YoutubeDL class.
YoutubeDL objects are the ones responsible for downloading the
actual video file and writing it to disk if the user has requested
it , among some other tasks . In most cases there should be one per
program . As , given a video URL , the downloader doesn ' t know how to
extract all the needed information , task that InfoExtractors do , it
has to pass the URL to one of them .
For this , YoutubeDL objects have a method that allows
InfoExtractors to be registered in a given order . When it is passed
a URL , the YoutubeDL object hands it to the first InfoExtractor it
finds that reports being able to handle it . The InfoExtractor extracts
all the information about the video or videos the URL refers to , and
YoutubeDL processes the extracted information , possibly using a File
Downloader to download the video .
YoutubeDL objects accept a lot of parameters . In order not to saturate
the object constructor with arguments , it receives a dictionary of
options instead . These options are available through the params
attribute for the InfoExtractors to use . The YoutubeDL also
registers itself as the downloader in charge for the InfoExtractors
that are added to it , so this is a " mutual registration " .
Available options :
username : Username for authentication purposes .
password : Password for authentication purposes .
2013-06-25 15:22:32 -05:00
videopassword : Password for accessing a video .
2013-06-18 15:14:21 -05:00
usenetrc : Use netrc for authentication instead .
verbose : Print additional info to stdout .
quiet : Do not print messages to stdout .
2014-03-25 18:43:46 -05:00
no_warnings : Do not print out anything for warnings .
2013-06-18 15:14:21 -05:00
forceurl : Force printing final URL .
forcetitle : Force printing title .
forceid : Force printing ID .
forcethumbnail : Force printing thumbnail URL .
forcedescription : Force printing description .
forcefilename : Force printing final filename .
2013-12-15 21:15:10 -06:00
forceduration : Force printing duration .
2013-11-19 23:18:24 -06:00
forcejson : Force printing info_dict as JSON .
2014-10-24 17:30:57 -05:00
dump_single_json : Force printing the info_dict of the whole playlist
( or video ) as a single JSON line .
2013-06-18 15:14:21 -05:00
simulate : Do not download the video files .
2014-12-15 17:22:23 -06:00
format : Video format code . See options . py for more information .
2013-06-18 15:14:21 -05:00
format_limit : Highest quality format to try .
outtmpl : Template for output names .
restrictfilenames : Do not allow " & " and spaces in file names
ignoreerrors : Do not stop on download errors .
nooverwrites : Prevent overwriting files .
playliststart : Playlist item to start at .
playlistend : Playlist item to end at .
2015-01-24 21:24:55 -06:00
playlist_items : Specific indices of playlist to download .
2014-07-10 22:11:11 -05:00
playlistreverse : Download playlist items in reverse order .
2013-06-18 15:14:21 -05:00
matchtitle : Download only matching titles .
rejecttitle : Reject downloads for matching titles .
2013-11-23 23:08:11 -06:00
logger : Log messages to a logging . Logger instance .
2013-06-18 15:14:21 -05:00
logtostderr : Log messages to stderr instead of stdout .
writedescription : Write the video description to a . description file
writeinfojson : Write the video description to a . info . json file
2013-10-14 00:18:58 -05:00
writeannotations : Write the video annotations to a . annotations . xml file
2013-06-18 15:14:21 -05:00
writethumbnail : Write the thumbnail image to a file
2015-01-24 20:11:12 -06:00
write_all_thumbnails : Write all thumbnail formats to files
2013-06-18 15:14:21 -05:00
writesubtitles : Write the video subtitles to a file
2013-06-25 16:45:16 -05:00
writeautomaticsub : Write the automatic subtitles to a file
2013-06-18 15:14:21 -05:00
allsubtitles : Downloads all the subtitles of the video
2013-09-14 04:14:40 -05:00
( requires writesubtitles or writeautomaticsub )
2013-06-18 15:14:21 -05:00
listsubtitles : Lists all available subtitles for the video
2013-06-26 04:59:29 -05:00
subtitlesformat : Subtitle format [ srt / sbv / vtt ] ( default = srt )
2013-08-23 11:34:57 -05:00
subtitleslangs : List of languages of the subtitles to download
2013-06-18 15:14:21 -05:00
keepvideo : Keep the video file after post - processing
daterange : A DateRange object , download only if the upload_date is in the range .
skip_download : Skip the actual download of the video file
2013-09-22 04:09:25 -05:00
cachedir : Location of the cache files in the filesystem .
2014-09-03 05:41:05 -05:00
False to disable filesystem cache .
2013-09-30 15:26:25 -05:00
noplaylist : Download single video instead of a playlist if in doubt .
2013-10-05 23:06:30 -05:00
age_limit : An integer representing the user ' s age in years.
Unsuitable videos for the given age are skipped .
2013-12-15 20:09:49 -06:00
min_views : An integer representing the minimum view count the video
must have in order to not be skipped .
Videos without view count information are always
downloaded . None for no limit .
max_views : An integer representing the maximum view count .
Videos that are more popular than that are not
downloaded .
Videos without view count information are always
downloaded . None for no limit .
download_archive : File name of a file where all downloads are recorded .
2013-10-05 21:27:09 -05:00
Videos already present in the file are not downloaded
again .
2013-11-22 12:57:52 -06:00
cookiefile : File name where cookies should be read from and dumped to .
2013-11-24 08:03:25 -06:00
nocheckcertificate : Do not verify SSL certificates
2014-03-20 18:33:53 -05:00
prefer_insecure : Use HTTP instead of HTTPS to retrieve information .
At the moment , this is only supported by YouTube .
2013-11-24 08:03:25 -06:00
proxy : URL of the proxy server to use
2013-12-01 04:42:02 -06:00
socket_timeout : Time to wait for unresponsive hosts , in seconds
2013-12-08 21:08:51 -06:00
bidi_workaround : Work around buggy terminals without bidirectional text
support , using fribidi
2013-12-29 08:28:32 -06:00
debug_printtraffic : Print out sent and received HTTP traffic
2014-01-20 19:09:49 -06:00
include_ads : Download ads as well
2014-01-22 07:16:43 -06:00
default_search : Prepend this string if an input url is not valid .
' auto ' for elaborate guessing
2014-03-29 23:02:41 -05:00
encoding : Use this encoding instead of the system - specified .
2014-08-21 04:52:07 -05:00
extract_flat : Do not resolve URLs , return the immediate result .
2014-10-24 07:48:12 -05:00
Pass in ' in_playlist ' to only show this behavior for
playlist items .
2014-12-14 18:06:25 -06:00
postprocessors : A list of dictionaries , each with an entry
2014-12-14 18:26:18 -06:00
* key : The name of the postprocessor . See
youtube_dl / postprocessor / __init__ . py for a list .
2014-12-14 18:06:25 -06:00
as well as any further keyword arguments for the
postprocessor .
2014-12-14 18:26:18 -06:00
progress_hooks : A list of functions that get called on download
progress , with a dictionary with the entries
2015-01-24 23:15:51 -06:00
* status : One of " downloading " and " finished " .
Check this first and ignore unknown values .
2014-12-14 18:26:18 -06:00
2015-01-24 23:15:51 -06:00
If status is one of " downloading " or " finished " , the
following properties may also be present :
* filename : The final filename ( always present )
2014-12-14 18:26:18 -06:00
* downloaded_bytes : Bytes on disk
* total_bytes : Size of the whole file , None if unknown
* tmpfilename : The filename we ' re currently writing to
* eta : The estimated time in seconds , None if unknown
* speed : The download speed in bytes / second , None if
unknown
Progress hooks are guaranteed to be called at least once
( with status " finished " ) if the download is successful .
2015-01-09 18:59:14 -06:00
merge_output_format : Extension to use when merging formats .
2015-01-09 22:45:51 -06:00
fixup : Automatically correct known faults of the file .
One of :
- " never " : do nothing
- " warn " : only emit a warning
- " detect_or_warn " : check whether we can do anything
2015-01-23 11:39:12 -06:00
about it , warn otherwise ( default )
2015-01-10 12:55:36 -06:00
source_address : ( Experimental ) Client - side IP address to bind to .
2015-01-10 14:09:15 -06:00
call_home : Boolean , true iff we are allowed to contact the
youtube - dl servers for debugging .
2015-01-23 05:05:01 -06:00
sleep_interval : Number of seconds to sleep before each download .
2015-01-23 18:38:48 -06:00
external_downloader : Executable of the external downloader to call .
2015-01-24 19:38:47 -06:00
listformats : Print an overview of available video formats and exit .
list_thumbnails : Print a table of all thumbnails and exit .
2014-12-14 18:26:18 -06:00
2013-10-22 07:49:34 -05:00
2013-06-18 15:14:21 -05:00
The following parameters are not used by YoutubeDL itself , they are used by
the FileDownloader :
nopart , updatetime , buffersize , ratelimit , min_filesize , max_filesize , test ,
2015-01-24 21:49:44 -06:00
noresizebuffer , retries , continuedl , noprogress , consoletitle ,
xattr_set_filesize .
2014-01-08 10:53:34 -06:00
The following options are used by the post processors :
prefer_ffmpeg : If True , use ffmpeg instead of avconv if both are available ,
otherwise prefer avconv .
2014-08-25 03:18:01 -05:00
exec_cmd : Arbitrary command to run after downloading
2013-06-18 15:14:21 -05:00
"""
# Class-level placeholders. __init__ rebinds every one of these names on the
# instance, so the two list objects below are not the ones instances use.
params = None
_ies = [ ]
_pps = [ ]
_download_retcode = None
_num_downloads = None
_screen_file = None
2014-10-28 06:54:29 -05:00
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary (None is treated as an empty
    dictionary); it is stored as self.params.
    If auto_init is true, print_debug_header() and
    add_default_info_extractors() are called.
    The entries of the 'postprocessors' and 'progress_hooks' options are
    registered through add_post_processor() and add_progress_hook().
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Choose by truthiness instead of indexing a two-item list, so that a
    # 'logtostderr' value such as None does not raise a TypeError.
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = params
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = get_term_width()
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            # The filter reads messages on its stdin and writes to the
            # slave end of the pty; the master end is read back below.
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                # bidiv could not be started, fall back to fribidi
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        # Everything except 'key' is passed to the postprocessor
        # constructor as keyword arguments.
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)
2014-11-23 03:49:19 -06:00
def warn_if_short_id(self, argv):
    """Warn about arguments that look like a short YouTube ID starting with a dash.

    The warning shows the command line with those arguments moved behind
    a '--' separator.  Nothing is reported if no argument matches.
    """
    # short YouTube ID starting with dash?
    short_id_pattern = r'^-[0-9A-Za-z_-]{10}$'
    suspicious = []
    others = []
    for arg in argv:
        if re.match(short_id_pattern, arg):
            suspicious.append(arg)
        else:
            others.append(arg)
    if not suspicious:
        return
    correct_argv = ['youtube-dl'] + others + ['--'] + suspicious
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
2013-06-18 15:14:21 -05:00
def add_info_extractor(self, ie):
    """Append an InfoExtractor object to the list and index it by its ie_key()."""
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    # The extractor gets this object as its downloader.
    ie.set_downloader(self)
2013-07-08 08:14:27 -05:00
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.
    If none is registered yet, the class returned by the module-level
    get_info_extractor(ie_key) is instantiated, added with
    add_info_extractor() and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
2013-06-27 16:51:06 -05:00
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors(), in the
    order they are produced, at the end of the list.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
2013-06-18 15:14:21 -05:00
def add_post_processor(self, pp):
    """Append a PostProcessor object to the chain and make this object its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
2013-12-23 03:37:27 -06:00
def add_progress_hook(self, ph):
    """Register a progress hook (currently only used by the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)
2013-09-23 11:09:28 -05:00
2013-12-09 11:29:07 -06:00
def _bidi_workaround ( self , message ) :
2013-12-22 21:19:20 -06:00
if not hasattr ( self , ' _output_channel ' ) :
2013-12-09 11:29:07 -06:00
return message
2013-12-22 21:19:20 -06:00
assert hasattr ( self , ' _output_process ' )
2014-07-25 16:37:32 -05:00
assert isinstance ( message , compat_str )
2014-01-04 18:52:03 -06:00
line_count = message . count ( ' \n ' ) + 1
self . _output_process . stdin . write ( ( message + ' \n ' ) . encode ( ' utf-8 ' ) )
2013-12-22 21:19:20 -06:00
self . _output_process . stdin . flush ( )
2014-01-04 18:52:03 -06:00
res = ' ' . join ( self . _output_channel . readline ( ) . decode ( ' utf-8 ' )
2014-11-23 14:39:15 -06:00
for _ in range ( line_count ) )
2014-01-04 18:52:03 -06:00
return res [ : - len ( ' \n ' ) ]
2013-12-09 11:29:07 -06:00
2013-06-18 15:14:21 -05:00
def to_screen(self, message, skip_eol=False):
    """Print message through to_stdout(), honouring the 'quiet' option."""
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
2014-04-07 12:57:42 -05:00
def _write_string(self, s, out=None):
    """Write s to out with write_string(), passing the 'encoding' option."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
2014-04-07 12:57:42 -05:00
2013-12-08 21:08:51 -06:00
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or pass it to the configured logger.

    If the 'logger' option is set, the message goes to its debug() method
    and nothing is written.  Otherwise the message is written to
    self._screen_file, unless check_quiet is true and the 'quiet' option is
    set.  A newline is appended unless skip_eol is true.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    message = self._bidi_workaround(message)
    # Decide by truthiness: indexing ['\n', ''] with skip_eol raised for
    # any value that is not a bool, 0 or 1.
    terminator = '' if skip_eol else '\n'
    self._write_string(message + terminator, self._screen_file)
2013-06-18 15:14:21 -05:00
def to_stderr(self, message):
    """Print message to the error file, or pass it to the logger's error()."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    filtered = self._bidi_workaround(message)
    self._write_string(filtered + '\n', self._err_file)
2013-06-18 15:14:21 -05:00
2013-11-17 04:39:52 -06:00
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' option is set.

    On Windows with a console window SetConsoleTitleW is used; otherwise,
    if TERM is in the environment, an escape sequence is written to the
    screen file.
    """
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        title = ctypes.c_wchar_p(message)
        ctypes.windll.kernel32.SetConsoleTitleW(title)
        return
    if 'TERM' in os.environ:
        sequence = '\033]0;%s\007' % message
        self._write_string(sequence, self._screen_file)
2013-11-17 04:39:52 -06:00
2013-11-17 14:05:14 -06:00
def save_console_title(self):
    """Write the save-title escape sequence if 'consoletitle' is set and TERM is defined."""
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
2013-11-17 14:05:14 -06:00
def restore_console_title(self):
    """Write the restore-title escape sequence if 'consoletitle' is set and TERM is defined."""
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
2013-11-17 14:05:14 -06:00
def __enter__ ( self ) :
self . save_console_title ( )
return self
def __exit__ ( self , * args ) :
self . restore_console_title ( )
2014-01-25 05:02:43 -06:00
2013-11-22 12:57:52 -06:00
if self . params . get ( ' cookiefile ' ) is not None :
self . cookiejar . save ( )
2013-11-17 14:05:14 -06:00
2013-06-18 15:14:21 -05:00
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr first.  With the 'verbose'
    option a traceback is printed as well: tb if given, otherwise one
    built from the exception being handled or from the current stack.
    Unless the 'ignoreerrors' option is set a DownloadError is raised;
    otherwise the download return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    active = sys.exc_info()

    def nested_exc_info():
        # exc_info tuple carried by the active exception, or None
        exc = active[1]
        if hasattr(exc, 'exc_info') and exc.exc_info[0]:
            return exc.exc_info
        return None

    if self.params.get('verbose'):
        if tb is None:
            if active[0]:  # if .trouble has been called from an except block
                nested = nested_exc_info()
                tb = ''
                if nested is not None:
                    tb += ''.join(traceback.format_exception(*nested))
                tb += compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if not self.params.get('ignoreerrors', False):
        exc_info = nested_exc_info() if active[0] else None
        if exc_info is None:
            exc_info = active
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
def report_warning(self, message):
    '''
    Report a warning.  With a 'logger' option the message goes to its
    warning() method.  Otherwise, unless 'no_warnings' is set, it is
    printed to stderr prefixed with 'WARNING:', which is colored when
    the error file is a tty and the OS is not Windows.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    colored = self._err_file.isatty() and os.name != 'nt'
    header = '\033[0;33mWARNING:\033[0m' if colored else 'WARNING:'
    self.to_stderr('%s %s' % (header, message))
2013-06-18 15:14:21 -05:00
def report_error(self, message, tb=None):
    '''
    Call trouble() with the message prefixed by 'ERROR:'.  The prefix is
    colored in red when the error file is a tty and the OS is not Windows.
    '''
    colored = self._err_file.isatty() and os.name != 'nt'
    header = '\033[0;31mERROR:\033[0m' if colored else 'ERROR:'
    self.trouble('%s %s' % (header, message), tb)
def report_file_already_downloaded(self, file_name):
    """Report that the file has already been fully downloaded.

    If printing the message with the file name raises a
    UnicodeEncodeError, a message without the name is printed instead.
    """
    fallback = '[download] The file has already been downloaded'
    try:
        named = '[download] %s has already been downloaded' % file_name
        self.to_screen(named)
    except UnicodeEncodeError:
        self.to_screen(fallback)
2013-06-18 15:14:21 -05:00
def prepare_filename(self, info_dict):
    """Generate the output filename.

    The 'outtmpl' option (DEFAULT_OUTTMPL if unset) is filled in with a
    sanitized copy of info_dict plus the computed 'epoch', 'autonumber'
    and, where missing, 'resolution' fields.  Fields that are absent or
    None are rendered as 'NA'.  Returns None after reporting an error if
    formatting raises a ValueError.
    """
    try:
        fields = dict(info_dict)

        fields['epoch'] = int(time.time())
        digits = self.params.get('autonumber_size')
        if digits is None:
            digits = 5
        fields['autonumber'] = ('%0' + str(digits) + 'd') % self._num_downloads
        if fields.get('playlist_index') is not None:
            # Zero-pad the index to the width of the number of entries.
            pad = len(str(fields['n_entries']))
            fields['playlist_index'] = '%0*d' % (pad, fields['playlist_index'])
        if fields.get('resolution') is None:
            width = fields.get('width')
            height = fields.get('height')
            if width and height:
                fields['resolution'] = '%dx%d' % (width, height)
            elif height:
                fields['resolution'] = '%sp' % height
            elif width:
                fields['resolution'] = '?x%d' % width

        restricted = self.params.get('restrictfilenames')
        sanitized = {}
        for key, value in fields.items():
            if value is None:
                continue
            sanitized[key] = sanitize_filename(
                compat_str(value),
                restricted=restricted,
                is_id=(key == 'id'))
        # Missing fields expand to 'NA' instead of raising a KeyError.
        fields = collections.defaultdict(lambda: 'NA', sanitized)

        tmpl = compat_expanduser(self.params.get('outtmpl', DEFAULT_OUTTMPL))
        filename = tmpl % fields
        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
def _match_entry ( self , info_dict ) :
""" Returns None iff the file should be downloaded """
2014-01-04 18:52:03 -06:00
video_title = info_dict . get ( ' title ' , info_dict . get ( ' id ' , ' video ' ) )
2013-11-22 15:46:46 -06:00
if ' title ' in info_dict :
# This can happen when we're just evaluating the playlist
title = info_dict [ ' title ' ]
matchtitle = self . params . get ( ' matchtitle ' , False )
if matchtitle :
if not re . search ( matchtitle , title , re . IGNORECASE ) :
2014-01-04 18:52:03 -06:00
return ' " ' + title + ' " title did not match pattern " ' + matchtitle + ' " '
2013-11-22 15:46:46 -06:00
rejecttitle = self . params . get ( ' rejecttitle ' , False )
if rejecttitle :
if re . search ( rejecttitle , title , re . IGNORECASE ) :
2014-01-04 18:52:03 -06:00
return ' " ' + title + ' " title matched reject pattern " ' + rejecttitle + ' " '
2013-06-18 15:14:21 -05:00
date = info_dict . get ( ' upload_date ' , None )
if date is not None :
dateRange = self . params . get ( ' daterange ' , DateRange ( ) )
if date not in dateRange :
2014-01-04 18:52:03 -06:00
return ' %s upload date is not in range %s ' % ( date_from_str ( date ) . isoformat ( ) , dateRange )
2013-12-15 20:09:49 -06:00
view_count = info_dict . get ( ' view_count ' , None )
if view_count is not None :
min_views = self . params . get ( ' min_views ' )
if min_views is not None and view_count < min_views :
2014-01-04 18:52:03 -06:00
return ' Skipping %s , because it has not reached minimum view count ( %d / %d ) ' % ( video_title , view_count , min_views )
2013-12-15 20:09:49 -06:00
max_views = self . params . get ( ' max_views ' )
if max_views is not None and view_count > max_views :
2014-01-04 18:52:03 -06:00
return ' Skipping %s , because it has exceeded the maximum view count ( %d / %d ) ' % ( video_title , view_count , max_views )
2015-01-07 00:20:20 -06:00
if age_restricted ( info_dict . get ( ' age_limit ' ) , self . params . get ( ' age_limit ' ) ) :
return ' Skipping " %s " because it is age restricted ' % title
2013-10-05 21:27:09 -05:00
if self . in_download_archive ( info_dict ) :
2014-01-04 18:52:03 -06:00
return ' %s has already been recorded in archive ' % video_title
2013-06-18 15:14:21 -05:00
return None
2013-10-22 07:49:34 -05:00
2013-11-03 04:56:45 -06:00
@staticmethod
def add_extra_info ( info_dict , extra_info ) :
''' Set the keys from extra_info in info dict if they are missing '''
for key , value in extra_info . items ( ) :
info_dict . setdefault ( key , value )
2013-12-05 07:29:08 -06:00
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True):
    '''
    Extract the information for url with the first suitable InfoExtractor.

    If ie_key is given only that extractor is tried, otherwise the
    registered extractors are tried in order.
    If 'process' is true the result of process_ie_result() is returned
    (which also downloads the videos if 'download'); otherwise the raw
    extractor result is returned.  None is returned when the extractor
    returned None, when an error was reported, or when no extractor is
    suitable.
    extra_info is a dict containing the extra values to add to each result
    '''
    # A fresh dict per call instead of a mutable default shared by all calls.
    if extra_info is None:
        extra_info = {}

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without a break: no extractor accepted the URL.
        self.report_error('no suitable InfoExtractor for URL %s' % url)
2013-10-22 07:49:34 -05:00
2014-03-23 10:06:03 -05:00
def add_default_extra_info(self, ie_result, ie, url):
    """Attach the extractor name/key and the page URL to ie_result.

    The values are passed to add_extra_info together with ie_result.
    """
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
2013-06-18 15:14:21 -05:00
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    Dispatches on ie_result['_type'] ('video' when absent):
    'video' results go to process_video_result, 'url' and
    'url_transparent' results are extracted again, 'playlist' /
    'multi_video' / 'compat_list' results have each entry processed
    recursively.

    It will also download the videos if 'download'.
    Returns the resolved ie_result.
    Raises Exception for an unknown result type.
    """
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            # Flat extraction: report the reference without resolving it
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # Every non-None field of the embedding result overrides the
        # extracted one, except the reference itself
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        assert new_result.get('_type') != 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist' or result_type == 'multi_video':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items', None)
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(spec):
                # '1,3-5' -> 1, 3, 4, 5 (1-based playlist positions)
                for string_segment in spec.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = iter_playlistitems(playlistitems_str)

        def pick_playlistitems(all_entries):
            # Requested positions outside the playlist are skipped rather
            # than raising IndexError (and 0 no longer wraps around to the
            # last entry); this matches the PagedList branch, where an
            # out-of-range slice is requested instead of an index.
            total = len(all_entries)
            return [all_entries[i - 1] for i in playlistitems
                    if 0 < i <= total]

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = pick_playlistitems(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))
        else:  # iterable
            if playlistitems:
                entries = pick_playlistitems(list(ie_entries))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # NOTE(review): playlist_index is derived from playliststart
            # even when playlist_items or playlistreverse selected the
            # entries, so it is the position in this run rather than in
            # the original playlist in those cases.
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Old-style entries carry no extractor information of their own
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
2015-01-22 17:04:05 -06:00
def _apply_format_filter(self, format_spec, available_formats):
    """Apply the trailing ``[key op value]`` filter of format_spec.

    Numeric keys accept the ordering and equality operators, string keys
    only ``=`` and ``!=``.  A ``?`` after the operator also keeps formats
    for which the key is missing or None.

    Returns a tuple of the remaining format_spec ('best' when nothing is
    left) and the filtered formats.
    Raises ValueError when the filter or its value cannot be parsed.
    """
    numeric_ops = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    numeric_rex = re.compile(r'''(?x)\s*\[
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        \]$
        ''' % '|'.join(map(re.escape, numeric_ops.keys())))
    match = numeric_rex.search(format_spec)
    if match:
        raw_value = match.group('value')
        try:
            wanted = int(raw_value)
        except ValueError:
            # Not a plain integer: try it as a file size, with and
            # without an explicit byte suffix
            wanted = parse_filesize(raw_value)
            if wanted is None:
                wanted = parse_filesize(raw_value + 'B')
            if wanted is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        raw_value, format_spec))
        compare = numeric_ops[match.group('op')]
    else:
        string_ops = {
            '=': operator.eq,
            '!=': operator.ne,
        }
        string_rex = re.compile(r'''(?x)\s*\[
            \s*(?P<key>ext|acodec|vcodec|container|protocol)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
            \s*(?P<value>[a-zA-Z0-9_-]+)
            \s*\]$
            ''' % '|'.join(map(re.escape, string_ops.keys())))
        match = string_rex.search(format_spec)
        if not match:
            raise ValueError('Invalid format specification %r' % format_spec)
        wanted = match.group('value')
        compare = string_ops[match.group('op')]

    field = match.group('key')
    keep_unknown = match.group('none_inclusive')

    def keep(fmt):
        actual = fmt.get(field)
        if actual is None:
            return keep_unknown
        return compare(actual, wanted)

    kept_formats = [fmt for fmt in available_formats if keep(fmt)]

    # Strip the filter just consumed from the end of the spec
    remaining_spec = format_spec[:-len(match.group(0))] or 'best'
    return (remaining_spec, kept_formats)
2013-10-21 06:19:58 -05:00
def select_format(self, format_spec, available_formats):
    """Pick one format out of available_formats according to format_spec.

    format_spec may be None or 'best' (last format), 'worst' (first
    format), '{best,worst}{audio,video}', a known file extension, or a
    format_id.  Any number of trailing '[...]' filters are applied first
    via _apply_format_filter.

    Returns the selected format dict, or None if nothing matches.
    """
    # A missing spec means 'best'.  Normalise it up front: calling
    # endswith() on None would raise AttributeError before the
    # None case could ever be handled.
    if format_spec is None:
        format_spec = 'best'

    while format_spec.endswith(']'):
        format_spec, available_formats = self._apply_format_filter(
            format_spec, available_formats)
    if not available_formats:
        return None

    if format_spec == 'best':
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    # Audio-only formats have vcodec 'none', video-only ones acodec 'none';
    # the index says which end of the list to take.
    codec_selectors = {
        'bestaudio': ('vcodec', -1),
        'worstaudio': ('vcodec', 0),
        'bestvideo': ('acodec', -1),
        'worstvideo': ('acodec', 0),
    }
    if format_spec in codec_selectors:
        codec_key, index = codec_selectors[format_spec]
        candidates = [
            f for f in available_formats
            if f.get(codec_key) == 'none']
        return candidates[index] if candidates else None

    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
    match_key = 'ext' if format_spec in extensions else 'format_id'
    matches = [f for f in available_formats if f[match_key] == format_spec]
    return matches[-1] if matches else None
2015-01-24 11:52:26 -06:00
def _calc_headers(self, info_dict):
    """Build the HTTP headers to use for info_dict.

    Starts from a copy of std_headers, overlays info_dict's own
    'http_headers' (if any) and adds a 'Cookie' header when
    _calc_cookies yields one.  Returns the resulting dict.
    """
    headers = std_headers.copy()
    headers.update(info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    return headers
def _calc_cookies(self, info_dict):
    """Return the Cookie header value the cookie jar would send.

    Lets self.cookiejar fill in the 'Cookie' header of a minimal stand-in
    request for info_dict['url'] and returns that header, or None when no
    cookie applies.
    """
    class _PseudoRequest(object):
        # Implements only the part of the request interface that the
        # cookie jar touches while computing the Cookie header.
        def __init__(self, url):
            self.url = url
            self.headers = {}
            self.unverifiable = False
            # URL scheme.  The standard cookie policy reads it (as the
            # ``type`` attribute on Python 3, via ``get_type()`` on
            # Python 2) to decide whether a secure cookie may be sent;
            # without it a matching secure cookie raised AttributeError.
            self.type = url.partition(':')[0].lower()

        def add_unredirected_header(self, k, v):
            self.headers[k] = v

        def get_full_url(self):
            return self.url

        def get_type(self):
            return self.type

        def is_unverifiable(self):
            return self.unverifiable

        def has_header(self, h):
            return h in self.headers

        def get_header(self, h, default=None):
            return self.headers.get(h, default)

    pr = _PseudoRequest(info_dict['url'])
    self.cookiejar.add_cookie_header(pr)
    return pr.headers.get('Cookie')
2013-07-02 03:08:58 -05:00
def process_video_result(self, info_dict, download=True):
    """Normalise a 'video' result, select its formats and process them.

    Fills in derived fields (playlist placeholders, thumbnails,
    display_id, upload_date, per-format format_id / format / ext /
    http_headers), chooses the formats requested through the 'format'
    parameter and, if 'download' is true, hands each of them to
    process_info.

    Returns info_dict updated with the last selected format, or None when
    only a listing was requested ('listformats' / 'list_thumbnails') or a
    merge request named its formats in the wrong order.
    Raises ExtractorError if mandatory fields are missing, no formats are
    found, or the requested format is not available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if thumbnails:
        def none_first(value):
            # Python 3 refuses to order None against numbers or strings.
            # Pairing each value with a "has a value" flag sorts missing
            # values first (as Python 2 did) without comparing None.
            return (value is not None, value)

        thumbnails.sort(key=lambda t: tuple(
            none_first(t.get(field))
            for field in ('preference', 'width', 'height', 'id', 'url')))
        for i, t in enumerate(thumbnails):
            if 'width' in t and 'height' in t:
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
                t['id'] = '%d' % i

    if thumbnails and 'thumbnail' not in info_dict:
        # The list is sorted ascending, so the last entry ranks highest
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around negative timestamps in Windows
        # (see http://bugs.python.org/issue1646728)
        if info_dict['timestamp'] < 0 and os.name == 'nt':
            info_dict['timestamp'] = 0
        upload_date = datetime.datetime.utcfromtimestamp(
            info_dict['timestamp'])
        info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

    # These extractors handle format selection themselves
    if info_dict['extractor'] in ['Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if 'url' not in fmt:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        return
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        for rfstr in req_format.split(','):
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = rfstr.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'
                    format_1, format_2 = rf.split('+')
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    if all(formats_info):
                        # The first format must contain the video and the
                        # second the audio
                        if formats_info[0].get('vcodec') == 'none':
                            self.report_error('The first format must '
                                              'contain the video, try using '
                                              '"-f %s+%s"' % (format_2, format_1))
                            return
                        output_ext = (
                            formats_info[0]['ext']
                            if self.params.get('merge_output_format') is None
                            else self.params['merge_output_format'])
                        # Video properties come from the first format,
                        # audio properties from the second
                        selected_format = {
                            'requested_formats': formats_info,
                            'format': '%s+%s' % (formats_info[0].get('format'),
                                                 formats_info[1].get('format')),
                            'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                    formats_info[1].get('format_id')),
                            'width': formats_info[0].get('width'),
                            'height': formats_info[0].get('height'),
                            'resolution': formats_info[0].get('resolution'),
                            'fps': formats_info[0].get('fps'),
                            'vcodec': formats_info[0].get('vcodec'),
                            'vbr': formats_info[0].get('vbr'),
                            'stretched_ratio': formats_info[0].get('stretched_ratio'),
                            'acodec': formats_info[1].get('acodec'),
                            'abr': formats_info[1].get('abr'),
                            'ext': output_ext,
                        }
                    else:
                        selected_format = None
                else:
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download.append(selected_format)
                    break
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
2013-06-18 15:14:21 -05:00
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Enforces the 'max_downloads' limit, prepares the output filename,
    emits the forced printings, writes the requested side files
    (description, annotations, subtitles, info JSON, thumbnails) and,
    unless simulating or 'skip_download' is set, downloads the media and
    runs the fixups and post-processors.

    Raises MaxDownloadsReached when the download limit was already hit
    and UnavailableVideoError on OSError/IOError during the download.
    Other failures are reported through report_error and end processing.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    info_dict['_filename'] = filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        if info_dict.get('requested_formats') is not None:
            for f in info_dict['requested_formats']:
                self.to_stdout(f['url'] + f.get('play_path', ''))
        else:
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif info_dict.get('annotations') is None:
            # Check before opening the file, like the description branch
            # does: otherwise an empty .annotations.xml is left behind
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(info_dict, infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    self._write_thumbnails(info_dict, filename)

    if not self.params.get('skip_download', False):
        try:
            def dl(name, info):
                # Instantiate the downloader chosen for this format and
                # attach every registered progress hook to it
                fd = get_suitable_downloader(info, self.params)(self, self.params)
                for ph in self._progress_hooks:
                    fd.add_progress_hook(ph)
                if self.params.get('verbose'):
                    self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                return fd.download(name, info)

            if info_dict.get('requested_formats') is not None:
                # Several formats were requested: download each one to
                # its own file and leave merging to the post-processors
                downloaded = []
                success = True
                merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                if not merger._executable:
                    postprocessors = []
                    self.report_warning('You have requested multiple '
                                        'formats but ffmpeg or avconv are not installed.'
                                        ' The formats won\'t be merged')
                else:
                    postprocessors = [merger]
                for f in info_dict['requested_formats']:
                    new_info = dict(info_dict)
                    new_info.update(f)
                    fname = self.prepare_filename(new_info)
                    fname = prepend_extension(fname, 'f%s' % f['format_id'])
                    downloaded.append(fname)
                    partial_success = dl(fname, new_info)
                    success = success and partial_success
                info_dict['__postprocessors'] = postprocessors
                info_dict['__files_to_merge'] = downloaded
            else:
                # Just a single file
                success = dl(filename, info_dict)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error('unable to download video data: %s' % str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success:
            # Fixup content
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                info_dict['id'], stretched_ratio))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
                if fixup_policy == 'warn':
                    self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
                        info_dict['id']))
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
                                info_dict['id']))
                else:
                    assert fixup_policy in ('ignore', 'never')

            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return
            self.record_download_archive(info_dict)
2013-06-18 15:14:21 -05:00
def download(self, url_list):
    """Download a given list of URLs.

    Returns the accumulated download return code.  Raises SameFileError
    when several URLs would all be written to the same fixed file name,
    and lets MaxDownloadsReached propagate to the caller.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

    if len(url_list) > 1:
        # A template without any '%' field expands to one and the same
        # name for every URL, unless only a single download is allowed.
        if '%' not in outtmpl and self.params.get('max_downloads') != 1:
            raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # extract_info() also downloads the videos
            result = self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
2013-11-22 07:57:53 -06:00
def download_with_info_file(self, info_filename):
    """Download from a previously written info JSON file.

    The file is read as UTF-8 JSON and handed to process_ie_result().  If
    that fails with DownloadError and the info carries a 'webpage_url',
    the download is retried from that URL; otherwise the error propagates.
    Returns the download return code.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as info_file:
        info = json.load(info_file)

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            # Nothing to fall back to
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])

    return self._download_retcode
2013-11-22 07:57:53 -06:00
2013-06-18 15:14:21 -05:00
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Postprocessors attached to the info dict under '__postprocessors' run
    first, followed by the ones registered on this object.  Each one gets
    the info dict returned by its predecessor.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    attached = ie_info.get('__postprocessors')
    chain = [] if attached is None else list(attached)
    chain.extend(self._pps)

    for pp in chain:
        source_path = info['filepath']
        # The decision is taken afresh for every postprocessor: it stays
        # None unless this postprocessor ran successfully and voiced a wish.
        keep_video = None
        try:
            wish, info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
        else:
            keep_video = wish

        # Only an explicit False (not None) requests deletion
        if keep_video is False and not self.params.get('keepvideo', False):
            try:
                self.to_screen('Deleting original file %s (pass -k to keep)' % source_path)
                os.remove(encodeFilename(source_path))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded video file')
2013-10-05 21:27:09 -05:00
2013-11-25 08:46:54 -06:00
def _make_archive_id(self, info_dict):
    """Return the download-archive key for info_dict, or None.

    The key has the form '<extractor> <id>' with the extractor name
    lower-cased.  None is returned when the information is incomplete,
    i.e. when the id or both extractor keys are missing.
    """
    video_id = info_dict.get('id')
    if video_id is None:
        # Without an id there is nothing to key on; previously this case
        # raised KeyError when 'extractor_key' was present.
        return None  # Incomplete video information

    extractor = info_dict.get('extractor_key')
    if extractor is None:
        extractor = info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        return None  # Incomplete video information

    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    return extractor.lower() + ' ' + video_id
2013-11-25 08:46:54 -06:00
def in_download_archive(self, info_dict):
    """Return True if info_dict is already listed in the download archive.

    False is returned when no archive is configured, when no archive id
    can be built for info_dict, or when the archive file does not exist.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted = self._make_archive_id(info_dict)
    if wanted is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            return any(entry.strip() == wanted for entry in archive_file)
    except IOError as ioe:
        # A missing archive simply means nothing has been recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the download archive file.

    Does nothing when no download archive is configured.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + '\n')
2013-07-02 03:08:58 -05:00
2013-10-21 07:09:38 -05:00
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short human-readable resolution string for a format dict.

    In order of precedence: 'audio only' when the video codec is 'none',
    the explicit 'resolution' entry, '<width>x<height>', '<height>p',
    '<width>x?' when only the width is known, and finally `default`.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        # The known value is the width, so it belongs on the left of the 'x'
        # (this used to be rendered as '?x<width>').
        return '%dx?' % width
    return default
2014-04-29 19:02:41 -05:00
def _format_note(self, fdict):
    """Assemble the free-form note shown for one format dict.

    The note is built from whichever of these entries are present:
    ext, format_note, tbr, container, vcodec/vbr, fps, acodec/abr, asr and
    filesize (or filesize_approx).  An empty string is returned when none
    of them is set.
    """
    def with_sep(text):
        # Separate from what was written so far, but never start with ', '
        return text + ', ' if text else text

    vcodec = fdict.get('vcodec')
    acodec = fdict.get('acodec')
    vbr = fdict.get('vbr')
    abr = fdict.get('abr')

    note = ''
    if fdict.get('ext') in ['f4f', 'f4m']:
        note += '(unsupported) '
    if fdict.get('format_note') is not None:
        note += fdict['format_note'] + ' '
    if fdict.get('tbr') is not None:
        note += '%4dk ' % fdict['tbr']
    if fdict.get('container') is not None:
        note = with_sep(note) + '%s container' % fdict['container']

    # Video part
    if vcodec is not None and vcodec != 'none':
        note = with_sep(note) + vcodec
        if vbr is not None:
            note += '@'
    elif vbr is not None and abr is not None:
        note += 'video@'
    if vbr is not None:
        note += '%4dk' % vbr
    if fdict.get('fps') is not None:
        note += ', %sfps' % fdict['fps']

    # Audio part
    if acodec is not None:
        note = with_sep(note)
        if acodec == 'none':
            note += 'video only'
        else:
            note += '%-5s' % acodec
    elif abr is not None:
        note = with_sep(note) + 'audio'
    if abr is not None:
        note += '@%3dk' % abr
    if fdict.get('asr') is not None:
        note += ' (%5dHz)' % fdict['asr']

    # Size: the exact value wins over the approximate one
    if fdict.get('filesize') is not None:
        note = with_sep(note) + format_bytes(fdict['filesize'])
    elif fdict.get('filesize_approx') is not None:
        note = with_sep(note) + '~' + format_bytes(fdict['filesize_approx'])
    return note
2013-11-15 18:08:43 -06:00
2014-04-29 19:02:41 -05:00
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    Formats come from info_dict['formats'], or info_dict itself when that
    key is absent.  Formats whose 'preference' is below -1000 are not
    listed.  When there is more than one format, the last listed row is
    marked with '(best)'.
    """
    formats = info_dict.get('formats', [info_dict])
    shown = [
        f for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]

    # The id column is at least as wide as its header.  Starting from a
    # one-element list keeps max() from failing on an empty format list.
    idlen = max([len('format code')] + [len(f['format_id']) for f in formats])

    def line(format):
        # '%-*s' takes the column width from the argument list
        return '%-*s%-10s%-12s%s' % (
            idlen + 1,
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            self._format_note(format),
        )

    formats_s = [line(f) for f in shown]
    if len(formats) > 1 and formats_s:
        # Decide the spacing from the row that actually receives the marker;
        # the last format overall may be one of the hidden ones.
        formats_s[-1] += (' ' if self._format_note(shown[-1]) else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'})
    self.to_screen(
        '[info] Available formats for %s:\n%s\n%s' %
        (info_dict['id'], header_line, '\n'.join(formats_s)))
def list_thumbnails(self, info_dict):
    """Print a table (ID, width, height, URL) of the known thumbnails.

    Falls back to the single 'thumbnail' URL when there is no 'thumbnails'
    list, and prints a short notice when there is neither.
    """
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        tn_url = info_dict.get('thumbnail')
        if not tn_url:
            self.to_screen(
                '[info] No thumbnails present for %s' % info_dict['id'])
            return
        thumbnails = [{'id': '0', 'url': tn_url}]

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])

    rows = []
    for t in thumbnails:
        rows.append([
            t['id'],
            t.get('width', 'unknown'),
            t.get('height', 'unknown'),
            t['url'],
        ])
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
2013-11-22 12:57:52 -06:00
def urlopen(self, req):
    """ Start an HTTP download """

    # According to RFC 3986, URLs cannot contain non-ASCII characters.
    # Not every website respects this: some hand out URLs with
    # non-ASCII characters that are not percent-encoded
    # (see telemb.py, ard.py [#3412]).
    # urllib chokes on such URLs (see http://bugs.python.org/issue3991),
    # so the URL of the request is replaced with a percent-encoded one.
    is_plain_url = isinstance(req, compat_basestring)
    if is_plain_url:
        url = req
    else:
        url = req.get_full_url()
    url_escaped = escape_url(url)

    # Substitute URL if any change after escaping
    if url != url_escaped:
        if is_plain_url:
            req = url_escaped
        else:
            req = compat_urllib_request.Request(
                url_escaped,
                data=req.data,
                headers=req.headers,
                origin_req_host=req.origin_req_host,
                unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
2013-11-22 12:57:52 -06:00
def print_debug_header(self):
    """Write diagnostic information when the 'verbose' option is set.

    Reports the encodings in use, the program version, the git HEAD (when
    available), the Python version and platform, the versions of external
    executables and the proxy map.  With the 'call_home' option it also
    fetches the public IP address and warns about an outdated version.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        # Require the whole output to be hexadecimal, not merely a prefix
        if re.match('[0-9a-f]+$', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Best effort only: git may be missing or this may not be a checkout.
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear() does not exist on Python 3
            pass

    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions()
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
2013-12-01 04:42:02 -06:00
def _setup_opener(self):
    """Create the URL opener, cookie jar and socket timeout from the params.

    Sets self._socket_timeout, self.cookiejar and self._opener.
    """
    params = self.params

    timeout_val = params.get('socket_timeout')
    self._socket_timeout = float(timeout_val) if timeout_val is not None else 600

    # Cookies: an in-memory jar, or a Mozilla-format file when one is given
    cookiefile = params.get('cookiefile')
    if cookiefile is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookiefile)
        if os.access(cookiefile, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    # Proxies: an explicit option wins; an empty string disables proxies
    proxy = params.get('proxy')
    if proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy == '':
        proxies = {}
    else:
        proxies = {'http': proxy, 'https': proxy}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(params, debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
2014-03-29 23:02:41 -05:00
def encode(self, s):
    """Return s as bytes, encoding text with the configured encoding.

    Bytes are passed through untouched.  A UnicodeEncodeError is re-raised
    with a hint about the encoding configuration appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s
def get_encoding(self):
    """Return the 'encoding' option, or the preferred encoding when it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
2015-01-24 20:11:12 -06:00
def _write_thumbnails(self, info_dict, filename):
    """Download the thumbnails of info_dict next to `filename`.

    With the 'writethumbnail' option only the last entry of the thumbnail
    list is written; with 'write_all_thumbnails' every entry is.  Without
    either option, or without thumbnails, nothing happens.  Download
    failures are reported as warnings.
    """
    if self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails')
        if thumbnails:
            thumbnails = [thumbnails[-1]]
    elif self.params.get('write_all_thumbnails', False):
        thumbnails = info_dict.get('thumbnails')
    else:
        return

    if not thumbnails:
        # No thumbnails present, so return immediately
        return

    # The thumbnail id only appears in names and messages when it is
    # needed to tell several thumbnails apart.
    multiple = len(thumbnails) > 1
    base_name = os.path.splitext(filename)[0]

    for t in thumbnails:
        thumb_ext = determine_ext(t['url'], 'jpg')
        suffix = '_%s' % t['id'] if multiple else ''
        thumb_display_id = '%s ' % t['id'] if multiple else ''
        thumb_filename = base_name + suffix + '.' + thumb_ext

        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
            self.to_screen('[%s] %s: Thumbnail %sis already present' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
            continue

        self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                       (info_dict['extractor'], info_dict['id'], thumb_display_id))
        try:
            uf = self.urlopen(t['url'])
            try:
                # Open with the same encoded name the existence check uses
                with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                    shutil.copyfileobj(uf, thumbf)
            finally:
                # Release the HTTP response even if writing fails
                uf.close()
            self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_warning('Unable to download thumbnail "%s": %s' %
                                (t['url'], compat_str(err)))