2013-07-07 18:13:55 -05:00
# coding: utf-8
import re
from . common import InfoExtractor
from . . utils import (
unified_strdate ,
)
class DreiSatIE(InfoExtractor):
    """Information extractor for videos hosted in the 3sat Mediathek."""

    IE_NAME = '3sat'
    # The numeric ``obj`` query parameter is the video id.  Optional
    # ``mode``/``display`` parameters may precede it; both http and https
    # (or no scheme at all) are accepted.
    _VALID_URL = r'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
    _TEST = {
        u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
        u'file': u'36983.mp4',
        u'md5': u'9dcfe344732808dbfcc901537973c922',
        u'info_dict': {
            u"title": u"Kaffeeland Schweiz",
            u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",
            u"uploader": u"3sat",
            u"upload_date": u"20130622",
        },
    }

    def _real_extract(self, url):
        """Build the info dictionary for the video referenced by *url*.

        The video id is taken from the ``id`` group of ``_VALID_URL``; the
        metadata comes from the ``beitragsDetails`` XML document fetched
        for that id.

        Returns a dict with the keys ``_type``, ``id``, ``title``,
        ``formats``, ``description``, ``thumbnails``, ``thumbnail``,
        ``uploader`` and ``upload_date``.  ``thumbnail`` is ``None`` when
        the document lists no teaser images.

        Raises AttributeError if the ``information``/``details`` elements
        (or their ``title``, ``detail``, ``channel``, ``airtime``
        children) are missing from the document.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        details_doc = self._download_xml(
            details_url, video_id, note=u'Downloading video details')

        # Each teaserimage carries its dimensions in the ``key`` attribute
        # as "<width>x<height>" and the image URL as element text.
        thumbnails = []
        for te in details_doc.findall('.//teaserimage'):
            width, _, height = te.attrib['key'].partition('x')
            thumbnails.append({
                'width': int(width),
                'height': int(height),
                'url': te.text,
            })

        information_el = details_doc.find('.//information')
        video_title = information_el.find('./title').text
        video_description = information_el.find('./detail').text

        details_el = details_doc.find('.//details')
        video_uploader = details_el.find('./channel').text
        upload_date = unified_strdate(details_el.find('./airtime').text)

        formats = []
        for fe in details_doc.findall('.//formitaet'):
            url_el = fe.find('./url')
            format_url = url_el.text if url_el is not None else None
            # A format without a URL cannot be downloaded, so skip it
            # instead of failing on ``None``.
            if not format_url:
                continue
            # metafilegenerator.de links are excluded
            # NOTE(review): presumably these are not direct media files
            # - confirm.
            if format_url.startswith('http://www.metafilegenerator.de/'):
                continue
            formats.append({
                'format_id': fe.attrib['basetype'],
                'width': int(fe.find('./width').text),
                'height': int(fe.find('./height').text),
                'url': format_url,
                'filesize': int(fe.find('./filesize').text),
                'video_bitrate': int(fe.find('./videoBitrate').text),
            })

        self._sort_formats(formats)

        return {
            '_type': 'video',
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'description': video_description,
            'thumbnails': thumbnails,
            # The last listed teaser image is used as the default
            # thumbnail; guard against documents without any.
            'thumbnail': thumbnails[-1]['url'] if thumbnails else None,
            'uploader': video_uploader,
            'upload_date': upload_date,
        }