2021-01-30 23:33:24 -06:00
# coding: utf-8
from __future__ import unicode_literals
2021-02-13 18:26:33 -06:00
from __future__ import print_function #XXX
import pprint #XXX
2021-01-30 23:33:24 -06:00
import re
from . common import InfoExtractor
class Pac12IE(InfoExtractor):
    """Extract videos from pac-12.com video, article and embed pages.

    A page either carries the stream location directly in its Drupal
    settings JSON (``currentVideo.manifest_url``) or only links to a
    ``vod-<id>`` page, in which case extraction is delegated to that URL.
    """

    # The dot in "pac-12.com" is escaped so it only matches a literal dot.
    _VALID_URL = r'https?://(?:[a-z]+\.)?pac-12\.com/(?:embed/)?(?P<id>.*)'
    # Absolute link to a VOD page (plain or embed flavour) found in a webpage.
    _VOD_URL_RE = r'(https?://(?:embed\.)?pac-12\.com/(?:embed/)?vod-[0-9a-zA-Z]+)'
    _TESTS = [{
        'url': 'https://pac-12.com/videos/2020-pac-12-womens-basketball-media-day-arizona-cal-stanford',
        'md5': 'b2e3c0cb99458c8b8e2dc22cb5ac922d',
        'info_dict': {
            'id': 'vod-VGQNKGlo9Go',
            'ext': 'mp4',
            'title': '2020 Pac-12 Women\'s Basketball Media Day - Arizona, Cal & Stanford | Pac-12',
            'description': 'During the 2020 Pac-12 Women\'s Basketball Media Day, Ros Gold-Onwude moderates a discussion with Arizona\'s Adia Barnes & Aari McDonald, Cal\'s Charmin Smith & Evelien Lutje Schipholt & Stanford\'s Tara VanDerveer & Kiana Williams.',
        }
    }, {
        'url': 'https://pac-12.com/article/2020/11/24/sonoran-dog-dish-presented-tums',
        'md5': 'a7a8ac72273b9468924bc058cc220d37',
        'info_dict': {
            'id': 'vod-YLMKpNLZvR0',
            'ext': 'mp4',
            'title': 'Sonoran Dog | The Dish, presented by TUMS | Pac-12',
            'description': 'Pac-12 Networks introduces "The Dish," presented by Tums. Jaymee Sire is bringing fans a closeup to game day treats from around the Conference with each treat connecting to a Pac-12 school, bringing the flavor and recipes fans know and love right to the dish! As Arizona and USC basketball seasons tip off, the first feature item from "The Dish" is the Sonoran Dog, a beloved treat by Trojans & Wildcat fans.',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for ``url``, or a url_result for its VOD page.

        Raises the extractor's usual "unable to extract" error when the page
        has neither a manifest URL nor a link to a VOD page.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        drupal_settings = self._parse_json(
            self._search_regex(
                r'<script[^>]+type="application/json"[^>]*data-drupal-selector="drupal-settings-json">([^<]+)</script>',
                webpage, 'drupal settings'), video_id)
        # "or {}" also covers a "currentVideo" key that is present but null.
        video_url = (drupal_settings.get('currentVideo') or {}).get('manifest_url')

        if video_url is None:
            # Nothing playable on this page: the VOD link is mandatory here,
            # so the search is fatal instead of silently returning None.
            return self.url_result(
                self._search_regex(self._VOD_URL_RE, webpage, 'url'))

        # Drop any backslashes still present in the manifest URL.
        video_url = re.sub(r'\\', '', video_url)

        if 'vod-' not in url:
            # Pages reached through a slug (videos/..., article/...) take the
            # id of the VOD page they link to, when there is one.
            vod_url = self._search_regex(
                self._VOD_URL_RE, webpage, 'url', default=None)
            if vod_url is not None:
                video_id = self._match_id(vod_url)

        title = self._html_search_regex(
            r'<title>(.+?)</title>', webpage, 'title')

        description = self._og_search_description(webpage, default=None)
        if description is None:
            # Fall back to a JSON "description" member embedded in the page.
            # The pattern steps over escaped characters (e.g. \") so the
            # capture is a complete JSON string body.
            raw_description = self._search_regex(
                r'"description"\s*:\s*"(?P<description>(?:[^"\\]|\\.)+)"',
                webpage, 'description', default=None)
            if raw_description is not None:
                # Decode as a JSON string literal; the 'unicode_escape' codec
                # would mis-decode non-ASCII characters as Latin-1.
                description = self._parse_json(
                    '"%s"' % raw_description, video_id, fatal=False)

        # NOTE(review): 'manifest_url' is passed through as a direct mp4 URL;
        # confirm it is not an HLS/DASH manifest that needs format extraction.
        return {
            'id': video_id,
            'title': title,
            'description': description,
            'url': video_url,
            'ext': 'mp4',
        }