youtube-dl/youtube_dl/extractor/francetv.py

# coding: utf-8

from __future__ import unicode_literals

import re
import json

from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
    clean_html,
    ExtractorError,
    int_or_none,
    parse_duration,
    determine_ext,
)
from .dailymotion import (
    DailymotionIE,
    DailymotionCloudIE,
)


class FranceTVBaseInfoExtractor(InfoExtractor):
    """Shared logic for extractors that resolve a video through the
    France Televisions ``getInfosOeuvre`` web service."""

    def _extract_video(self, video_id, catalogue):
        """Build the info dict for a single video.

        Queries the getInfosOeuvre v2 web service with ``video_id`` and
        ``catalogue``, collects every usable format and the subtitles,
        and returns the resulting info dict.

        Only the title and each entry's format label are treated as
        mandatory; all other metadata fields are optional and come out as
        None when the service does not provide them.

        Raises ExtractorError when the service answers with status 'NOK'
        or when the video is geo-restricted and the country reported by
        the geo service is not in the allowed list.
        """
        info = self._download_json(
            'http://webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=%s&catalogue=%s'
            % (video_id, catalogue),
            video_id, 'Downloading video JSON')

        if info.get('status') == 'NOK':
            # Use .get() so a missing message cannot mask the real error.
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, info.get('message')), expected=True)

        # A missing or empty video list simply yields no formats instead of
        # raising KeyError/IndexError here.
        videos = info.get('videos') or []

        # The geo restriction is read from the first video entry only.
        allowed_countries = videos[0].get('geoblocage') if videos else None
        georestricted = bool(allowed_countries)
        if georestricted:
            geo_info = self._download_json(
                'http://geo.francetv.fr/ws/edgescape.json', video_id,
                'Downloading geo restriction info')
            country = geo_info['reponse']['geo_info']['country_code']
            if country not in allowed_countries:
                raise ExtractorError(
                    'The video is not available from your location',
                    expected=True)

        formats = []
        for video in videos:
            if video.get('statut') != 'ONLINE':
                continue
            video_url = video.get('url')
            if not video_url:
                continue
            format_id = video['format']
            ext = determine_ext(video_url)
            if ext == 'f4m':
                if georestricted:
                    # See https://github.com/rg3/youtube-dl/issues/3963
                    # m3u8 urls work fine
                    continue
                # The manifest URL has to be exchanged for a tokenized one.
                f4m_url = self._download_webpage(
                    'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url,
                    video_id, 'Downloading f4m manifest token', fatal=False)
                if f4m_url:
                    formats.extend(self._extract_f4m_formats(
                        f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
                        video_id, f4m_id=format_id, fatal=False))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id=format_id, fatal=False))
            elif video_url.startswith('rtmp'):
                formats.append({
                    'url': video_url,
                    'format_id': 'rtmp-%s' % format_id,
                    'ext': 'flv',
                })
            else:
                if self._is_valid_url(video_url, video_id, format_id):
                    formats.append({
                        'url': video_url,
                        'format_id': format_id,
                    })
        self._sort_formats(formats)

        title = info['titre']
        subtitle = info.get('sous_titre')
        if subtitle:
            title += ' - %s' % subtitle
        title = title.strip()

        subtitles = {}
        # `or []` also covers a 'subtitles' key that is present but null.
        subtitles_list = [{
            'url': subformat['url'],
            'ext': subformat.get('format'),
        } for subformat in info.get('subtitles') or [] if subformat.get('url')]
        if subtitles_list:
            subtitles['fr'] = subtitles_list

        # Only join when an image path exists; joining an empty value would
        # return the bare base URL, which is not a thumbnail.
        image = info.get('image')
        thumbnail = compat_urlparse.urljoin(
            'http://pluzz.francetv.fr', image) if image else None

        return {
            'id': video_id,
            'title': title,
            'description': clean_html(info.get('synopsis')),
            'thumbnail': thumbnail,
            'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
            'timestamp': int_or_none((info.get('diffusion') or {}).get('timestamp')),
            'formats': formats,
            'subtitles': subtitles,
        }


class PluzzIE(FranceTVBaseInfoExtractor):
    """Extractor for video pages on pluzz.francetv.fr (with or without the ``m.`` host prefix)."""

    IE_NAME = 'pluzz.francetv.fr'
    _VALID_URL = r'https?://(?:m\.)?pluzz\.francetv\.fr/videos/(?P<id>.+?)\.html'

    # Can't use tests, videos expire in 7 days

    def _real_extract(self, url):
        """Find the video id in the page and extract it from the 'Pluzz' catalogue."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # Prefer the id_video meta tag; fall back to the data-diffusion
        # attribute when the meta tag is absent or empty.
        video_id = self._html_search_meta(
            'id_video', page, 'video id', default=None) or self._search_regex(
            r'data-diffusion=["\'](\d+)', page, 'video id')

        return self._extract_video(video_id, 'Pluzz')


class FranceTvInfoIE(FranceTVBaseInfoExtractor):
    """Extractor for francetvinfo.fr pages (www, mobile and france3-regions hosts)."""

    IE_NAME = 'francetvinfo.fr'
    _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<title>[^/?#&.]+)'

    _TESTS = [{
        'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
        'info_dict': {
            'id': '84981923',
            'ext': 'mp4',
            'title': 'Soir 3',
            'upload_date': '20130826',
            'timestamp': 1377548400,
            'subtitles': {
                'fr': 'mincount:2',
            },
        },
        'params': {
            # m3u8 downloads
            'skip_download': True,
        },
    }, {
        'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
        'info_dict': {
            'id': 'EV_20019',
            'ext': 'mp4',
            'title': 'Débat des candidats à la Commission européenne',
            'description': 'Débat des candidats à la Commission européenne',
        },
        'params': {
            'skip_download': 'HLS (reqires ffmpeg)'
        },
        'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
    }, {
        'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
        'md5': 'f485bda6e185e7d15dbc69b72bae993e',
        'info_dict': {
            'id': 'NI_173343',
            'ext': 'mp4',
            'title': 'Les entreprises familiales : le secret de la réussite',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'timestamp': 1433273139,
            'upload_date': '20150602',
        },
        'params': {
            # m3u8 downloads
            'skip_download': True,
        },
    }, {
        'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
        'md5': 'f485bda6e185e7d15dbc69b72bae993e',
        'info_dict': {
            'id': 'NI_657393',
            'ext': 'mp4',
            'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de l’Armor"',
            'description': 'md5:a3264114c9d29aeca11ced113c37b16c',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'timestamp': 1458300695,
            'upload_date': '20160318',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Dailymotion embed
        'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html',
        'md5': 'ee7f1828f25a648addc90cb2687b1f12',
        'info_dict': {
            'id': 'x4iiko0',
            'ext': 'mp4',
            'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen',
            'description': 'Au lendemain de la victoire du "oui" au référendum sur l\'aéroport de Notre-Dame-des-Landes, l\'ancienne ministre écologiste est l\'invitée de Patrick Cohen. Plus d\'info : https://www.franceinter.fr/emissions/le-7-9/le-7-9-27-juin-2016',
            'timestamp': 1467011958,
            'upload_date': '20160627',
            'uploader': 'France Inter',
            'uploader_id': 'x2q2ez',
        },
        'add_ie': ['Dailymotion'],
    }, {
        'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the page to an embedded Dailymotion video or a France TV video.

        Embeds are tried in order: a Dailymotion Cloud URL, then plain
        Dailymotion URLs (returned as a playlist), and finally the
        ``<id>@<catalogue>`` reference handled by ``_extract_video``.
        """
        page_title = re.match(self._VALID_URL, url).group('title')
        webpage = self._download_webpage(url, page_title)

        cloud_embed = DailymotionCloudIE._extract_dmcloud_url(webpage)
        if cloud_embed:
            return self.url_result(cloud_embed, DailymotionCloudIE.ie_key())

        embeds = DailymotionIE._extract_urls(webpage)
        if embeds:
            entries = []
            for embed_url in embeds:
                entries.append(
                    self.url_result(embed_url, DailymotionIE.ie_key()))
            return self.playlist_result(entries)

        # The reference has the form "<video id>@<catalogue>".
        reference = self._search_regex(
            (r'id-video=([^@]+@[^"]+)',
             r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
            webpage, 'video id')
        video_id, catalogue = reference.split('@')
        return self._extract_video(video_id, catalogue)


class FranceTVIE(FranceTVBaseInfoExtractor):
    """Extractor for the france2/3/4/5/o channel sites and embed.francetv.fr URLs."""

    IE_NAME = 'francetv'
    IE_DESC = 'France 2, 3, 4, 5 and Ô'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:www\.)?france[2345o]\.fr/
                                (?:
                                    emissions/[^/]+/(?:videos|diffusions)|
                                    emission/[^/]+|
                                    videos|
                                    jt
                                )
                            /|
                            embed\.francetv\.fr/\?ue=
                        )
                        (?P<id>[^/?]+)
                    '''

    _TESTS = [
        # france2
        {
            'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
            'md5': 'c03fc87cb85429ffd55df32b9fc05523',
            'info_dict': {
                'id': '109169362',
                'ext': 'flv',
                'title': '13h15, le dimanche...',
                'description': 'md5:9a0932bb465f22d377a449be9d1a0ff7',
                'upload_date': '20140914',
                'timestamp': 1410693600,
            },
        },
        # france3
        {
            'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
            'md5': '679bb8f8921f8623bd658fa2f8364da0',
            'info_dict': {
                'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
                'ext': 'mp4',
                'title': 'Le scandale du prix des médicaments',
                'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce',
                'upload_date': '20131113',
                'timestamp': 1384380000,
            },
        },
        # france4
        {
            'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
            'md5': 'a182bf8d2c43d88d46ec48fbdd260c1c',
            'info_dict': {
                'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
                'ext': 'mp4',
                'title': 'Hero Corp Making of - Extrait 1',
                'description': 'md5:c87d54871b1790679aec1197e73d650a',
                'upload_date': '20131106',
                'timestamp': 1383766500,
            },
        },
        # france5
        {
            'url': 'http://www.france5.fr/emissions/c-a-dire/videos/quels_sont_les_enjeux_de_cette_rentree_politique__31-08-2015_908948?onglet=tous&page=1',
            'md5': 'f6c577df3806e26471b3d21631241fd0',
            'info_dict': {
                'id': '123327454',
                'ext': 'flv',
                'title': 'C à dire ?! - Quels sont les enjeux de cette rentrée politique ?',
                'description': 'md5:4a0d5cb5dce89d353522a84462bae5a4',
                'upload_date': '20150831',
                'timestamp': 1441035120,
            },
        },
        # franceo
        {
            'url': 'http://www.franceo.fr/jt/info-soir/18-07-2015',
            'md5': '47d5816d3b24351cdce512ad7ab31da8',
            'info_dict': {
                'id': '125377621',
                'ext': 'flv',
                'title': 'Infô soir',
                'description': 'md5:01b8c6915a3d93d8bbbd692651714309',
                'upload_date': '20150718',
                'timestamp': 1437241200,
                'duration': 414,
            },
        },
        {
            # francetv embed
            'url': 'http://embed.francetv.fr/?ue=8d7d3da1e3047c42ade5a5d7dfd3fc87',
            'info_dict': {
                'id': 'EV_30231',
                'ext': 'flv',
                'title': 'Alcaline, le concert avec Calogero',
                'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
                'upload_date': '20150226',
                'timestamp': 1424989860,
                'duration': 5400,
            },
        },
        {
            'url': 'http://www.france4.fr/emission/highlander/diffusion-du-17-07-2015-04h05',
            'only_matching': True,
        },
        {
            'url': 'http://www.franceo.fr/videos/125377617',
            'only_matching': True,
        }
    ]

    def _real_extract(self, url):
        """Locate the ``<id>@<catalogue>`` reference in the page and extract that video."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The reference appears either in a link or in a player.setVideo() call.
        reference = self._html_search_regex(
            r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
            webpage, 'video ID')
        video_id, catalogue = reference.split('@')

        return self._extract_video(video_id, catalogue)


class GenerationQuoiIE(InfoExtractor):
    """Extractor for generation-quoi.france2.fr portrait pages, whose videos are hosted on Dailymotion."""

    IE_NAME = 'france2.fr:generation-quoi'
    _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)'

    _TEST = {
        'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
        'info_dict': {
            'id': 'k7FJX8VBcvvLmX4wA5Q',
            'ext': 'mp4',
            'title': 'Génération Quoi - Garde à Vous',
            'uploader': 'Génération Quoi',
        },
        'params': {
            # It uses Dailymotion
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Read the portrait's JSON description and delegate to the Dailymotion extractor.

        The JSON document lives at ``/medias/video/<display id>.json`` on
        the same host; its ``id`` field is used as the Dailymotion video id.
        """
        display_id = self._match_id(url)
        info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % display_id)
        # Let the framework download and parse the JSON in one step rather
        # than calling json.loads on the raw page by hand.
        info = self._download_json(info_url, display_id)
        return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
                               ie='Dailymotion')


class CultureboxIE(FranceTVBaseInfoExtractor):
    """Extractor for culturebox.francetvinfo.fr pages (with or without the ``m.`` host prefix)."""

    IE_NAME = 'culturebox.francetvinfo.fr'
    _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'

    _TEST = {
        'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511',
        'md5': '9b88dc156781c4dbebd4c3e066e0b1d6',
        'info_dict': {
            'id': 'EV_50111',
            'ext': 'flv',
            'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne",
            'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9',
            'upload_date': '20150320',
            'timestamp': 1426892400,
            'duration': 2760.9,
        },
    }

    def _real_extract(self, url):
        """Locate the ``<id>@<catalogue>`` reference in the page and extract that video.

        Raises ExtractorError when the page carries the notice that the
        live is no longer available as a replay.
        """
        name = re.match(self._VALID_URL, url).group('name')
        webpage = self._download_webpage(url, name)

        # Notice shown on the page once the replay has been taken down.
        replay_gone = ">Ce live n'est plus disponible en replay<" in webpage
        if replay_gone:
            raise ExtractorError('Video %s is not available' % name, expected=True)

        reference = self._search_regex(
            r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id')
        video_id, catalogue = reference.split('@')

        return self._extract_video(video_id, catalogue)
-												Unify coding cookie

											
										
										
											2016-10-02 06:39:18 -05:00
+								# coding: utf-8
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
 								from __future__ import unicode_literals
-												Add an extractor for pluzz.francetv.fr (closes PR #1399)

											
										
										
											2013-09-08 14:55:11 -05:00
+								import re
-												[francetv] Add an extractor for Generation Quoi (closes #1475)

											
										
										
											2013-09-23 14:41:54 -05:00
+								import json
-												Add an extractor for pluzz.francetv.fr (closes PR #1399)

											
										
										
											2013-09-08 14:55:11 -05:00
 								from .common import InfoExtractor
-												[francetv] Remove unused import

											
										
										
											2015-07-19 10:54:12 -05:00
+								from ..compat import compat_urlparse
-												Fix imports and general cleanup

· Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail.
· Use _match_id consistently whenever possible
· Fix some outdated tests
· Use consistent valid URL (always match the whole protocol, no ^ at start required)
· Use modern test definitions

											
										
										
											2014-12-13 05:24:42 -06:00
+								from ..utils import (
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								    clean_html,
-												Fix imports and general cleanup

· Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail.
· Use _match_id consistently whenever possible
· Fix some outdated tests
· Use consistent valid URL (always match the whole protocol, no ^ at start required)
· Use modern test definitions

											
										
										
											2014-12-13 05:24:42 -06:00
+								    ExtractorError,
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								    int_or_none,
-												Fix imports and general cleanup

· Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail.
· Use _match_id consistently whenever possible
· Fix some outdated tests
· Use consistent valid URL (always match the whole protocol, no ^ at start required)
· Use modern test definitions

											
										
										
											2014-12-13 05:24:42 -06:00
+								    parse_duration,
-												[francetv] Improve formats extraction

											
										
										
											2015-04-04 12:02:04 -05:00
+								    determine_ext,
-												Add an extractor for pluzz.francetv.fr (closes PR #1399)

											
										
										
											2013-09-08 14:55:11 -05:00
+								)
-												[francetv] Recognize more Dailymotion embedded videos

Closes #9955

											
										
										
											2016-07-06 10:37:54 -05:00
+								from .dailymotion import (
 								    DailymotionIE,
 								    DailymotionCloudIE,
 								)
-												Add an extractor for pluzz.francetv.fr (closes PR #1399)

											
										
										
											2013-09-08 14:55:11 -05:00
-												[francetv] Add an extractor for francetvinfo.fr (closes #1317)

It uses the same system as Pluzz, create a base class for both extractors.

											
										
										
											2013-09-10 08:50:34 -05:00
+								class FranceTVBaseInfoExtractor(InfoExtractor):
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								    def _extract_video(self, video_id, catalogue):
 								        info = self._download_json(
 								            'http://webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=%s&catalogue=%s'
 								            % (video_id, catalogue),
 								            video_id, 'Downloading video JSON')
 								        if info.get('status') == 'NOK':
 								            raise ExtractorError(
 								                '%s returned error: %s' % (self.IE_NAME, info['message']), expected=True)
-												[francetv] Use the m3u8 manifest for georestricted videos (closes #3963)

Generating the correct urls for the f4m segments seems to require a lot of work.
Also raise an error if the video is not available from your location.

											
										
										
											2014-11-24 12:37:20 -06:00
+								        allowed_countries = info['videos'][0].get('geoblocage')
 								        if allowed_countries:
 								            georestricted = True
 								            geo_info = self._download_json(
 								                'http://geo.francetv.fr/ws/edgescape.json', video_id,
 								                'Downloading geo restriction info')
 								            country = geo_info['reponse']['geo_info']['country_code']
 								            if country not in allowed_countries:
 								                raise ExtractorError(
 								                    'The video is not available from your location',
 								                    expected=True)
 								        else:
 								            georestricted = False
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								        formats = []
 								        for video in info['videos']:
 								            if video['statut'] != 'ONLINE':
 								                continue
 								            video_url = video['url']
 								            if not video_url:
 								                continue
 								            format_id = video['format']
-												[francetv] Improve formats extraction

											
										
										
											2015-04-04 12:02:04 -05:00
+								            ext = determine_ext(video_url)
 								            if ext == 'f4m':
-												[francetv] Use the m3u8 manifest for georestricted videos (closes #3963)

Generating the correct urls for the f4m segments seems to require a lot of work.
Also raise an error if the video is not available from your location.

											
										
										
											2014-11-24 12:37:20 -06:00
+								                if georestricted:
 								                    # See https://github.com/rg3/youtube-dl/issues/3963
 								                    # m3u8 urls work fine
 								                    continue
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								                f4m_url = self._download_webpage(
-												[francetv] Fix f4m extraction

											
										
										
											2015-07-19 10:45:49 -05:00
+								                    'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url,
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								                    video_id, 'Downloading f4m manifest token', fatal=False)
 								                if f4m_url:
-												[francetv] Fix f4m extraction completely

											
										
										
											2015-07-19 10:51:06 -05:00
+								                    formats.extend(self._extract_f4m_formats(
-												[francetv] Improve formats extraction

											
										
										
											2016-03-20 02:00:46 -05:00
+								                        f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
 								                        video_id, f4m_id=format_id, fatal=False))
-												[francetv] Improve formats extraction

											
										
										
											2015-04-04 12:02:04 -05:00
+								            elif ext == 'm3u8':
-												[francetv] Improve formats extraction

											
										
										
											2016-03-20 02:00:46 -05:00
+								                formats.extend(self._extract_m3u8_formats(
 								                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
 								                    m3u8_id=format_id, fatal=False))
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								            elif video_url.startswith('rtmp'):
 								                formats.append({
 								                    'url': video_url,
 								                    'format_id': 'rtmp-%s' % format_id,
 								                    'ext': 'flv',
 								                })
 								            else:
-												[francetv] Improve formats extraction

											
										
										
											2016-03-20 02:00:46 -05:00
+								                if self._is_valid_url(video_url, video_id, format_id):
 								                    formats.append({
 								                        'url': video_url,
 								                        'format_id': format_id,
 								                    })
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								        self._sort_formats(formats)
-												[francetv] Add an extractor for francetvinfo.fr (closes #1317)

It uses the same system as Pluzz, create a base class for both extractors.

											
										
										
											2013-09-10 08:50:34 -05:00
-												[francetv] Use subtitle when present (Closes #6715)

											
										
										
											2015-09-01 11:37:42 -05:00
+								        title = info['titre']
 								        subtitle = info.get('sous_titre')
 								        if subtitle:
 								            title += ' - %s' % subtitle
-												[francetvinfo] Add support for france3-regions and strip title (Closes #7673)

											
										
										
											2016-03-20 01:44:04 -05:00
+								        title = title.strip()
-												[francetv] Use subtitle when present (Closes #6715)

											
										
										
											2015-09-01 11:37:42 -05:00
-												[francetv] Add subtitles support

											
										
										
											2015-10-26 09:11:09 -05:00
+								        subtitles = {}
-												[francetv] Make subtitles more robust (Closes #7298)

											
										
										
											2015-10-26 09:35:28 -05:00
+								        subtitles_list = [{
-												[francetv] fix style issues reported by flake8

* Don't redefine variable in list comprehension
* Line missing indentation

											
										
										
											2015-10-28 02:22:04 -05:00
+								            'url': subformat['url'],
 								            'ext': subformat.get('format'),
 								        } for subformat in info.get('subtitles', []) if subformat.get('url')]
-												[francetv] Make subtitles more robust (Closes #7298)

											
										
										
											2015-10-26 09:35:28 -05:00
+								        if subtitles_list:
 								            subtitles['fr'] = subtitles_list
-												[francetv] Add subtitles support

											
										
										
											2015-10-26 09:11:09 -05:00
-												[francetv] Extract all the available formats (#3278)

For some videos the resolution is not included in the url, we will need to look in the m3u8 manifest.

											
										
										
											2014-07-28 07:37:13 -05:00
+								        return {
 								            'id': video_id,
-												[francetv] Use subtitle when present (Closes #6715)

											
										
										
											2015-09-01 11:37:42 -05:00
+								            'title': title,
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								            'description': clean_html(info['synopsis']),
 								            'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
-												[francetv] Fix duration extraction

											
										
										
											2015-07-18 11:43:42 -05:00
+								            'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								            'timestamp': int_or_none(info['diffusion']['timestamp']),
-												[francetv] Extract all the available formats (#3278)

For some videos the resolution is not included in the url, we will need to look in the m3u8 manifest.

											
										
										
											2014-07-28 07:37:13 -05:00
+								            'formats': formats,
-												[francetv] Add subtitles support

											
										
										
											2015-10-26 09:11:09 -05:00
+								            'subtitles': subtitles,
-												[francetv] Extract all the available formats (#3278)

For some videos the resolution is not included in the url, we will need to look in the m3u8 manifest.

											
										
										
											2014-07-28 07:37:13 -05:00
+								        }
-												[francetv] Add an extractor for francetvinfo.fr (closes #1317)

It uses the same system as Pluzz, create a base class for both extractors.

											
										
										
											2013-09-10 08:50:34 -05:00
 								class PluzzIE(FranceTVBaseInfoExtractor):
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								    IE_NAME = 'pluzz.francetv.fr'
-												[pluzz] Fix mobile support and modernize (Closes #7305)

											
										
										
											2015-10-27 10:43:29 -05:00
+								    _VALID_URL = r'https?://(?:m\.)?pluzz\.francetv\.fr/videos/(?P<id>.+?)\.html'
-												Add an extractor for pluzz.francetv.fr (closes PR #1399)

											
										
										
											2013-09-08 14:55:11 -05:00
-												[francetv] Remove Pluzz test

Videos expire in 7 days

											
										
										
											2013-09-17 15:49:43 -05:00
+								    # Can't use tests, videos expire in 7 days
-												Add an extractor for pluzz.francetv.fr (closes PR #1399)

											
										
										
											2013-09-08 14:55:11 -05:00
 								    def _real_extract(self, url):
-												[pluzz] Fix mobile support and modernize (Closes #7305)

											
										
										
											2015-10-27 10:43:29 -05:00
+								        display_id = self._match_id(url)
 								        webpage = self._download_webpage(url, display_id)
 								        video_id = self._html_search_meta(
 								            'id_video', webpage, 'video id', default=None)
 								        if not video_id:
 								            video_id = self._search_regex(
-												[francetv] fix style issues reported by flake8

* Don't redefine variable in list comprehension
* Line missing indentation

											
										
										
											2015-10-28 02:22:04 -05:00
+								                r'data-diffusion=["\'](\d+)', webpage, 'video id')
-												[pluzz] Fix mobile support and modernize (Closes #7305)

											
										
										
											2015-10-27 10:43:29 -05:00
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								        return self._extract_video(video_id, 'Pluzz')
-												Add an extractor for pluzz.francetv.fr (closes PR #1399)

											
										
										
											2013-09-08 14:55:11 -05:00
-												[francetv] Add an extractor for francetvinfo.fr (closes #1317)

It uses the same system as Pluzz, create a base class for both extractors.

											
										
										
											2013-09-10 08:50:34 -05:00
+								class FranceTvInfoIE(FranceTVBaseInfoExtractor):
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								    IE_NAME = 'francetvinfo.fr'
-												[francetvinfo] Relax _VALID_URL

											
										
										
											2016-08-11 09:42:55 -05:00
+								    _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<title>[^/?#&.]+)'
-												Add an extractor for pluzz.francetv.fr (closes PR #1399)

											
										
										
											2013-09-08 14:55:11 -05:00
-												[francetv] Add support for non-numeric video IDs (Fixes #2927)

											
										
										
											2014-05-16 08:51:01 -05:00
+								    _TESTS = [{
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								        'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
 								        'info_dict': {
-												[francetv] Add support for non-numeric video IDs (Fixes #2927)

											
										
										
											2014-05-16 08:51:01 -05:00
+								            'id': '84981923',
-												[francetv] Improve formats extraction

											
										
										
											2016-03-20 02:00:46 -05:00
+								            'ext': 'mp4',
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								            'title': 'Soir 3',
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								            'upload_date': '20130826',
 								            'timestamp': 1377548400,
-												[francetv] Add subtitles test

											
										
										
											2015-10-26 09:35:45 -05:00
+								            'subtitles': {
 								                'fr': 'mincount:2',
 								            },
-												[francetv] Add an extractor for francetvinfo.fr (closes #1317)

It uses the same system as Pluzz, create a base class for both extractors.

											
										
										
											2013-09-10 08:50:34 -05:00
+								        },
-												[francetv] Improve formats extraction

											
										
										
											2016-03-20 02:00:46 -05:00
+								        'params': {
 								            # m3u8 downloads
 								            'skip_download': True,
 								        },
-												[francetv] Add support for non-numeric video IDs (Fixes #2927)

											
										
										
											2014-05-16 08:51:01 -05:00
+								    }, {
 								        'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
 								        'info_dict': {
 								            'id': 'EV_20019',
 								            'ext': 'mp4',
 								            'title': 'Débat des candidats à la Commission européenne',
 								            'description': 'Débat des candidats à la Commission européenne',
 								        },
 								        'params': {
 								            'skip_download': 'HLS (reqires ffmpeg)'
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								        },
 								        'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
-												[francetvinfo.fr] Support dmcloud embeds (fixes #6034)

											
										
										
											2015-06-21 08:31:33 -05:00
+								    }, {
 								        'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
 								        'md5': 'f485bda6e185e7d15dbc69b72bae993e',
 								        'info_dict': {
-												[francetv] Improve formats extraction

											
										
										
											2016-03-20 02:00:46 -05:00
+								            'id': 'NI_173343',
-												[francetvinfo.fr] Support dmcloud embeds (fixes #6034)

											
										
										
											2015-06-21 08:31:33 -05:00
+								            'ext': 'mp4',
 								            'title': 'Les entreprises familiales : le secret de la réussite',
-												Fix "invalid escape sequences" error on Python 3.6

											
										
										
											2017-01-02 06:08:07 -06:00
+								            'thumbnail': r're:^https?://.*\.jpe?g$',
-												[francetv] Improve formats extraction

											
										
										
											2016-03-20 02:00:46 -05:00
+								            'timestamp': 1433273139,
 								            'upload_date': '20150602',
 								        },
 								        'params': {
 								            # m3u8 downloads
 								            'skip_download': True,
 								        },
-												[francetvinfo] Add support for france3-regions and strip title (Closes #7673)

											
										
										
											2016-03-20 01:44:04 -05:00
+								    }, {
 								        'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
 								        'md5': 'f485bda6e185e7d15dbc69b72bae993e',
 								        'info_dict': {
 								            'id': 'NI_657393',
-												[francetv] Improve formats extraction

											
										
										
											2016-03-20 02:00:46 -05:00
+								            'ext': 'mp4',
-												[francetvinfo] Add support for france3-regions and strip title (Closes #7673)

											
										
										
											2016-03-20 01:44:04 -05:00
+								            'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de l’Armor"',
 								            'description': 'md5:a3264114c9d29aeca11ced113c37b16c',
-												Fix "invalid escape sequences" error on Python 3.6

											
										
										
											2017-01-02 06:08:07 -06:00
+								            'thumbnail': r're:^https?://.*\.jpe?g$',
-												[francetvinfo] Add support for france3-regions and strip title (Closes #7673)

											
										
										
											2016-03-20 01:44:04 -05:00
+								            'timestamp': 1458300695,
 								            'upload_date': '20160318',
 								        },
 								        'params': {
 								            'skip_download': True,
 								        },
-												[francetv] Recognize more Dailymotion embedded videos

Closes #9955

											
										
										
											2016-07-06 10:37:54 -05:00
+								    }, {
 								        # Dailymotion embed
 								        'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html',
 								        'md5': 'ee7f1828f25a648addc90cb2687b1f12',
 								        'info_dict': {
 								            'id': 'x4iiko0',
 								            'ext': 'mp4',
 								            'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen',
 								            'description': 'Au lendemain de la victoire du "oui" au référendum sur l\'aéroport de Notre-Dame-des-Landes, l\'ancienne ministre écologiste est l\'invitée de Patrick Cohen. Plus d\'info : https://www.franceinter.fr/emissions/le-7-9/le-7-9-27-juin-2016',
 								            'timestamp': 1467011958,
 								            'upload_date': '20160627',
 								            'uploader': 'France Inter',
 								            'uploader_id': 'x2q2ez',
 								        },
 								        'add_ie': ['Dailymotion'],
-												[francetvinfo] Relax _VALID_URL

											
										
										
											2016-08-11 09:42:55 -05:00
+								    }, {
 								        'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
 								        'only_matching': True,
-												[francetv] Add support for non-numeric video IDs (Fixes #2927)

											
										
										
											2014-05-16 08:51:01 -05:00
+								    }]
-												[francetv] Add an extractor for francetvinfo.fr (closes #1317)

It uses the same system as Pluzz, create a base class for both extractors.

											
										
										
											2013-09-10 08:50:34 -05:00
 								    def _real_extract(self, url):
 								        """Extract a francetvinfo.fr page, delegating to Dailymotion when it is embedded."""
 								        page_title = re.match(self._VALID_URL, url).group('title')
 								        webpage = self._download_webpage(url, page_title)
 								        # A Dailymotion Cloud embed is checked first and returned on its own.
 								        dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
 								        if dmcloud_url:
 								            return self.url_result(dmcloud_url, DailymotionCloudIE.ie_key())
 								        # Regular Dailymotion embeds are returned together as one playlist.
 								        dailymotion_urls = DailymotionIE._extract_urls(webpage)
 								        if dailymotion_urls:
 								            entries = [
 								                self.url_result(embed_url, DailymotionIE.ie_key())
 								                for embed_url in dailymotion_urls]
 								            return self.playlist_result(entries)
 								        # Otherwise look for a native "<video id>@<catalogue>" reference.
 								        id_patterns = (
 								            r'id-video=([^@]+@[^"]+)',
 								            r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"')
 								        video_ref = self._search_regex(id_patterns, webpage, 'video id')
 								        video_id, catalogue = video_ref.split('@')
 								        return self._extract_video(video_id, catalogue)
-												[francetv] Add an extractor for France2

											
										
										
											2013-09-23 14:28:33 -05:00
-												[francetv] Add support for more channels: 3, 4, 5 and Ô (#1898)

Rename the France2IE extractor to FranceTVIE

											
										
										
											2013-12-05 13:45:53 -06:00
+								class FranceTVIE(FranceTVBaseInfoExtractor):
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								    IE_NAME = 'francetv'
 								    IE_DESC = 'France 2, 3, 4, 5 and Ô'
-												[francetv] Add support for embeds and clean up _VALID_URL

											
										
										
											2015-07-18 11:41:34 -05:00
+								    _VALID_URL = r'''(?x)
 								                    https?://
 								                        (?:
 								                            (?:www\.)?france[2345o]\.fr/
 								                                (?:
-												[francetv] Improve _VALID_URL

											
										
										
											2015-07-19 14:03:43 -05:00
+								                                    emissions/[^/]+/(?:videos|diffusions)|
 								                                    emission/[^/]+|
-												[francetv] Restore support for jt videos

											
										
										
											2015-07-19 10:50:25 -05:00
+								                                    videos|
 								                                    jt
-												[francetv] Add support for embeds and clean up _VALID_URL

											
										
										
											2015-07-18 11:41:34 -05:00
+								                                )
 								                            /|
 								                            embed\.francetv\.fr/\?ue=
 								                        )
 								                        (?P<id>[^/?]+)
 								                    '''
-												[francetv] Add an extractor for France2

											
										
										
											2013-09-23 14:28:33 -05:00
-												[francetv] Add support for more channels: 3, 4, 5 and Ô (#1898)

Rename the France2IE extractor to FranceTVIE

											
										
										
											2013-12-05 13:45:53 -06:00
+								    _TESTS = [
 								        # france2
 								        {
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								            'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								            'md5': 'c03fc87cb85429ffd55df32b9fc05523',
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								            'info_dict': {
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								                'id': '109169362',
 								                'ext': 'flv',
 								                'title': '13h15, le dimanche...',
 								                'description': 'md5:9a0932bb465f22d377a449be9d1a0ff7',
 								                'upload_date': '20140914',
 								                'timestamp': 1410693600,
-												[francetv] Add support for more channels: 3, 4, 5 and Ô (#1898)

Rename the France2IE extractor to FranceTVIE

											
										
										
											2013-12-05 13:45:53 -06:00
+								            },
-												[francetv] Add an extractor for France2

											
										
										
											2013-09-23 14:28:33 -05:00
+								        },
-												[francetv] Add support for more channels: 3, 4, 5 and Ô (#1898)

Rename the France2IE extractor to FranceTVIE

											
										
										
											2013-12-05 13:45:53 -06:00
+								        # france3
 								        {
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								            'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								            'md5': '679bb8f8921f8623bd658fa2f8364da0',
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								            'info_dict': {
 								                'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								                'ext': 'mp4',
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								                'title': 'Le scandale du prix des médicaments',
 								                'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce',
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								                'upload_date': '20131113',
 								                'timestamp': 1384380000,
-												[francetv] Add support for more channels: 3, 4, 5 and Ô (#1898)

Rename the France2IE extractor to FranceTVIE

											
										
										
											2013-12-05 13:45:53 -06:00
+								            },
-												[francetv] Add an extractor for France2

											
										
										
											2013-09-23 14:28:33 -05:00
+								        },
-												[francetv] Add support for more channels: 3, 4, 5 and Ô (#1898)

Rename the France2IE extractor to FranceTVIE

											
										
										
											2013-12-05 13:45:53 -06:00
+								        # france4
 								        {
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								            'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								            'md5': 'a182bf8d2c43d88d46ec48fbdd260c1c',
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								            'info_dict': {
 								                'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								                'ext': 'mp4',
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								                'title': 'Hero Corp Making of - Extrait 1',
 								                'description': 'md5:c87d54871b1790679aec1197e73d650a',
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								                'upload_date': '20131106',
 								                'timestamp': 1383766500,
-												[francetv] Add support for more channels: 3, 4, 5 and Ô (#1898)

Rename the France2IE extractor to FranceTVIE

											
										
										
											2013-12-05 13:45:53 -06:00
+								            },
 								        },
 								        # france5
 								        {
-												[francetv] Update tests (2)

											
										
										
											2015-09-01 11:42:43 -05:00
+								            'url': 'http://www.france5.fr/emissions/c-a-dire/videos/quels_sont_les_enjeux_de_cette_rentree_politique__31-08-2015_908948?onglet=tous&page=1',
 								            'md5': 'f6c577df3806e26471b3d21631241fd0',
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								            'info_dict': {
-												[francetv] Update tests (2)

											
										
										
											2015-09-01 11:42:43 -05:00
+								                'id': '123327454',
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								                'ext': 'flv',
-												[francetv] Update tests

											
										
										
											2015-09-01 11:39:26 -05:00
+								                'title': 'C à dire ?! - Quels sont les enjeux de cette rentrée politique ?',
-												[francetv] Update tests (2)

											
										
										
											2015-09-01 11:42:43 -05:00
+								                'description': 'md5:4a0d5cb5dce89d353522a84462bae5a4',
 								                'upload_date': '20150831',
 								                'timestamp': 1441035120,
-												[francetv] Add support for more channels: 3, 4, 5 and Ô (#1898)

Rename the France2IE extractor to FranceTVIE

											
										
										
											2013-12-05 13:45:53 -06:00
+								            },
 								        },
 								        # franceo
 								        {
-												[francetv] Update jt test

											
										
										
											2015-07-19 10:53:09 -05:00
+								            'url': 'http://www.franceo.fr/jt/info-soir/18-07-2015',
 								            'md5': '47d5816d3b24351cdce512ad7ab31da8',
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								            'info_dict': {
-												[francetv] Update jt test

											
										
										
											2015-07-19 10:53:09 -05:00
+								                'id': '125377621',
-												[francetv] Adapt to new API (Closes #3751, closes #3769)

											
										
										
											2014-09-16 11:47:59 -05:00
+								                'ext': 'flv',
-												[francetv] Update jt test

											
										
										
											2015-07-19 10:53:09 -05:00
+								                'title': 'Infô soir',
 								                'description': 'md5:01b8c6915a3d93d8bbbd692651714309',
 								                'upload_date': '20150718',
 								                'timestamp': 1437241200,
 								                'duration': 414,
-												[francetv] Add support for more channels: 3, 4, 5 and Ô (#1898)

Rename the France2IE extractor to FranceTVIE

											
										
										
											2013-12-05 13:45:53 -06:00
+								            },
 								        },
-												[francetv] Add support for embeds and clean up _VALID_URL

											
										
										
											2015-07-18 11:41:34 -05:00
+								        {
 								            # francetv embed
 								            'url': 'http://embed.francetv.fr/?ue=8d7d3da1e3047c42ade5a5d7dfd3fc87',
 								            'info_dict': {
 								                'id': 'EV_30231',
 								                'ext': 'flv',
 								                'title': 'Alcaline, le concert avec Calogero',
-												[francetv] Fix embed test

											
										
										
											2015-07-19 10:53:54 -05:00
+								                'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
-												[francetv] Add support for embeds and clean up _VALID_URL

											
										
										
											2015-07-18 11:41:34 -05:00
+								                'upload_date': '20150226',
 								                'timestamp': 1424989860,
-												[francetv] Fix duration extraction

											
										
										
											2015-07-18 11:43:42 -05:00
+								                'duration': 5400,
-												[francetv] Add support for embeds and clean up _VALID_URL

											
										
										
											2015-07-18 11:41:34 -05:00
+								            },
 								        },
 								        {
 								            'url': 'http://www.france4.fr/emission/highlander/diffusion-du-17-07-2015-04h05',
 								            'only_matching': True,
 								        },
 								        {
 								            'url': 'http://www.franceo.fr/videos/125377617',
 								            'only_matching': True,
 								        }
-												[francetv] Add support for more channels: 3, 4, 5 and Ô (#1898)

Rename the France2IE extractor to FranceTVIE

											
										
										
											2013-12-05 13:45:53 -06:00
+								    ]
-												[francetv] Add an extractor for France2

											
										
										
											2013-09-23 14:28:33 -05:00
 								    def _real_extract(self, url):
 								        """Locate the "<video id>@<catalogue>" player reference on the page and extract it."""
 								        display_id = self._match_id(url)
 								        webpage = self._download_webpage(url, display_id)
 								        # Accept http, https and protocol-relative player links; a pattern
 								        # pinned to "http://" misses pages that link the player any other way.
 								        video_id, catalogue = self._html_search_regex(
 								            r'(?:href=|player\.setVideo\(\s*)"(?:https?:)?//videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
 								            webpage, 'video ID').split('@')
 								        return self._extract_video(video_id, catalogue)
-												[francetv] Add an extractor for Generation Quoi (closes #1475)

											
										
										
											2013-09-23 14:41:54 -05:00
 								class GenerationQuoiIE(InfoExtractor):
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								    IE_NAME = 'france2.fr:generation-quoi'
-												[france2.fr:generation-quoi] Modernize

											
										
										
											2015-02-01 08:06:55 -06:00
+								    _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)'
-												[francetv] Add an extractor for Generation Quoi (closes #1475)

											
										
										
											2013-09-23 14:41:54 -05:00
 								    _TEST = {
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								        'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
 								        'info_dict': {
-												[france2.fr:generation-quoi] Modernize

											
										
										
											2015-02-01 08:06:55 -06:00
+								            'id': 'k7FJX8VBcvvLmX4wA5Q',
 								            'ext': 'mp4',
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								            'title': 'Génération Quoi - Garde à Vous',
 								            'uploader': 'Génération Quoi',
-												[francetv] Add an extractor for Generation Quoi (closes #1475)

											
										
										
											2013-09-23 14:41:54 -05:00
+								        },
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								        'params': {
-												[francetv] Add an extractor for Generation Quoi (closes #1475)

											
										
										
											2013-09-23 14:41:54 -05:00
+								            # It uses Dailymotion
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								            'skip_download': True,
-												[francetv] Add an extractor for Generation Quoi (closes #1475)

											
										
										
											2013-09-23 14:41:54 -05:00
+								        },
 								    }
 								    def _real_extract(self, url):
 								        """Fetch the portrait's JSON description and hand its Dailymotion id to that extractor."""
 								        display_id = self._match_id(url)
 								        info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % display_id)
 								        # Let the shared helper download and parse the JSON instead of
 								        # decoding the raw page text by hand.
 								        info = self._download_json(info_url, display_id)
 								        return self.url_result(
 								            'http://www.dailymotion.com/video/%s' % info['id'],
 								            ie='Dailymotion')
-												[francetv] Add extractor for Culturebox (closes #2117)

											
										
										
											2014-01-08 09:16:34 -06:00
 								class CultureboxIE(FranceTVBaseInfoExtractor):
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								    IE_NAME = 'culturebox.francetvinfo.fr'
-												[francetv] Add support for mobile URLs (Closes #3275)

											
										
										
											2014-07-19 05:47:50 -05:00
+								    _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
-												[francetv] Add extractor for Culturebox (closes #2117)

											
										
										
											2014-01-08 09:16:34 -06:00
 								    _TEST = {
-												[culturebox] Replace test

											
										
										
											2015-04-04 11:50:13 -05:00
+								        'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511',
-												[culturebox] Fix test

											
										
										
											2015-04-04 12:06:16 -05:00
+								        'md5': '9b88dc156781c4dbebd4c3e066e0b1d6',
-												[francetv] Use unicode_literals

											
										
										
											2014-01-29 23:13:57 -06:00
+								        'info_dict': {
-												[culturebox] Replace test

											
										
										
											2015-04-04 11:50:13 -05:00
+								            'id': 'EV_50111',
-												[culturebox] Fix test

											
										
										
											2015-04-04 12:06:16 -05:00
+								            'ext': 'flv',
-												[culturebox] Replace test

											
										
										
											2015-04-04 11:50:13 -05:00
+								            'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne",
 								            'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9',
 								            'upload_date': '20150320',
 								            'timestamp': 1426892400,
 								            'duration': 2760.9,
 								        },
-												[francetv] Add extractor for Culturebox (closes #2117)

											
										
										
											2014-01-08 09:16:34 -06:00
+								    }
 								    def _real_extract(self, url):
 								        """Extract a Culturebox page via its "<video id>@<catalogue>" player link.

 								        Raises ExtractorError (expected) when the page states that the live is
 								        no longer available in replay.
 								        """
 								        name = re.match(self._VALID_URL, url).group('name')
 								        webpage = self._download_webpage(url, name)
 								        if ">Ce live n'est plus disponible en replay<" in webpage:
 								            raise ExtractorError('Video %s is not available' % name, expected=True)
 								        # Accept http, https and protocol-relative links rather than only
 								        # "http://", so a scheme change on the site does not break matching.
 								        video_id, catalogue = self._search_regex(
 								            r'"(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
 								            webpage, 'video id').split('@')
 								        return self._extract_video(video_id, catalogue)