[facebook] Improve Facebook embedded detection

Related to #9938.

Another example comes from 9834872bf63b4e03b66c5e3b8f306556e735d8c5.
This commit is contained in:
Yen Chi Hsuan 2016-07-02 21:33:23 +08:00
parent bdafd88da0
commit fd6ca38262
2 changed files with 37 additions and 4 deletions

View File

@ -129,6 +129,21 @@ class FacebookIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
if mobj is not None:
return mobj.group('url')
# Facebook API embed
# see https://developers.facebook.com/docs/plugins/embedded-video-player
mobj = re.search(r'''(?x)<div[^>]+
class=(?P<q1>[\'"])[^\'"]*\bfb-video\b[^\'"]*(?P=q1)[^>]+
data-href=(?P<q2>[\'"])(?P<url>[^\'"]+)(?P=q2)''', webpage)
if mobj is not None:
return mobj.group('url')
def _login(self): def _login(self):
(useremail, password) = self._get_login_info() (useremail, password) = self._get_login_info()
if useremail is None: if useremail is None:

View File

@ -66,6 +66,7 @@ from .theplatform import ThePlatformIE
from .vessel import VesselIE from .vessel import VesselIE
from .kaltura import KalturaIE from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE from .eagleplatform import EaglePlatformIE
from .facebook import FacebookIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -1260,6 +1261,24 @@ class GenericIE(InfoExtractor):
'uploader': 'TheAtlantic', 'uploader': 'TheAtlantic',
}, },
'add_ie': ['BrightcoveLegacy'], 'add_ie': ['BrightcoveLegacy'],
},
# Facebook <iframe> embed
{
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
'info_dict': {
'id': '599637780109885',
'ext': 'mp4',
'title': 'Facebook video #599637780109885',
},
},
# Facebook API embed
{
'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
'info_dict': {
'id': '10153467542406923',
'ext': 'mp4',
'title': 'Facebook video #10153467542406923',
},
} }
] ]
@ -1759,10 +1778,9 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url')) return self.url_result(mobj.group('url'))
# Look for embedded Facebook player # Look for embedded Facebook player
mobj = re.search( facebook_url = FacebookIE._extract_url(webpage)
r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage) if facebook_url is not None:
if mobj is not None: return self.url_result(facebook_url, 'Facebook')
return self.url_result(mobj.group('url'), 'Facebook')
# Look for embedded VK player # Look for embedded VK player
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage) mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)