[vidlii] Add extractor (closes #14472, closes #14512, closes #14779)

This commit is contained in:
Sergey M․ 2018-02-25 20:26:50 +07:00
parent b5cbe3d652
commit 8c73ef37b6
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
2 changed files with 126 additions and 0 deletions

View File

@ -1225,6 +1225,7 @@ from .videomore import (
from .videopremium import VideoPremiumIE from .videopremium import VideoPremiumIE
from .videopress import VideoPressIE from .videopress import VideoPressIE
from .vidio import VidioIE from .vidio import VidioIE
from .vidlii import VidLiiIE
from .vidme import ( from .vidme import (
VidmeIE, VidmeIE,
VidmeUserIE, VidmeUserIE,

View File

@ -0,0 +1,125 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
float_or_none,
get_element_by_id,
int_or_none,
strip_or_none,
unified_strdate,
urljoin,
)
class VidLiiIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vidlii\.com/(?:watch|embed)\?.*?\bv=(?P<id>[0-9A-Za-z_-]{11})'
_TESTS = [{
'url': 'https://www.vidlii.com/watch?v=tJluaH4BJ3v',
'md5': '9bf7d1e005dfa909b6efb0a1ff5175e2',
'info_dict': {
'id': 'tJluaH4BJ3v',
'ext': 'mp4',
'title': 'Vidlii is against me',
'description': 'md5:fa3f119287a2bfb922623b52b1856145',
'thumbnail': 're:https://.*.jpg',
'uploader': 'APPle5auc31995',
'uploader_url': 'https://www.vidlii.com/user/APPle5auc31995',
'upload_date': '20171107',
'duration': 212,
'view_count': int,
'comment_count': int,
'average_rating': float,
'categories': ['News & Politics'],
'tags': ['Vidlii', 'Jan', 'Videogames'],
}
}, {
'url': 'https://www.vidlii.com/embed?v=tJluaH4BJ3v&a=0',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'https://www.vidlii.com/watch?v=%s' % video_id, video_id)
video_url = self._search_regex(
r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1', webpage,
'video url', group='url')
title = self._search_regex(
(r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage,
'title')
description = self._html_search_meta(
('description', 'twitter:description'), webpage,
default=None) or strip_or_none(
get_element_by_id('des_text', webpage))
thumbnail = self._html_search_meta(
'twitter:image', webpage, default=None)
if not thumbnail:
thumbnail_path = self._search_regex(
r'img\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'thumbnail', fatal=False, group='url')
if thumbnail_path:
thumbnail = urljoin(url, thumbnail_path)
uploader = self._search_regex(
r'<div[^>]+class=["\']wt_person[^>]+>\s*<a[^>]+\bhref=["\']/user/[^>]+>([^<]+)',
webpage, 'uploader', fatal=False)
uploader_url = 'https://www.vidlii.com/user/%s' % uploader if uploader else None
upload_date = unified_strdate(self._html_search_meta(
'datePublished', webpage, default=None) or self._search_regex(
r'<date>([^<]+)', webpage, 'upload date', fatal=False))
duration = int_or_none(self._html_search_meta(
'video:duration', webpage, 'duration',
default=None) or self._search_regex(
r'duration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
view_count = int_or_none(self._search_regex(
(r'<strong>(\d+)</strong> views',
r'Views\s*:\s*<strong>(\d+)</strong>'),
webpage, 'view count', fatal=False))
comment_count = int_or_none(self._search_regex(
(r'<span[^>]+id=["\']cmt_num[^>]+>(\d+)',
r'Comments\s*:\s*<strong>(\d+)'),
webpage, 'comment count', fatal=False))
average_rating = float_or_none(self._search_regex(
r'rating\s*:\s*([\d.]+)', webpage, 'average rating', fatal=False))
category = self._html_search_regex(
r'<div>Category\s*:\s*</div>\s*<div>\s*<a[^>]+>([^<]+)', webpage,
'category', fatal=False)
categories = [category] if category else None
tags = [
strip_or_none(tag)
for tag in re.findall(
r'<a[^>]+\bhref=["\']/results\?.*?q=[^>]*>([^<]+)',
webpage) if strip_or_none(tag)
] or None
return {
'id': video_id,
'url': video_url,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'uploader_url': uploader_url,
'upload_date': upload_date,
'duration': duration,
'view_count': view_count,
'comment_count': comment_count,
'average_rating': average_rating,
'categories': categories,
'tags': tags,
}