From 951aa74cbeb08a4220e265e769a263dcedf46282 Mon Sep 17 00:00:00 2001 From: nomevi <108267441+nomevi@users.noreply.github.com> Date: Mon, 27 Jun 2022 13:58:07 +0200 Subject: [PATCH 1/9] [livestreamfails] Add new extractor --- youtube_dl/extractor/extractors.py | 3 ++ youtube_dl/extractor/livestreamfails.py | 49 +++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 youtube_dl/extractor/livestreamfails.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 751fc38b6..c01461f54 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -627,6 +627,9 @@ from .livestream import ( LivestreamOriginalIE, LivestreamShortenerIE, ) +from .livestreamfails import ( + LivestreamfailsIE, +) from .lnkgo import LnkGoIE from .localnews8 import LocalNews8IE from .lovehomeporn import LoveHomePornIE diff --git a/youtube_dl/extractor/livestreamfails.py b/youtube_dl/extractor/livestreamfails.py new file mode 100644 index 000000000..b92eca047 --- /dev/null +++ b/youtube_dl/extractor/livestreamfails.py @@ -0,0 +1,49 @@ +from .common import InfoExtractor +import json +import time +import calendar + + +class LivestreamfailsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?livestreamfails\.com/clip/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'https://livestreamfails.com/clip/139200', + 'md5': '8a03aea1a46e94a05af6410337463102', + 'info_dict': { + 'id': '139200', + 'ext': 'mp4', + 'display_id': 'ConcernedLitigiousSalmonPeteZaroll-O8yo9W2L8OZEKhV2', + 'title': 'Streamer jumps off a trampoline at full speed', + 'creator': 'paradeev1ch', + 'thumbnail': 'https://livestreamfails-image-prod.b-cdn.net/image/3877b1d38db083fa25c82685bbaf645637e575ea.png', + 'timestamp': 1656271785, + 'upload_date': '20220626', + } + }] + + def _real_extract(self, url): + result = {} + result['id'] = self._match_id(url) + + # https://livestreamfails.com/clip/id uses https://api.livestreamfails.com/clip/ to fetch the video 
metadata + # Use the same endpoint here to avoid loading and parsing the provided page (which requires JS) + apiResponse = json.loads(self._download_webpage('https://api.livestreamfails.com/clip/' + result['id'], result['id'])) + + # Twitch ID of clip + result['display_id'] = apiResponse.get('sourceId') + + # Get the input timestamp (test case gives 2022-06-26T19:29:45.515Z) + result['timestamp'] = apiResponse.get('createdAt') + if(result.get('timestamp')): + # Parse it into a struct_time + result['timestamp'] = time.strptime(result['timestamp'], '%Y-%m-%dT%H:%M:%S.%fZ') + # Convert the struct_time to a UNIX timestamp while ignoring the local timezone attached by time.strptime() + result['timestamp'] = calendar.timegm(result['timestamp']) + + # Other fields + result['url'] = 'https://livestreamfails-video-prod.b-cdn.net/video/' + apiResponse.get('videoId') + result['title'] = apiResponse.get('label') + result['creator'] = apiResponse.get('streamer', {}).get('label') + result['thumbnail'] = 'https://livestreamfails-image-prod.b-cdn.net/image/' + apiResponse.get('imageId') + + return result From 129cfb174c8cc98d912a0565fb8c94c5f29f6c7e Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 8 Jul 2022 06:54:01 +0100 Subject: [PATCH 2/9] Add Unicode compatibility header --- youtube_dl/extractor/livestreamfails.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/livestreamfails.py b/youtube_dl/extractor/livestreamfails.py index b92eca047..64fed1bed 100644 --- a/youtube_dl/extractor/livestreamfails.py +++ b/youtube_dl/extractor/livestreamfails.py @@ -1,3 +1,6 @@ +# coding: utf-8 +from __future__ import unicode_literals + from .common import InfoExtractor import json import time From 2d82e8a9587d99b1ca24f963ce1f9b449076e057 Mon Sep 17 00:00:00 2001 From: nomevi <108267441+nomevi@users.noreply.github.com> Date: Sun, 10 Jul 2022 01:32:33 +0200 Subject: [PATCH 3/9] Refactor return --- youtube_dl/extractor/livestreamfails.py | 32 ++++++++++++------------- 1 
file changed, 15 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/livestreamfails.py b/youtube_dl/extractor/livestreamfails.py index 64fed1bed..111cc19a1 100644 --- a/youtube_dl/extractor/livestreamfails.py +++ b/youtube_dl/extractor/livestreamfails.py @@ -25,28 +25,26 @@ class LivestreamfailsIE(InfoExtractor): }] def _real_extract(self, url): - result = {} - result['id'] = self._match_id(url) + id = self._match_id(url) # https://livestreamfails.com/clip/id uses https://api.livestreamfails.com/clip/ to fetch the video metadata # Use the same endpoint here to avoid loading and parsing the provided page (which requires JS) - apiResponse = json.loads(self._download_webpage('https://api.livestreamfails.com/clip/' + result['id'], result['id'])) - - # Twitch ID of clip - result['display_id'] = apiResponse.get('sourceId') + apiResponse = json.loads(self._download_webpage('https://api.livestreamfails.com/clip/' + id, id)) # Get the input timestamp (test case gives 2022-06-26T19:29:45.515Z) - result['timestamp'] = apiResponse.get('createdAt') - if(result.get('timestamp')): + timestamp = apiResponse.get('createdAt') + if(timestamp): # Parse it into a struct_time - result['timestamp'] = time.strptime(result['timestamp'], '%Y-%m-%dT%H:%M:%S.%fZ') + timestamp = time.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%fZ') # Convert the struct_time to a UNIX timestamp while ignoring the local timezone attached by time.strptime() - result['timestamp'] = calendar.timegm(result['timestamp']) + timestamp = calendar.timegm(timestamp) - # Other fields - result['url'] = 'https://livestreamfails-video-prod.b-cdn.net/video/' + apiResponse.get('videoId') - result['title'] = apiResponse.get('label') - result['creator'] = apiResponse.get('streamer', {}).get('label') - result['thumbnail'] = 'https://livestreamfails-image-prod.b-cdn.net/image/' + apiResponse.get('imageId') - - return result + return { + 'id': id, + 'display_id': apiResponse.get('sourceId'), # Twitch ID of clip + 
'timestamp': timestamp, + 'url': 'https://livestreamfails-video-prod.b-cdn.net/video/' + apiResponse.get('videoId'), + 'title': apiResponse.get('label'), + 'creator': apiResponse.get('streamer', {}).get('label'), + 'thumbnail': 'https://livestreamfails-image-prod.b-cdn.net/image/' + apiResponse.get('imageId'), + } From c9e09d187f0ce8729ec4f877ec9e051dd0ccb7a0 Mon Sep 17 00:00:00 2001 From: nomevi <108267441+nomevi@users.noreply.github.com> Date: Sun, 10 Jul 2022 01:33:21 +0200 Subject: [PATCH 4/9] Rename apiResponse to api_response --- youtube_dl/extractor/livestreamfails.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/livestreamfails.py b/youtube_dl/extractor/livestreamfails.py index 111cc19a1..da2472601 100644 --- a/youtube_dl/extractor/livestreamfails.py +++ b/youtube_dl/extractor/livestreamfails.py @@ -29,10 +29,10 @@ class LivestreamfailsIE(InfoExtractor): # https://livestreamfails.com/clip/id uses https://api.livestreamfails.com/clip/ to fetch the video metadata # Use the same endpoint here to avoid loading and parsing the provided page (which requires JS) - apiResponse = json.loads(self._download_webpage('https://api.livestreamfails.com/clip/' + id, id)) + api_response = json.loads(self._download_webpage('https://api.livestreamfails.com/clip/' + id, id)) # Get the input timestamp (test case gives 2022-06-26T19:29:45.515Z) - timestamp = apiResponse.get('createdAt') + timestamp = api_response.get('createdAt') if(timestamp): # Parse it into a struct_time timestamp = time.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%fZ') @@ -41,10 +41,10 @@ class LivestreamfailsIE(InfoExtractor): return { 'id': id, - 'display_id': apiResponse.get('sourceId'), # Twitch ID of clip + 'display_id': api_response.get('sourceId'), # Twitch ID of clip 'timestamp': timestamp, - 'url': 'https://livestreamfails-video-prod.b-cdn.net/video/' + apiResponse.get('videoId'), - 'title': apiResponse.get('label'), - 'creator': 
apiResponse.get('streamer', {}).get('label'), - 'thumbnail': 'https://livestreamfails-image-prod.b-cdn.net/image/' + apiResponse.get('imageId'), + 'url': 'https://livestreamfails-video-prod.b-cdn.net/video/' + api_response.get('videoId'), + 'title': api_response.get('label'), + 'creator': api_response.get('streamer', {}).get('label'), + 'thumbnail': 'https://livestreamfails-image-prod.b-cdn.net/image/' + api_response.get('imageId'), } From 26dc85adb44091139b0273bec7a525c7e1313e41 Mon Sep 17 00:00:00 2001 From: nomevi <108267441+nomevi@users.noreply.github.com> Date: Sun, 10 Jul 2022 01:35:21 +0200 Subject: [PATCH 5/9] Use IE._download_json() --- youtube_dl/extractor/livestreamfails.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/livestreamfails.py b/youtube_dl/extractor/livestreamfails.py index da2472601..22180d986 100644 --- a/youtube_dl/extractor/livestreamfails.py +++ b/youtube_dl/extractor/livestreamfails.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -import json import time import calendar @@ -29,7 +28,7 @@ class LivestreamfailsIE(InfoExtractor): # https://livestreamfails.com/clip/id uses https://api.livestreamfails.com/clip/ to fetch the video metadata # Use the same endpoint here to avoid loading and parsing the provided page (which requires JS) - api_response = json.loads(self._download_webpage('https://api.livestreamfails.com/clip/' + id, id)) + api_response = self._download_json('https://api.livestreamfails.com/clip/' + id, id) # Get the input timestamp (test case gives 2022-06-26T19:29:45.515Z) timestamp = api_response.get('createdAt') From ea4a9f4d9868fb99aea2d8e0755c23af509a53cf Mon Sep 17 00:00:00 2001 From: nomevi <108267441+nomevi@users.noreply.github.com> Date: Sun, 10 Jul 2022 01:45:51 +0200 Subject: [PATCH 6/9] Correct which fields are fail inducing and crash proof --- youtube_dl/extractor/livestreamfails.py | 13 +++++++++---- 1 file changed, 9 
insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/livestreamfails.py b/youtube_dl/extractor/livestreamfails.py index 22180d986..04d533e05 100644 --- a/youtube_dl/extractor/livestreamfails.py +++ b/youtube_dl/extractor/livestreamfails.py @@ -4,6 +4,11 @@ from __future__ import unicode_literals from .common import InfoExtractor import time import calendar +from ..utils import ( + try_get, + url_or_none, +) +from ..compat import compat_str class LivestreamfailsIE(InfoExtractor): @@ -40,10 +45,10 @@ class LivestreamfailsIE(InfoExtractor): return { 'id': id, + 'url': 'https://livestreamfails-video-prod.b-cdn.net/video/' + api_response['videoId'], + 'title': api_response['label'], 'display_id': api_response.get('sourceId'), # Twitch ID of clip 'timestamp': timestamp, - 'url': 'https://livestreamfails-video-prod.b-cdn.net/video/' + api_response.get('videoId'), - 'title': api_response.get('label'), - 'creator': api_response.get('streamer', {}).get('label'), - 'thumbnail': 'https://livestreamfails-image-prod.b-cdn.net/image/' + api_response.get('imageId'), + 'creator': try_get(api_response, lambda x: x['streamer']['label'], compat_str), + 'thumbnail': url_or_none(try_get(api_response, lambda x: 'https://livestreamfails-image-prod.b-cdn.net/image/' + x['imageId'])) } From 4d02ba27d7377a4313bf775f7720cd9e3268aba2 Mon Sep 17 00:00:00 2001 From: nomevi <108267441+nomevi@users.noreply.github.com> Date: Sun, 10 Jul 2022 01:48:32 +0200 Subject: [PATCH 7/9] Use parse_iso8601 --- youtube_dl/extractor/livestreamfails.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/livestreamfails.py b/youtube_dl/extractor/livestreamfails.py index 04d533e05..acf486f74 100644 --- a/youtube_dl/extractor/livestreamfails.py +++ b/youtube_dl/extractor/livestreamfails.py @@ -7,6 +7,7 @@ import calendar from ..utils import ( try_get, url_or_none, + parse_iso8601, ) from ..compat import compat_str @@ -35,20 +36,12 @@ class 
LivestreamfailsIE(InfoExtractor): # Use the same endpoint here to avoid loading and parsing the provided page (which requires JS) api_response = self._download_json('https://api.livestreamfails.com/clip/' + id, id) - # Get the input timestamp (test case gives 2022-06-26T19:29:45.515Z) - timestamp = api_response.get('createdAt') - if(timestamp): - # Parse it into a struct_time - timestamp = time.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%fZ') - # Convert the struct_time to a UNIX timestamp while ignoring the local timezone attached by time.strptime() - timestamp = calendar.timegm(timestamp) - return { 'id': id, 'url': 'https://livestreamfails-video-prod.b-cdn.net/video/' + api_response['videoId'], 'title': api_response['label'], 'display_id': api_response.get('sourceId'), # Twitch ID of clip - 'timestamp': timestamp, + 'timestamp': parse_iso8601(api_response.get('createdAt')), 'creator': try_get(api_response, lambda x: x['streamer']['label'], compat_str), 'thumbnail': url_or_none(try_get(api_response, lambda x: 'https://livestreamfails-image-prod.b-cdn.net/image/' + x['imageId'])) } From 918113db3b7e66b3c450345a436379e87150afe7 Mon Sep 17 00:00:00 2001 From: nomevi <108267441+nomevi@users.noreply.github.com> Date: Sun, 10 Jul 2022 01:33:32 +0000 Subject: [PATCH 8/9] Break out video_url and title Co-authored-by: dirkf --- youtube_dl/extractor/livestreamfails.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/livestreamfails.py b/youtube_dl/extractor/livestreamfails.py index acf486f74..aabc7ecd2 100644 --- a/youtube_dl/extractor/livestreamfails.py +++ b/youtube_dl/extractor/livestreamfails.py @@ -36,10 +36,13 @@ class LivestreamfailsIE(InfoExtractor): # Use the same endpoint here to avoid loading and parsing the provided page (which requires JS) api_response = self._download_json('https://api.livestreamfails.com/clip/' + id, id) + video_url = 'https://livestreamfails-video-prod.b-cdn.net/video/' + api_response['videoId'] + 
title = api_response['label'] + return { 'id': id, - 'url': 'https://livestreamfails-video-prod.b-cdn.net/video/' + api_response['videoId'], - 'title': api_response['label'], + 'url': video_url, + 'title': title, 'display_id': api_response.get('sourceId'), # Twitch ID of clip 'timestamp': parse_iso8601(api_response.get('createdAt')), 'creator': try_get(api_response, lambda x: x['streamer']['label'], compat_str), From 032aed0e413b20f4fc62a6311834516e2a328cd1 Mon Sep 17 00:00:00 2001 From: nomevi <108267441+nomevi@users.noreply.github.com> Date: Sun, 10 Jul 2022 03:33:15 +0200 Subject: [PATCH 9/9] Remove unused includes --- youtube_dl/extractor/livestreamfails.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/livestreamfails.py b/youtube_dl/extractor/livestreamfails.py index aabc7ecd2..05f2cdcac 100644 --- a/youtube_dl/extractor/livestreamfails.py +++ b/youtube_dl/extractor/livestreamfails.py @@ -2,8 +2,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -import time -import calendar from ..utils import ( try_get, url_or_none,