From e9839174a16c4640387d91aff3ea0504dfdbeadc Mon Sep 17 00:00:00 2001 From: palewire Date: Sun, 7 Aug 2022 11:14:24 -0700 Subject: [PATCH 01/20] [parler] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/parler.py | 74 ++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 youtube_dl/extractor/parler.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 751fc38b6..5c1cee28f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -908,6 +908,7 @@ from .palcomp3 import ( PalcoMP3VideoIE, ) from .pandoratv import PandoraTVIE +from .parler import ParlerIE from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py new file mode 100644 index 000000000..8cd607659 --- /dev/null +++ b/youtube_dl/extractor/parler.py @@ -0,0 +1,74 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from urllib import parse + +from .common import InfoExtractor + +from ..utils import clean_html, unified_timestamp + + +class ParlerIE(InfoExtractor): + """Extract videos from posts on Parler.""" + + _VALID_URL = r"https://parler\.com/feed/(?P[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})" + _TESTS = [ + { + "url": "https://parler.com/feed/df79fdba-07cc-48fe-b085-3293897520d7", + "md5": "16e0f447bf186bb3cf64de5bbbf4d22d", + "info_dict": { + "id": "df79fdba-07cc-48fe-b085-3293897520d7", + "ext": "mp4", + "title": "Puberty-blocking procedures promoted by the Biden/Harris Admin are child abuse. The FDA has recently confirmed these hormones/drugs have extremely dangerous side effects, like brain swelling and vision loss.", + "timestamp": 1659744000, + "upload_date": "20220806", + "uploader": "Tulsi Gabbard", + "uploader_id": "TulsiGabbard", + }, + }, + { + "url": "https://parler.com/feed/a7406eb4-91e5-4793-b5e3-ade57a24e287", + "md5": "11687e2f5bb353682cee338d181422ed", + "info_dict": { + "id": "a7406eb4-91e5-4793-b5e3-ade57a24e287", + "ext": "mp4", + "title": "This man should run for office", + "timestamp": 1659657600, + "upload_date": "20220805", + "uploader": "Benny Johnson", + "uploader_id": "BennyJohnson", + }, + }, + ] + _GEO_COUNTRIES = ["US"] # The site is only available in the US + + def _real_extract(self, url): + video_id = self._match_id(url) + + # Get data from API + api_url = "https://parler.com/open-api/ParleyDetailEndpoint.php" + payload = parse.urlencode({"uuid": video_id}).encode() + status = self._download_json(api_url, video_id, data=payload) + + # Pull out video + data = status["data"][0]["primary"] + video = data["video_data"] + url = video["videoSrc"] + + # Pull out metadata + title = clean_html(data.get("full_body")).replace("\n", "") + timestamp = unified_timestamp(data.get("date_created")) + uploader = data.get("name") + uploader_id = data.get("username") + uploader_url = "https://parler.com/" + uploader_id if uploader_id else None + + # Return the result + return { + "id": video_id, + "url": url, + "title": title, + "timestamp": timestamp, + "uploader": uploader, + "uploader_id": uploader_id, + "uploader_url": uploader_url, + } From 6d6f38bed8062ee754af342fbfddb91b004555d8 Mon Sep 17 00:00:00 2001 From: palewire Date: Sun, 7 Aug 2022 11:14:38 -0700 Subject: [PATCH 02/20] Cut geography --- youtube_dl/extractor/parler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index 8cd607659..e56607981 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -40,7 +40,6 @@ class ParlerIE(InfoExtractor): }, }, ] - _GEO_COUNTRIES = ["US"] # The site is only available in the US def _real_extract(self, url): video_id = self._match_id(url) From a6f8feef491dc4b0af0082b16788a1e28ef9c61a Mon Sep 17 00:00:00 2001 From: Ben Welsh Date: Mon, 8 Aug 2022 09:45:59 -0700 Subject: [PATCH 03/20] Update youtube_dl/extractor/parler.py Co-authored-by: dirkf --- youtube_dl/extractor/parler.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index e56607981..ecc44216c 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -5,7 +5,12 @@ from urllib import parse from .common import InfoExtractor -from ..utils import clean_html, unified_timestamp +from ..utils import ( + clean_html, + strip_or_none, + unified_timestamp, + urlencode_postdata, +) class ParlerIE(InfoExtractor): From b1bbb03e0b8f026e78748f6bfd822691522ddf13 Mon Sep 17 00:00:00 2001 From: Ben Welsh Date: Mon, 8 Aug 2022 09:46:26 -0700 Subject: [PATCH 04/20] Update youtube_dl/extractor/parler.py Co-authored-by: dirkf --- youtube_dl/extractor/parler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index ecc44216c..0b09c1955 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -16,7 +16,8 @@ from ..utils import ( class ParlerIE(InfoExtractor): """Extract videos from posts on Parler.""" - _VALID_URL = r"https://parler\.com/feed/(?P[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})" + _UUID_RE = r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}' + _VALID_URL = r'https://parler\.com/feed/(?P%s)' % (_UUID_RE, ) _TESTS = [ { "url": "https://parler.com/feed/df79fdba-07cc-48fe-b085-3293897520d7", From 1a82825b9c4ce8b88b3c5cf36742bbaba91e68ae Mon Sep 17 00:00:00 2001 From: Ben Welsh Date: Mon, 8 Aug 2022 09:46:46 -0700 Subject: [PATCH 05/20] Update youtube_dl/extractor/parler.py Co-authored-by: dirkf --- youtube_dl/extractor/parler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index 0b09c1955..ab7ddf319 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -51,7 +51,7 @@ class ParlerIE(InfoExtractor): video_id = self._match_id(url) # Get data from API - api_url = "https://parler.com/open-api/ParleyDetailEndpoint.php" + api_url = 'https://parler.com/open-api/ParleyDetailEndpoint.php' payload = parse.urlencode({"uuid": video_id}).encode() status = self._download_json(api_url, video_id, data=payload) From ff952d586b3302d706cbdc14de8c17f75990397d Mon Sep 17 00:00:00 2001 From: Ben Welsh Date: Mon, 8 Aug 2022 09:47:08 -0700 Subject: [PATCH 06/20] Update youtube_dl/extractor/parler.py Co-authored-by: dirkf --- youtube_dl/extractor/parler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index ab7ddf319..b3cd3ab85 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -56,9 +56,9 @@ class ParlerIE(InfoExtractor): status = self._download_json(api_url, video_id, data=payload) # Pull out video - data = status["data"][0]["primary"] - video = data["video_data"] - url = video["videoSrc"] + url = status['data'][0]['primary']['video_data']['videoSrc'] + # now we know this exists and is a dict + data = status['data'][0]['primary'] # Pull out metadata title = clean_html(data.get("full_body")).replace("\n", "") From b14b4b8af50c5395777671f63212220d3ce1d582 Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 8 Aug 2022 11:01:58 -0700 Subject: [PATCH 07/20] Single quotes --- youtube_dl/extractor/parler.py | 40 +++++++++++++++++----------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index e56607981..7cfd1c6cd 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -14,29 +14,29 @@ class ParlerIE(InfoExtractor): _VALID_URL = r"https://parler\.com/feed/(?P[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})" _TESTS = [ { - "url": "https://parler.com/feed/df79fdba-07cc-48fe-b085-3293897520d7", - "md5": "16e0f447bf186bb3cf64de5bbbf4d22d", - "info_dict": { - "id": "df79fdba-07cc-48fe-b085-3293897520d7", - "ext": "mp4", - "title": "Puberty-blocking procedures promoted by the Biden/Harris Admin are child abuse. The FDA has recently confirmed these hormones/drugs have extremely dangerous side effects, like brain swelling and vision loss.", - "timestamp": 1659744000, - "upload_date": "20220806", - "uploader": "Tulsi Gabbard", - "uploader_id": "TulsiGabbard", + 'url': 'https://parler.com/feed/df79fdba-07cc-48fe-b085-3293897520d7', + 'md5': '16e0f447bf186bb3cf64de5bbbf4d22d', + 'info_dict': { + 'id': 'df79fdba-07cc-48fe-b085-3293897520d7', + 'ext': 'mp4', + 'title': 'Puberty-blocking procedures promoted by the Biden/Harris Admin are child abuse. The FDA has recently confirmed these hormones/drugs have extremely dangerous side effects, like brain swelling and vision loss.', + 'timestamp': 1659744000, + 'upload_date': '20220806', + 'uploader': 'Tulsi Gabbard', + 'uploader_id': 'TulsiGabbard', }, }, { - "url": "https://parler.com/feed/a7406eb4-91e5-4793-b5e3-ade57a24e287", - "md5": "11687e2f5bb353682cee338d181422ed", - "info_dict": { - "id": "a7406eb4-91e5-4793-b5e3-ade57a24e287", - "ext": "mp4", - "title": "This man should run for office", - "timestamp": 1659657600, - "upload_date": "20220805", - "uploader": "Benny Johnson", - "uploader_id": "BennyJohnson", + 'url': 'https://parler.com/feed/a7406eb4-91e5-4793-b5e3-ade57a24e287', + 'md5': '11687e2f5bb353682cee338d181422ed', + 'info_dict': { + 'id': 'a7406eb4-91e5-4793-b5e3-ade57a24e287', + 'ext': 'mp4', + 'title': 'This man should run for office', + 'timestamp': 1659657600, + 'upload_date': '20220805', + 'uploader': 'Benny Johnson', + 'uploader_id': 'BennyJohnson', }, }, ] From 4b3bc81777bd57e26c2ce29800f76af7b8c5c12e Mon Sep 17 00:00:00 2001 From: Ben Welsh Date: Mon, 8 Aug 2022 11:03:00 -0700 Subject: [PATCH 08/20] Update youtube_dl/extractor/parler.py Co-authored-by: dirkf --- youtube_dl/extractor/parler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index 79888d8dd..267d9ff6b 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -63,8 +63,8 @@ class ParlerIE(InfoExtractor): # Pull out metadata title = clean_html(data.get("full_body")).replace("\n", "") timestamp = unified_timestamp(data.get("date_created")) - uploader = data.get("name") - uploader_id = data.get("username") + uploader = strip_or_none(data.get('name')) + uploader_id = strip_or_none(data.get('username')) uploader_url = "https://parler.com/" + uploader_id if uploader_id else None # Return the result From 40f370c358fc374bc42aa721c76c6611a60ea69b Mon Sep 17 00:00:00 2001 From: Ben Welsh Date: Mon, 8 Aug 2022 11:03:09 -0700 Subject: [PATCH 09/20] Update youtube_dl/extractor/parler.py Co-authored-by: dirkf --- youtube_dl/extractor/parler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index 267d9ff6b..b35ff6cbd 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -62,7 +62,7 @@ class ParlerIE(InfoExtractor): # Pull out metadata title = clean_html(data.get("full_body")).replace("\n", "") - timestamp = unified_timestamp(data.get("date_created")) + timestamp = unified_timestamp(data.get('date_created')) uploader = strip_or_none(data.get('name')) uploader_id = strip_or_none(data.get('username')) uploader_url = "https://parler.com/" + uploader_id if uploader_id else None From e46a2a58e200aa736eb5594c49b169848d2dbb78 Mon Sep 17 00:00:00 2001 From: Ben Welsh Date: Mon, 8 Aug 2022 11:03:18 -0700 Subject: [PATCH 10/20] Update youtube_dl/extractor/parler.py Co-authored-by: dirkf --- youtube_dl/extractor/parler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index b35ff6cbd..76033042a 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -65,7 +65,7 @@ class ParlerIE(InfoExtractor): timestamp = unified_timestamp(data.get('date_created')) uploader = strip_or_none(data.get('name')) uploader_id = strip_or_none(data.get('username')) - uploader_url = "https://parler.com/" + uploader_id if uploader_id else None + uploader_url = ('https://parler.com/' + uploader_id) if uploader_id else None # Return the result return { From 92e053f4d26c6db854472f90d1a5f6e32fa1438a Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 8 Aug 2022 11:04:11 -0700 Subject: [PATCH 11/20] Single quotes --- youtube_dl/extractor/parler.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index 79888d8dd..388af38dc 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -69,11 +69,11 @@ class ParlerIE(InfoExtractor): # Return the result return { - "id": video_id, - "url": url, - "title": title, - "timestamp": timestamp, - "uploader": uploader, - "uploader_id": uploader_id, - "uploader_url": uploader_url, + 'id': video_id, + 'url': url, + 'title': title, + 'timestamp': timestamp, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'uploader_url': uploader_url, } From fec554563ad653c97acb05437ffcb3fe6bc39482 Mon Sep 17 00:00:00 2001 From: Ben Welsh Date: Mon, 8 Aug 2022 11:04:54 -0700 Subject: [PATCH 12/20] Update youtube_dl/extractor/parler.py Co-authored-by: dirkf --- youtube_dl/extractor/parler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index 05c408417..5217d71c8 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -52,7 +52,7 @@ class ParlerIE(InfoExtractor): # Get data from API api_url = 'https://parler.com/open-api/ParleyDetailEndpoint.php' - payload = parse.urlencode({"uuid": video_id}).encode() + payload = urlencode_postdata({'uuid': video_id}) status = self._download_json(api_url, video_id, data=payload) # Pull out video From 2e1c744be831d8410aee7963715d16bfcf006b05 Mon Sep 17 00:00:00 2001 From: Ben Welsh Date: Mon, 8 Aug 2022 11:05:07 -0700 Subject: [PATCH 13/20] Update youtube_dl/extractor/parler.py Co-authored-by: dirkf --- youtube_dl/extractor/parler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index 5217d71c8..b01f88e2e 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -61,7 +61,7 @@ class ParlerIE(InfoExtractor): data = status['data'][0]['primary'] # Pull out metadata - title = clean_html(data.get("full_body")).replace("\n", "") + title = clean_html(data.get('full_body')) or self.generic_title(url) timestamp = unified_timestamp(data.get('date_created')) uploader = strip_or_none(data.get('name')) uploader_id = strip_or_none(data.get('username')) From 871b60b117a10feb40e6f117e22a6736c4007d6f Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 8 Aug 2022 11:40:31 -0700 Subject: [PATCH 14/20] _generic_title --- youtube_dl/extractor/parler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index b01f88e2e..b22de186e 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -61,7 +61,7 @@ class ParlerIE(InfoExtractor): data = status['data'][0]['primary'] # Pull out metadata - title = clean_html(data.get('full_body')) or self.generic_title(url) + title = clean_html(data.get('full_body')) or self._generic_title(url) timestamp = unified_timestamp(data.get('date_created')) uploader = strip_or_none(data.get('name')) uploader_id = strip_or_none(data.get('username')) From 0ef718103f65062242fe8515cfe90f809fc5735f Mon Sep 17 00:00:00 2001 From: palewire Date: Tue, 9 Aug 2022 03:42:26 -0700 Subject: [PATCH 15/20] Shorter title --- youtube_dl/extractor/parler.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index b22de186e..efa8136d2 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -25,7 +25,8 @@ class ParlerIE(InfoExtractor): 'info_dict': { 'id': 'df79fdba-07cc-48fe-b085-3293897520d7', 'ext': 'mp4', - 'title': 'Puberty-blocking procedures promoted by the Biden/Harris Admin are child abuse. The FDA has recently confirmed these hormones/drugs have extremely dangerous side effects, like brain swelling and vision loss.', + 'title': '@TulsiGabbard-720', + 'description': 'Puberty-blocking procedures promoted by the Biden/Harris Admin are child abuse. The FDA has recently confirmed these hormones/drugs have extremely dangerous side effects, like brain swelling and vision loss.', 'timestamp': 1659744000, 'upload_date': '20220806', 'uploader': 'Tulsi Gabbard', @@ -38,7 +39,8 @@ class ParlerIE(InfoExtractor): 'info_dict': { 'id': 'a7406eb4-91e5-4793-b5e3-ade57a24e287', 'ext': 'mp4', - 'title': 'This man should run for office', + 'title': '@BennyJohnson-360', + 'description': 'This man should run for office', 'timestamp': 1659657600, 'upload_date': '20220805', 'uploader': 'Benny Johnson', @@ -61,17 +63,23 @@ class ParlerIE(InfoExtractor): data = status['data'][0]['primary'] # Pull out metadata - title = clean_html(data.get('full_body')) or self._generic_title(url) + description = strip_or_none(clean_html(data.get('full_body'))) timestamp = unified_timestamp(data.get('date_created')) uploader = strip_or_none(data.get('name')) uploader_id = strip_or_none(data.get('username')) uploader_url = ('https://parler.com/' + uploader_id) if uploader_id else None + # Keep the file name short so it doesn't exceed filesystem limits + title = self._generic_title(url) + if uploader_id: + title = '@%s-%s' % (uploader_id, title) + # Return the result return { 'id': video_id, 'url': url, 'title': title, + 'description': description, 'timestamp': timestamp, 'uploader': uploader, 'uploader_id': uploader_id, From c8686e7ccb09b4d16c01e74bcfbc7b16c5758cdf Mon Sep 17 00:00:00 2001 From: palewire Date: Tue, 9 Aug 2022 03:44:59 -0700 Subject: [PATCH 16/20] No @ --- youtube_dl/extractor/parler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index efa8136d2..96a955b4d 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -25,7 +25,7 @@ class ParlerIE(InfoExtractor): 'info_dict': { 'id': 'df79fdba-07cc-48fe-b085-3293897520d7', 'ext': 'mp4', - 'title': '@TulsiGabbard-720', + 'title': 'TulsiGabbard-720', 'description': 'Puberty-blocking procedures promoted by the Biden/Harris Admin are child abuse. The FDA has recently confirmed these hormones/drugs have extremely dangerous side effects, like brain swelling and vision loss.', 'timestamp': 1659744000, 'upload_date': '20220806', @@ -39,7 +39,7 @@ class ParlerIE(InfoExtractor): 'info_dict': { 'id': 'a7406eb4-91e5-4793-b5e3-ade57a24e287', 'ext': 'mp4', - 'title': '@BennyJohnson-360', + 'title': 'BennyJohnson-360', 'description': 'This man should run for office', 'timestamp': 1659657600, 'upload_date': '20220805', @@ -72,7 +72,7 @@ class ParlerIE(InfoExtractor): # Keep the file name short so it doesn't exceed filesystem limits title = self._generic_title(url) if uploader_id: - title = '@%s-%s' % (uploader_id, title) + title = '%s-%s' % (uploader_id, title) # Return the result return { From 18b7043b0cb28ba614293bac901be769c2c56a72 Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 15 Aug 2022 11:58:04 -0700 Subject: [PATCH 17/20] Backport from yt-dlp --- youtube_dl/extractor/parler.py | 74 +++++++++++++++------------------- 1 file changed, 32 insertions(+), 42 deletions(-) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index 96a955b4d..e00562e08 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -1,12 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -from urllib import parse - from .common import InfoExtractor - from ..utils import ( clean_html, + int_or_none, strip_or_none, unified_timestamp, urlencode_postdata, @@ -14,10 +12,8 @@ from ..utils import ( class ParlerIE(InfoExtractor): - """Extract videos from posts on Parler.""" - - _UUID_RE = r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}' - _VALID_URL = r'https://parler\.com/feed/(?P%s)' % (_UUID_RE, ) + IE_DESC = 'Posts on parler.com' + _VALID_URL = r'https://parler\.com/feed/(?P[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})' _TESTS = [ { 'url': 'https://parler.com/feed/df79fdba-07cc-48fe-b085-3293897520d7', @@ -25,12 +21,17 @@ class ParlerIE(InfoExtractor): 'info_dict': { 'id': 'df79fdba-07cc-48fe-b085-3293897520d7', 'ext': 'mp4', - 'title': 'TulsiGabbard-720', - 'description': 'Puberty-blocking procedures promoted by the Biden/Harris Admin are child abuse. The FDA has recently confirmed these hormones/drugs have extremely dangerous side effects, like brain swelling and vision loss.', + 'thumbnail': 'https://bl-images.parler.com/videos/6ce7cdf3-a27a-4d72-bf9c-d3e17ce39a66/thumbnail.jpeg', + 'title': 'Parler video #df79fdba-07cc-48fe-b085-3293897520d7', + 'description': 'md5:6f220bde2df4a97cbb89ac11f1fd8197', 'timestamp': 1659744000, 'upload_date': '20220806', 'uploader': 'Tulsi Gabbard', 'uploader_id': 'TulsiGabbard', + 'uploader_url': 'https://parler.com/TulsiGabbard', + 'view_count': int, + 'comment_count': int, + 'repost_count': int, }, }, { @@ -39,49 +40,38 @@ class ParlerIE(InfoExtractor): 'info_dict': { 'id': 'a7406eb4-91e5-4793-b5e3-ade57a24e287', 'ext': 'mp4', - 'title': 'BennyJohnson-360', + 'thumbnail': 'https://bl-images.parler.com/videos/317827a8-1e48-4cbc-981f-7dd17d4c1183/thumbnail.jpeg', + 'title': 'Parler video #a7406eb4-91e5-4793-b5e3-ade57a24e287', 'description': 'This man should run for office', 'timestamp': 1659657600, 'upload_date': '20220805', 'uploader': 'Benny Johnson', 'uploader_id': 'BennyJohnson', + 'uploader_url': 'https://parler.com/BennyJohnson', + 'view_count': int, + 'comment_count': int, + 'repost_count': int, }, }, ] def _real_extract(self, url): video_id = self._match_id(url) - - # Get data from API - api_url = 'https://parler.com/open-api/ParleyDetailEndpoint.php' - payload = urlencode_postdata({'uuid': video_id}) - status = self._download_json(api_url, video_id, data=payload) - - # Pull out video - url = status['data'][0]['primary']['video_data']['videoSrc'] - # now we know this exists and is a dict - data = status['data'][0]['primary'] - - # Pull out metadata - description = strip_or_none(clean_html(data.get('full_body'))) - timestamp = unified_timestamp(data.get('date_created')) - uploader = strip_or_none(data.get('name')) - uploader_id = strip_or_none(data.get('username')) - uploader_url = ('https://parler.com/' + uploader_id) if uploader_id else None - - # Keep the file name short so it doesn't exceed filesystem limits - title = self._generic_title(url) - if uploader_id: - title = '%s-%s' % (uploader_id, title) - - # Return the result + data = self._download_json( + 'https://parler.com/open-api/ParleyDetailEndpoint.php', video_id, + data=urlencode_postdata({'uuid': video_id}))['data'][0] + primary = data['primary'] return { 'id': video_id, - 'url': url, - 'title': title, - 'description': description, - 'timestamp': timestamp, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'uploader_url': uploader_url, - } + 'url': primary['video_data']['videoSrc'], + 'thumbnail': primary['video_data']['thumbnailUrl'], + 'title': "Parler video #%s" % video_id, + 'description': strip_or_none(clean_html(primary.get('full_body'))) or None, + 'timestamp': unified_timestamp(primary.get('date_created')), + 'uploader': strip_or_none(primary.get('name')), + 'uploader_id': strip_or_none(primary.get('username')), + 'uploader_url': 'https://parler.com/%s' % strip_or_none(primary.get('username')), + 'view_count': int_or_none(primary.get('view_count')), + 'comment_count': int_or_none(data['engagement']['commentCount']), + 'repost_count': int_or_none(data['engagement']['echoCount']), + } \ No newline at end of file From d75cf3ade009cc48176bb126bd725ed2bb6f6b4e Mon Sep 17 00:00:00 2001 From: palewire Date: Tue, 23 Aug 2022 10:45:31 -0700 Subject: [PATCH 18/20] Worked youtube downloader back in --- youtube_dl/extractor/parler.py | 36 ++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index e00562e08..0dce52f3f 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .youtube import YoutubeIE from ..utils import ( clean_html, int_or_none, @@ -53,6 +54,36 @@ class ParlerIE(InfoExtractor): 'repost_count': int, }, }, + { + 'url': 'https://parler.com/feed/f23b85c1-6558-470f-b9ff-02c145f28da5', + 'md5': 'eaba1ff4a10fe281f5ce74e930ab2cb4', + 'info_dict': { + 'id': 'r5vkSaz8PxQ', + 'ext': 'mp4', + 'thumbnail': 'https://i.ytimg.com/vi_webp/r5vkSaz8PxQ/maxresdefault.webp', + 'title': 'Tom MacDonald Names Reaction', + 'description': 'md5:33c21f0d35ae6dc2edf3007d6696baea', + 'upload_date': '20220716', + 'duration': 1267, + 'uploader': 'Mahesh Chookolingo', + 'uploader_id': 'maheshchookolingo', + 'uploader_url': 'http://www.youtube.com/user/maheshchookolingo', + 'channel': 'Mahesh Chookolingo', + 'channel_id': 'UCox6YeMSY1PQInbCtTaZj_w', + 'channel_url': 'https://www.youtube.com/channel/UCox6YeMSY1PQInbCtTaZj_w', + 'categories': ['Entertainment'], + 'tags': list, + 'availability': 'public', + 'live_status': 'not_live', + 'view_count': int, + 'comment_count': int, + 'like_count': int, + 'channel_follower_count': int, + 'age_limit': 0, + 'playable_in_embed': True, + }, + 'add_ie': ['Youtube'], + }, ] def _real_extract(self, url): @@ -61,6 +92,11 @@ class ParlerIE(InfoExtractor): 'https://parler.com/open-api/ParleyDetailEndpoint.php', video_id, data=urlencode_postdata({'uuid': video_id}))['data'][0] primary = data['primary'] + + embed = self._parse_json(primary.get('V2LINKLONG') or '', video_id, fatal=False) + if embed: + return self.url_result(embed[0], YoutubeIE.ie_key()) + return { 'id': video_id, 'url': primary['video_data']['videoSrc'], From 54a1f498f1daa71c319583cced7a8d0ef59fc5c3 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 30 Oct 2022 21:07:50 +0000 Subject: [PATCH 19/20] Ensure primary is a dict Test commit to provoke CI tests --- youtube_dl/extractor/parler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index 0dce52f3f..e16bcf749 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -7,6 +7,7 @@ from ..utils import ( clean_html, int_or_none, strip_or_none, + try_get, unified_timestamp, urlencode_postdata, ) @@ -91,7 +92,7 @@ class ParlerIE(InfoExtractor): data = self._download_json( 'https://parler.com/open-api/ParleyDetailEndpoint.php', video_id, data=urlencode_postdata({'uuid': video_id}))['data'][0] - primary = data['primary'] + primary = try_get(data, lambda x: x['primary'], dict) or {} embed = self._parse_json(primary.get('V2LINKLONG') or '', video_id, fatal=False) if embed: From 481ad85994e4234aaca3541972e0b6fe5c05eb28 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 30 Oct 2022 21:10:32 +0000 Subject: [PATCH 20/20] Newline at end --- youtube_dl/extractor/parler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/parler.py b/youtube_dl/extractor/parler.py index e16bcf749..4a1891384 100644 --- a/youtube_dl/extractor/parler.py +++ b/youtube_dl/extractor/parler.py @@ -111,4 +111,4 @@ class ParlerIE(InfoExtractor): 'view_count': int_or_none(primary.get('view_count')), 'comment_count': int_or_none(data['engagement']['commentCount']), 'repost_count': int_or_none(data['engagement']['echoCount']), - } \ No newline at end of file + }