From 27527397e47fc9ea6ca4984fed5721c044222a8a Mon Sep 17 00:00:00 2001 From: palewire Date: Wed, 10 Aug 2022 04:54:11 -0700 Subject: [PATCH] Clean it up --- youtube_dl/extractor/truth.py | 63 +++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/truth.py b/youtube_dl/extractor/truth.py index c1d83b2ee..5cbfd8eec 100644 --- a/youtube_dl/extractor/truth.py +++ b/youtube_dl/extractor/truth.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( @@ -14,7 +11,7 @@ from ..utils import ( class TruthIE(InfoExtractor): """Extract videos from posts on Donald Trump's truthsocial.com.""" - _VALID_URL = r'https://truthsocial\.com/@[^/]+/posts/(?P<id>[0-9]+)' + _VALID_URL = r'https://truthsocial\.com/@[^/]+/posts/(?P<id>[\d]+)' _TESTS = [ { 'url': 'https://truthsocial.com/@realDonaldTrump/posts/108779000807761862', @@ -22,11 +19,15 @@ class TruthIE(InfoExtractor): 'info_dict': { 'id': '108779000807761862', 'ext': 'qt', - 'title': 'realDonaldTrump-0d8691160c73d663', + 'title': 'Donald J. Trump - 0d8691160c73d663', 'timestamp': 1659835827, 'upload_date': '20220807', 'uploader': 'Donald J. Trump', 'uploader_id': 'realDonaldTrump', + 'uploader_url': 'https://truthsocial.com/@realDonaldTrump', + 'repost_count': int, + 'comment_count': int, + 'like_count': int, }, }, { @@ -35,54 +36,58 @@ class TruthIE(InfoExtractor): 'info_dict': { 'id': '108618228543962049', 'ext': 'mp4', - 'title': 'ProjectVeritasAction-6e24b75a4604b594', - 'description': """RETRACTO #368: Utah NPR Affiliate RETRACTS False Claim Live On Air Following Veritas' Reporting on Curtis Campaign \n“Nothing I ever do will suffice for these people. They are engaged in conspiracy theories. They are doing precisely the thing they project that I do. 
Which is they don’t believe in facts, they don’t believe in logic, and they don’t believe in rationality.” - James O’Keefe""", + 'title': 'md5:48813a16498d21b07edf24e1af621e83', + 'description': 'md5:e070ba6bcf6165957e26a7a94ef6d975', 'timestamp': 1657382637, 'upload_date': '20220709', 'uploader': 'Project Veritas Action', 'uploader_id': 'ProjectVeritasAction', + 'uploader_url': 'https://truthsocial.com/@ProjectVeritasAction', + 'repost_count': int, + 'comment_count': int, + 'like_count': int, }, }, ] _GEO_COUNTRIES = ['US'] # The site is only available in the US def _real_extract(self, url): - video_id = self._match_id(url) - # Get data from API - api_url = 'https://truthsocial.com/api/v1/statuses/' + video_id - status = self._download_json(api_url, video_id) + video_id = self._match_id(url) + status = self._download_json( + 'https://truthsocial.com/api/v1/statuses/' + video_id, + video_id + ) # Pull out video url = status['media_attachments'][0]['url'] - # Pull out metadata - description = strip_or_none(clean_html(status.get('content'))) - timestamp = unified_timestamp(status.get('created_at')) + # Return the stuff account = status.get('account') or {} uploader = strip_or_none(account.get('display_name')) uploader_id = strip_or_none(account.get('username')) - uploader_url = ('https://truthsocial.com/@' + uploader_id) if uploader_id else None - repost_count = int_or_none(status.get('reblogs_count')) - like_count = int_or_none(status.get('favourites_count')) - comment_count = int_or_none(status.get('replies_count')) + post = strip_or_none(clean_html(status.get('content'))) - # Keep the file name short so it doesn't exceed filesystem limits - title = self._generic_title(url) - if uploader_id: - title = '%s-%s' % (uploader_id, title) + # Set the title, handling case where it's too long or empty + if len(post) > 40: + title = post[:35] + "[...]" + elif len(post) == 0: + title = self._generic_title(url) + else: + title = post + if uploader: + title = '%s - %s' % 
(uploader, title) - # Return the stuff return { 'id': video_id, 'url': url, 'title': title, - 'description': description, - 'timestamp': timestamp, + 'description': post, + 'timestamp': unified_timestamp(status.get('created_at')), 'uploader': uploader, 'uploader_id': uploader_id, - 'uploader_url': uploader_url, - 'repost_count': repost_count, - 'like_count': like_count, - 'comment_count': comment_count, + 'uploader_url': ('https://truthsocial.com/@' + uploader_id) if uploader_id else None, + 'repost_count': int_or_none(status.get('reblogs_count')), + 'like_count': int_or_none(status.get('favourites_count')), + 'comment_count': int_or_none(status.get('replies_count')), }