Clean it up

2022-08-10 04:54:11 -07:00 · 2022-08-10 04:54:11 -07:00 · 27527397e4
commit 27527397e4
parent 789a929349
1 changed files with 34 additions and 29 deletions
--- a/youtube_dl/extractor/truth.py
+++ b/youtube_dl/extractor/truth.py
@ -1,6 +1,3 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
@ -14,7 +11,7 @@ from ..utils import (
 class TruthIE(InfoExtractor):
    """Extract videos from posts on Donald Trump's truthsocial.com."""
-    _VALID_URL = r'https://truthsocial\.com/@[^/]+/posts/(?P<id>[0-9]+)'
+    _VALID_URL = r'https://truthsocial\.com/@[^/]+/posts/(?P<id>[\d]+)'
    _TESTS = [
        {
            'url': 'https://truthsocial.com/@realDonaldTrump/posts/108779000807761862',
@ -22,11 +19,15 @@ class TruthIE(InfoExtractor):
            'info_dict': {
                'id': '108779000807761862',
                'ext': 'qt',
-                'title': 'realDonaldTrump-0d8691160c73d663',
+                'title': 'Donald J. Trump - 0d8691160c73d663',
                'timestamp': 1659835827,
                'upload_date': '20220807',
                'uploader': 'Donald J. Trump',
                'uploader_id': 'realDonaldTrump',
                'uploader_url': 'https://truthsocial.com/@realDonaldTrump',
                'repost_count': int,
                'comment_count': int,
                'like_count': int,
            },
        },
        {
@ -35,54 +36,58 @@ class TruthIE(InfoExtractor):
            'info_dict': {
                'id': '108618228543962049',
                'ext': 'mp4',
-                'title': 'ProjectVeritasAction-6e24b75a4604b594',
+                'title': 'md5:48813a16498d21b07edf24e1af621e83',
-                'description': """RETRACTO #368: Utah NPR Affiliate RETRACTS False Claim Live On Air Following Veritas' Reporting on Curtis Campaign  \n“Nothing I ever do will suffice for these people. They are engaged in conspiracy theories. They are doing precisely the thing they project that I do. Which is they don’t believe in facts, they don’t believe in logic, and they don’t believe in rationality.” - James O’Keefe""",
+                'description': 'md5:e070ba6bcf6165957e26a7a94ef6d975',
                'timestamp': 1657382637,
                'upload_date': '20220709',
                'uploader': 'Project Veritas Action',
                'uploader_id': 'ProjectVeritasAction',
                'uploader_url': 'https://truthsocial.com/@ProjectVeritasAction',
                'repost_count': int,
                'comment_count': int,
                'like_count': int,
            },
        },
    ]
    _GEO_COUNTRIES = ['US']  # The site is only available in the US
    def _real_extract(self, url):
        video_id = self._match_id(url)
        # Get data from API
-        api_url = 'https://truthsocial.com/api/v1/statuses/' + video_id
+        video_id = self._match_id(url)
-        status = self._download_json(api_url, video_id)
+        status = self._download_json(
            'https://truthsocial.com/api/v1/statuses/' + video_id,
            video_id
        )
        # Pull out video
        url = status['media_attachments'][0]['url']
-        # Pull out metadata
+        # Pull out the post's metadata
        description = strip_or_none(clean_html(status.get('content')))
        timestamp = unified_timestamp(status.get('created_at'))
        account = status.get('account') or {}
        uploader = strip_or_none(account.get('display_name'))
        uploader_id = strip_or_none(account.get('username'))
-        uploader_url = ('https://truthsocial.com/@' + uploader_id) if uploader_id else None
+        post = strip_or_none(clean_html(status.get('content')))
        repost_count = int_or_none(status.get('reblogs_count'))
        like_count = int_or_none(status.get('favourites_count'))
        comment_count = int_or_none(status.get('replies_count'))
-        # Keep the file name short so it doesn't exceed filesystem limits
+        # Set the title, handling the case where it's too long or empty
-        title = self._generic_title(url)
+        if len(post) > 40:
-        if uploader_id:
+            title = post[:35] + "[...]"
-            title = '%s-%s' % (uploader_id, title)
+        elif len(post) == 0:
            title = self._generic_title(url)
        else:
            title = post
        if uploader:
            title = '%s - %s' % (uploader, title)
        # Return the stuff
        return {
            'id': video_id,
            'url': url,
            'title': title,
-            'description': description,
+            'description': post,
-            'timestamp': timestamp,
+            'timestamp': unified_timestamp(status.get('created_at')),
            'uploader': uploader,
            'uploader_id': uploader_id,
-            'uploader_url': uploader_url,
+            'uploader_url': ('https://truthsocial.com/@' + uploader_id) if uploader_id else None,
-            'repost_count': repost_count,
+            'repost_count': int_or_none(status.get('reblogs_count')),
-            'like_count': like_count,
+            'like_count': int_or_none(status.get('favourites_count')),
-            'comment_count': comment_count,
+            'comment_count': int_or_none(status.get('replies_count')),
        }