Clean it up

This commit is contained in:
palewire 2022-08-10 04:54:11 -07:00
parent 789a929349
commit 27527397e4
No known key found for this signature in database
GPG Key ID: A5AD4A9AD42D69AB

View File

@ -1,6 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -14,7 +11,7 @@ from ..utils import (
class TruthIE(InfoExtractor): class TruthIE(InfoExtractor):
"""Extract videos from posts on Donald Trump's truthsocial.com.""" """Extract videos from posts on Donald Trump's truthsocial.com."""
_VALID_URL = r'https://truthsocial\.com/@[^/]+/posts/(?P<id>[0-9]+)' _VALID_URL = r'https://truthsocial\.com/@[^/]+/posts/(?P<id>[\d]+)'
_TESTS = [ _TESTS = [
{ {
'url': 'https://truthsocial.com/@realDonaldTrump/posts/108779000807761862', 'url': 'https://truthsocial.com/@realDonaldTrump/posts/108779000807761862',
@ -22,11 +19,15 @@ class TruthIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '108779000807761862', 'id': '108779000807761862',
'ext': 'qt', 'ext': 'qt',
'title': 'realDonaldTrump-0d8691160c73d663', 'title': 'Donald J. Trump - 0d8691160c73d663',
'timestamp': 1659835827, 'timestamp': 1659835827,
'upload_date': '20220807', 'upload_date': '20220807',
'uploader': 'Donald J. Trump', 'uploader': 'Donald J. Trump',
'uploader_id': 'realDonaldTrump', 'uploader_id': 'realDonaldTrump',
'uploader_url': 'https://truthsocial.com/@realDonaldTrump',
'repost_count': int,
'comment_count': int,
'like_count': int,
}, },
}, },
{ {
@ -35,54 +36,58 @@ class TruthIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '108618228543962049', 'id': '108618228543962049',
'ext': 'mp4', 'ext': 'mp4',
'title': 'ProjectVeritasAction-6e24b75a4604b594', 'title': 'md5:48813a16498d21b07edf24e1af621e83',
'description': """RETRACTO #368: Utah NPR Affiliate RETRACTS False Claim Live On Air Following Veritas' Reporting on Curtis Campaign \n“Nothing I ever do will suffice for these people. They are engaged in conspiracy theories. They are doing precisely the thing they project that I do. Which is they dont believe in facts, they dont believe in logic, and they dont believe in rationality.” - James OKeefe""", 'description': 'md5:e070ba6bcf6165957e26a7a94ef6d975',
'timestamp': 1657382637, 'timestamp': 1657382637,
'upload_date': '20220709', 'upload_date': '20220709',
'uploader': 'Project Veritas Action', 'uploader': 'Project Veritas Action',
'uploader_id': 'ProjectVeritasAction', 'uploader_id': 'ProjectVeritasAction',
'uploader_url': 'https://truthsocial.com/@ProjectVeritasAction',
'repost_count': int,
'comment_count': int,
'like_count': int,
}, },
}, },
] ]
_GEO_COUNTRIES = ['US'] # The site is only available in the US _GEO_COUNTRIES = ['US'] # The site is only available in the US
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url)
# Get data from API # Get data from API
api_url = 'https://truthsocial.com/api/v1/statuses/' + video_id video_id = self._match_id(url)
status = self._download_json(api_url, video_id) status = self._download_json(
'https://truthsocial.com/api/v1/statuses/' + video_id,
video_id
)
# Pull out video # Pull out video
url = status['media_attachments'][0]['url'] url = status['media_attachments'][0]['url']
# Pull out metadata # Return the stuff
description = strip_or_none(clean_html(status.get('content')))
timestamp = unified_timestamp(status.get('created_at'))
account = status.get('account') or {} account = status.get('account') or {}
uploader = strip_or_none(account.get('display_name')) uploader = strip_or_none(account.get('display_name'))
uploader_id = strip_or_none(account.get('username')) uploader_id = strip_or_none(account.get('username'))
uploader_url = ('https://truthsocial.com/@' + uploader_id) if uploader_id else None post = strip_or_none(clean_html(status.get('content')))
repost_count = int_or_none(status.get('reblogs_count'))
like_count = int_or_none(status.get('favourites_count'))
comment_count = int_or_none(status.get('replies_count'))
# Keep the file name short so it doesn't exceed filesystem limits # Set the title, handling the case where it's too long or empty
title = self._generic_title(url) if len(post) > 40:
if uploader_id: title = post[:35] + "[...]"
title = '%s-%s' % (uploader_id, title) elif len(post) == 0:
title = self._generic_title(url)
else:
title = post
if uploader:
title = '%s - %s' % (uploader, title)
# Return the stuff
return { return {
'id': video_id, 'id': video_id,
'url': url, 'url': url,
'title': title, 'title': title,
'description': description, 'description': post,
'timestamp': timestamp, 'timestamp': unified_timestamp(status.get('created_at')),
'uploader': uploader, 'uploader': uploader,
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'uploader_url': uploader_url, 'uploader_url': ('https://truthsocial.com/@' + uploader_id) if uploader_id else None,
'repost_count': repost_count, 'repost_count': int_or_none(status.get('reblogs_count')),
'like_count': like_count, 'like_count': int_or_none(status.get('favourites_count')),
'comment_count': comment_count, 'comment_count': int_or_none(status.get('replies_count')),
} }