From 8b2b336337b83048648ca6810ac80656d44777a5 Mon Sep 17 00:00:00 2001 From: upintheairsheep <43690204+upintheairsheep@users.noreply.github.com> Date: Tue, 21 Feb 2023 09:07:14 -0800 Subject: [PATCH 1/7] Add GooglePhotosIE from tokune's ytdl fork --- youtube_dl/extractor/extractors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3a87f9e33..e52fc70cf 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -452,6 +452,7 @@ from .go import GoIE from .godtube import GodTubeIE from .golem import GolemIE from .googledrive import GoogleDriveIE +from .googlephotos import GooglePhotosIE from .googlepodcasts import ( GooglePodcastsIE, GooglePodcastsFeedIE, From 0842d4d7d7191fbf8559b250bef9229a49be9713 Mon Sep 17 00:00:00 2001 From: upintheairsheep <43690204+upintheairsheep@users.noreply.github.com> Date: Tue, 21 Feb 2023 09:08:36 -0800 Subject: [PATCH 2/7] Add the actual extractor --- youtube_dl/extractor/googlephotos.py | 68 ++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 youtube_dl/extractor/googlephotos.py diff --git a/youtube_dl/extractor/googlephotos.py b/youtube_dl/extractor/googlephotos.py new file mode 100644 index 000000000..699d41d3d --- /dev/null +++ b/youtube_dl/extractor/googlephotos.py @@ -0,0 +1,68 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class GooglePhotosIE(InfoExtractor): + _VALID_URL = r'https?://photos\.google\.com/share/(.+?)/photo/(.+?)key=(?P<id>.*)' + _TEST = { + 'url': 'https://photos.google.com/share/AF1QipO9WO5MnYm7850JgwAl7DIvRzbCoEcJamtywXL-oQ49rwF3K1frOSK63fjYD5MD-A/photo/AF1QipPRvvdy6-3EOqSACtJb7Q8QfmlXN4d4MwX5ico8?key=ZEV4S3RmYXd0bWNzQjRfQ09KQlBud1M4OUU1RzZn', + 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'info_dict': { + 'id': 'ZEV4S3RmYXd0bWNzQjRfQ09KQlBud1M4OUU1RzZn', + 'ext': 'mp4', + 'title': 
'GooglePhotosVideo', + } + } + + _formats = { + '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, + '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, + '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, + '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'}, + + '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'}, + '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'}, + '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'}, + '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, + '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, + '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'}, + '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'}, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + dash_formats = {} + formats = [] + dash_mpd_fatal = True + + 
dash_link = self._html_search_regex(r'data-url="(.+?)"', webpage, '') + mpd_url = self._download_webpage(dash_link + '=mm,dash?alr=true', video_id) + + for df in self._extract_mpd_formats( + mpd_url, video_id, fatal=dash_mpd_fatal, + formats_dict=self._formats): + if df['format_id'] not in dash_formats: + dash_formats[df['format_id']] = df + + if dash_formats: + formats = [f for f in formats if f['format_id'] not in dash_formats.keys()] + formats.extend(dash_formats.values()) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': 'GooglePhotosVideo', + 'formats': formats, + } From b44a9afcfc9480abbdbe7edcfca3fc9d456e9446 Mon Sep 17 00:00:00 2001 From: upintheairsheep <43690204+upintheairsheep@users.noreply.github.com> Date: Wed, 22 Feb 2023 09:28:03 -0800 Subject: [PATCH 3/7] Update youtube_dl/extractor/googlephotos.py Co-authored-by: dirkf --- youtube_dl/extractor/googlephotos.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/googlephotos.py b/youtube_dl/extractor/googlephotos.py index 699d41d3d..73a453e97 100644 --- a/youtube_dl/extractor/googlephotos.py +++ b/youtube_dl/extractor/googlephotos.py @@ -52,8 +52,7 @@ class GooglePhotosIE(InfoExtractor): for df in self._extract_mpd_formats( mpd_url, video_id, fatal=dash_mpd_fatal, formats_dict=self._formats): - if df['format_id'] not in dash_formats: - dash_formats[df['format_id']] = df + dash_formats.setdefault(df['format_id'], df) if dash_formats: formats = [f for f in formats if f['format_id'] not in dash_formats.keys()] From f6c7cda93afd554325f0f146c71f8fae8f8ea616 Mon Sep 17 00:00:00 2001 From: upintheairsheep <43690204+upintheairsheep@users.noreply.github.com> Date: Wed, 22 Feb 2023 09:28:18 -0800 Subject: [PATCH 4/7] Update youtube_dl/extractor/googlephotos.py thanks again Co-authored-by: dirkf --- youtube_dl/extractor/googlephotos.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/googlephotos.py 
b/youtube_dl/extractor/googlephotos.py index 73a453e97..c923622db 100644 --- a/youtube_dl/extractor/googlephotos.py +++ b/youtube_dl/extractor/googlephotos.py @@ -54,9 +54,6 @@ class GooglePhotosIE(InfoExtractor): formats_dict=self._formats): dash_formats.setdefault(df['format_id'], df) - if dash_formats: - formats = [f for f in formats if f['format_id'] not in dash_formats.keys()] - formats.extend(dash_formats.values()) self._sort_formats(formats) From 8ec63c3f81eab2e979c03cfed2c6fd6f1bd14ba9 Mon Sep 17 00:00:00 2001 From: upintheairsheep <43690204+upintheairsheep@users.noreply.github.com> Date: Wed, 22 Feb 2023 09:28:30 -0800 Subject: [PATCH 5/7] Update youtube_dl/extractor/googlephotos.py Co-authored-by: dirkf --- youtube_dl/extractor/googlephotos.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/googlephotos.py b/youtube_dl/extractor/googlephotos.py index c923622db..9e6aeb980 100644 --- a/youtube_dl/extractor/googlephotos.py +++ b/youtube_dl/extractor/googlephotos.py @@ -46,7 +46,7 @@ class GooglePhotosIE(InfoExtractor): formats = [] dash_mpd_fatal = True - dash_link = self._html_search_regex(r'data-url="(.+?)"', webpage, '') + dash_link = self._search_regex(r'''data-url\s*=\s*('|")(?P<link>(?:(?!\1).)+)''', webpage, group='link') mpd_url = self._download_webpage(dash_link + '=mm,dash?alr=true', video_id) for df in self._extract_mpd_formats( From ff7876afecc78002e2211413af5230e1e0acd1c1 Mon Sep 17 00:00:00 2001 From: upintheairsheep <43690204+upintheairsheep@users.noreply.github.com> Date: Wed, 22 Feb 2023 09:37:49 -0800 Subject: [PATCH 6/7] Fix 404 --- youtube_dl/extractor/googlephotos.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/googlephotos.py b/youtube_dl/extractor/googlephotos.py index 9e6aeb980..f26280527 100644 --- a/youtube_dl/extractor/googlephotos.py +++ b/youtube_dl/extractor/googlephotos.py @@ -7,10 +7,10 @@ from .common import InfoExtractor class 
GooglePhotosIE(InfoExtractor): _VALID_URL = r'https?://photos\.google\.com/share/(.+?)/photo/(.+?)key=(?P<id>.*)' _TEST = { - 'url': 'https://photos.google.com/share/AF1QipO9WO5MnYm7850JgwAl7DIvRzbCoEcJamtywXL-oQ49rwF3K1frOSK63fjYD5MD-A/photo/AF1QipPRvvdy6-3EOqSACtJb7Q8QfmlXN4d4MwX5ico8?key=ZEV4S3RmYXd0bWNzQjRfQ09KQlBud1M4OUU1RzZn', + 'url': 'https://photos.google.com/share/AF1QipO4IcvSjf_niq1icqPYPBK50FAsKWniuyVY7Mx8sMIDKZGb71hkUi6ZK9hgIFX-mQ/photo/AF1QipNewPmRaMZquiCgyNtz4McqeLBdkXLugNB3ov6_?key=RUhSeEVVajdhcTVic3o2Wk1URWlVZEtRdnRoaTl3', 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', 'info_dict': { - 'id': 'ZEV4S3RmYXd0bWNzQjRfQ09KQlBud1M4OUU1RzZn', + 'id': 'AF1QipNewPmRaMZquiCgyNtz4McqeLBdkXLugNB3ov6_', 'ext': 'mp4', 'title': 'GooglePhotosVideo', } From 42111f3a1768f666ad61c8ff4225548577be6764 Mon Sep 17 00:00:00 2001 From: upintheairsheep <43690204+upintheairsheep@users.noreply.github.com> Date: Thu, 9 Mar 2023 12:43:44 -0800 Subject: [PATCH 7/7] Update googlephotos.py --- youtube_dl/extractor/googlephotos.py | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/youtube_dl/extractor/googlephotos.py b/youtube_dl/extractor/googlephotos.py index f26280527..6d0793d64 100644 --- a/youtube_dl/extractor/googlephotos.py +++ b/youtube_dl/extractor/googlephotos.py @@ -16,28 +16,7 @@ class GooglePhotosIE(InfoExtractor): } } - _formats = { - '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'}, - '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'}, - '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, - '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'}, - '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'}, - '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, - '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 
'vcodec': 'h264'}, - '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, - '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'}, - '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, - '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, - '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'}, - - '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'}, - '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'}, - '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'}, - '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, - '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, - '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'}, - '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'}, - } + _formats = YoutubeIE._formats def _real_extract(self, url): video_id = self._match_id(url)