updated extractor/hentaistigma.py

modified way of getting video_url to get all formats/version i hope it still works though
2024-10-05 00:35:45 +02:00 · 2024-10-05 00:35:45 +02:00 · 105d32b58c
commit 105d32b58c
parent 653928b002
1 changed files with 67 additions and 6 deletions
--- a/youtube_dl/extractor/hentaistigma.py
+++ b/youtube_dl/extractor/hentaistigma.py
@ -2,6 +2,12 @@ from __future__ import unicode_literals

 from .common import InfoExtractor

+##IFixThat additional imports (I insert them into every Extractor that I modify - can be removed if not needed)
+import datetime
+import os
+import re
+##IFixThat_end
+

 class HentaiStigmaIE(InfoExtractor):
    _VALID_URL = r'^https?://hentai\.animestigma\.com/(?P<id>[^/]+)'
@ -16,6 +22,35 @@ class HentaiStigmaIE(InfoExtractor):
        }
    }

+    ##IFixThat general helper functions (I insert them into every Extractor that I modify - can be removed if not needed)
+
+    def _ifixthat_helper_file_exists(self,filename):
+        print('does '+filename+' exist?')
+        if os.path.exists(filename):
+            print('yes')
+            return True
+        else:
+            print('no')
+            return False
+
+    def _ifixthat_helper_file_write(self,filename, content):
+        if self._ifixthat_helper_file_exists(filename):
+            print('backing up previous '+filename)
+            os.rename(filename, filename+'.backup_'+datetime.datetime.now().strftime("%Y%m%d%H%M%S"))
+        print('writing file')
+        myfile = open(filename, "wt")
+        myfile.write(content)
+        myfile.close()
+
+    def _ifixthat_helper_file_read(self,filename):
+        if self._ifixthat_helper_file_exists(filename):
+            myfile = open(filename, "rt")
+            return myfile.read()
+        else:
+            return ''
+
+    ##IFixThat_end
+
    def _real_extract(self, url):
        video_id = self._match_id(url)

@ -24,16 +59,42 @@ class HentaiStigmaIE(InfoExtractor):
        title = self._html_search_regex(
            r'<h2[^>]+class="posttitle"[^>]*><a[^>]*>([^<]+)</a>',
            webpage, 'title')
-        wrap_url = self._html_search_regex(
-            r'<iframe[^>]+src="([^"]+mp4)"', webpage, 'wrapper url')
-        wrap_webpage = self._download_webpage(wrap_url, video_id)
+
+        ##IFixThat replacing 1st-iframe-search with all iframe-search
+
+        #wrap_url = self._html_search_regex(
+        #    r'<iframe[^>]+src="([^"]+mp4)"', webpage, 'wrapper url')
+        #wrap_webpage = self._download_webpage(wrap_url, video_id)
+        #video_url = self._html_search_regex(
+        #    r'file\s*:\s*"([^"]+)"', wrap_webpage, 'video url')
+
+        ##IFixThat >REPLACE<
+
+        # foreach iframe do get source-src
+        formats = []
+        #print('-----------------------------------------------------------------------------------')
+        mymatches = re.findall(r'<b> (SUB|RAW)</b></span><br/><iframe[^>]+src="([^"]+mp4)"', webpage)
+        #print(mymatches)
+        #print('-----------------------------------------------------------------------------------')
+        for mymatch in mymatches:
+            print('"'+mymatch[0]+'" : '+mymatch[1])
+            wrap_webpage = self._download_webpage(mymatch[1], video_id)

            video_url = self._html_search_regex(
-            r'file\s*:\s*"([^"]+)"', wrap_webpage, 'video url')
+                #r'file\s*:\s*"([^"]+)"', wrap_webpage, 'video url')
+                r'<source[^>]+src="([^"]+mp4)"[^>]+type=\'video/mp4\'>', wrap_webpage, 'video url')
+                #e.g. <source src="https://v2.animestigma.com/videos/hd1/Inyouchuu_Etsu_-_Bonusssub.mp4" type="video/mp4">
+            formats.append({
+                    'url': video_url,
+                    'format_id': 'mp4-'+mymatch[0],
+                })
+
+        ##IFixThat_end

        return {
            'id': video_id,
-            'url': video_url,
+            ##IFixThat replace url with formats
+            'formats': formats,
            'title': title,
            'age_limit': 18,
        }