diff --git a/youtube_dl/extractor/hentaistigma.py b/youtube_dl/extractor/hentaistigma.py
index 86a93de4d..5290998e8 100644
--- a/youtube_dl/extractor/hentaistigma.py
+++ b/youtube_dl/extractor/hentaistigma.py
@@ -2,6 +2,12 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 
+##IFixThat additional imports (I insert them into every Extractor that I modify - can be removed if not needed)
+import datetime
+import os
+import re
+##IFixThat_end
+
 
 class HentaiStigmaIE(InfoExtractor):
     _VALID_URL = r'^https?://hentai\.animestigma\.com/(?P<id>[^/]+)'
@@ -16,6 +22,36 @@ class HentaiStigmaIE(InfoExtractor):
         }
     }
 
+    ##IFixThat general helper functions (I insert them into every Extractor that I modify - can be removed if not needed)
+
+    def _ifixthat_helper_file_exists(self, filename):
+        """Return True if ``filename`` exists, logging the check and its result."""
+        self.to_screen('does ' + filename + ' exist?')
+        exists = os.path.exists(filename)
+        self.to_screen('yes' if exists else 'no')
+        return exists
+
+    def _ifixthat_helper_file_write(self, filename, content):
+        """Write ``content`` to ``filename``; an existing file is first renamed
+        to ``<filename>.backup_<YYYYmmddHHMMSS>``."""
+        if self._ifixthat_helper_file_exists(filename):
+            self.to_screen('backing up previous ' + filename)
+            os.rename(filename, filename + '.backup_' + datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
+        self.to_screen('writing file')
+        # The context manager closes the handle even if write() raises.
+        with open(filename, 'wt') as outfile:
+            outfile.write(content)
+
+    def _ifixthat_helper_file_read(self, filename):
+        """Return the text of ``filename``, or '' when it does not exist."""
+        if not self._ifixthat_helper_file_exists(filename):
+            return ''
+        # Close the handle deterministically instead of leaving it to the GC.
+        with open(filename, 'rt') as infile:
+            return infile.read()
+
+    ##IFixThat_end
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
@@ -24,16 +60,38 @@ class HentaiStigmaIE(InfoExtractor):
         title = self._html_search_regex(
             r'<h2[^>]+class="posttitle"[^>]*><a[^>]*>([^<]+)</a>',
             webpage, 'title')
-        wrap_url = self._html_search_regex(
-            r'<iframe[^>]+src="([^"]+mp4)"', webpage, 'wrapper url')
-        wrap_webpage = self._download_webpage(wrap_url, video_id)
 
-        video_url = self._html_search_regex(
-            r'file\s*:\s*"([^"]+)"', wrap_webpage, 'video url')
+        ##IFixThat replacing 1st-iframe-search with all iframe-search
+
+        # Every iframe that follows a SUB or RAW label is fetched, and the
+        # <source> URL found in the wrapped page becomes one format entry.
+        # NOTE(review): the markup between the label and the <iframe> tag is
+        # assumed to be optional whitespace and/or a <br> - confirm on a live page.
+        formats = []
+        for label, wrap_url in re.findall(
+                r' (SUB|RAW)\s*(?:<br\s*/?>\s*)?<iframe[^>]+src="([^"]+mp4)"',
+                webpage):
+            self.to_screen('"' + label + '" : ' + wrap_url)
+            wrap_webpage = self._download_webpage(wrap_url, video_id)
+
+            # e.g. <source src="https://host/video.mp4" type='video/mp4'>
+            video_url = self._html_search_regex(
+                r'<source[^>]+src="([^"]+mp4)"[^>]+type=\'video/mp4\'>',
+                wrap_webpage, 'video url')
+            formats.append({
+                'url': video_url,
+                'format_id': 'mp4-' + label,
+            })
+        # Orders the formats and raises ExtractorError when none were found,
+        # instead of returning an info dict without any playable URL.
+        self._sort_formats(formats)
+
+        ##IFixThat_end
 
         return {
             'id': video_id,
-            'url': video_url,
+            ##IFixThat replace url with formats
+            'formats': formats,
             'title': title,
             'age_limit': 18,
         }