X-Git-Url: http://git.jankratochvil.net/?a=blobdiff_plain;f=youtube-dl;h=450da8ebdbeb8f2e8030bcd9be70f6c1149ff6a5;hb=6501a06d4641ffacfccd66d3de4ad5ab0f7db7fb;hp=38ad999e59dbbd69f7239d80096b889bb90770ef;hpb=8d89fbae5ab5d51c54890ff80c3f472a452499ea;p=youtube-dl.git diff --git a/youtube-dl b/youtube-dl index 38ad999..450da8e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3383,6 +3383,92 @@ class CollegeHumorIE(InfoExtractor): self._downloader.trouble(u'\nERROR: unable to download video') +class XVideosIE(InfoExtractor): + """Information extractor for xvideos.com""" + + _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)' + IE_NAME = u'xvideos' + + def report_webpage(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id)) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) + + def _simplify_title(self, title): + res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) + res = res.strip(ur'_') + return res + + def _real_extract(self, url): + htmlParser = HTMLParser.HTMLParser() + + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + video_id = mobj.group(1).decode('utf-8') + + self.report_webpage(video_id) + + request = urllib2.Request(url) + try: + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + return + + self.report_extraction(video_id) + + + # Extract video URL + mobj = re.search(r'flv_url=(.+?)&', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_url = urllib2.unquote(mobj.group(1).decode('utf-8')) + + + # Extract title + mobj = re.search(r'