Only skip download if file exists; convert audio
[youtube-dl.git] / youtube_dl / __init__.py
index 306ecca..d02178c 100755 (executable)
@@ -18,7 +18,7 @@ __author__  = (
        )
 
 __license__ = 'Public Domain'
-__version__ = '2011.11.23'
+__version__ = '2011.12.15'
 
 UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
 
@@ -759,10 +759,6 @@ class FileDownloader(object):
                if filename is None:
                        return
 
-               if self.params.get('nooverwrites', False) and os.path.exists(filename):
-                       self.to_stderr(u'WARNING: file exists and will be skipped')
-                       return
-
                try:
                        dn = os.path.dirname(filename)
                        if dn != '' and not os.path.exists(dn):
@@ -804,16 +800,19 @@ class FileDownloader(object):
                                return
 
                if not self.params.get('skip_download', False):
-                       try:
-                               success = self._do_download(filename, info_dict)
-                       except (OSError, IOError), err:
-                               raise UnavailableVideoError
-                       except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                               self.trouble(u'ERROR: unable to download video data: %s' % str(err))
-                               return
-                       except (ContentTooShortError, ), err:
-                               self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
-                               return
+                       if self.params.get('nooverwrites', False) and os.path.exists(filename):
+                               success = True
+                       else:
+                               try:
+                                       success = self._do_download(filename, info_dict)
+                               except (OSError, IOError), err:
+                                       raise UnavailableVideoError
+                               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                                       self.trouble(u'ERROR: unable to download video data: %s' % str(err))
+                                       return
+                               except (ContentTooShortError, ), err:
+                                       self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
+                                       return
        
                        if success:
                                try:
@@ -1591,6 +1590,8 @@ class DailymotionIE(InfoExtractor):
                self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
 
        def _real_extract(self, url):
+               htmlParser = HTMLParser.HTMLParser()
+               
                # Extract id and simplified title from URL
                mobj = re.match(self._VALID_URL, url)
                if mobj is None:
@@ -1601,7 +1602,6 @@ class DailymotionIE(InfoExtractor):
                self._downloader.increment_downloads()
                video_id = mobj.group(1)
 
-               simple_title = mobj.group(2).decode('utf-8')
                video_extension = 'flv'
 
                # Retrieve video webpage to extract further information
@@ -1631,12 +1631,13 @@ class DailymotionIE(InfoExtractor):
 
                video_url = mediaURL
 
-               mobj = re.search(r'(?im)<title>\s*(.+)\s*-\s*Video\s+Dailymotion</title>', webpage)
+               mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage)
                if mobj is None:
                        self._downloader.trouble(u'ERROR: unable to extract title')
                        return
-               video_title = mobj.group(1).decode('utf-8')
+               video_title = htmlParser.unescape(mobj.group('title')).decode('utf-8')
                video_title = sanitize_title(video_title)
+               simple_title = _simplify_title(video_title)
 
                mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
                if mobj is None: