Only skip download if file exists; convert audio

[youtube-dl.git] / youtube_dl / __init__.py
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py

index 306ecca..d02178c 100755 (executable)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -18,7 +18,7 @@ __author__  = (
         )
  
  __license__ = 'Public Domain'
-__version__ = '2011.11.23'
+__version__ = '2011.12.15'
  
  UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
  
@@ -759,10 +759,6 @@ class FileDownloader(object):
                 if filename is None:
                         return
  
-               if self.params.get('nooverwrites', False) and os.path.exists(filename):
-                       self.to_stderr(u'WARNING: file exists and will be skipped')
-                       return
-
                 try:
                         dn = os.path.dirname(filename)
                         if dn != '' and not os.path.exists(dn):
@@ -804,16 +800,19 @@ class FileDownloader(object):
                                 return
  
                 if not self.params.get('skip_download', False):
-                       try:
-                               success = self._do_download(filename, info_dict)
-                       except (OSError, IOError), err:
-                               raise UnavailableVideoError
-                       except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                               self.trouble(u'ERROR: unable to download video data: %s' % str(err))
-                               return
-                       except (ContentTooShortError, ), err:
-                               self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
-                               return
+                       if self.params.get('nooverwrites', False) and os.path.exists(filename):
+                               success = True
+                       else:
+                               try:
+                                       success = self._do_download(filename, info_dict)
+                               except (OSError, IOError), err:
+                                       raise UnavailableVideoError
+                               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                                       self.trouble(u'ERROR: unable to download video data: %s' % str(err))
+                                       return
+                               except (ContentTooShortError, ), err:
+                                       self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
+                                       return
         
                         if success:
                                 try:
@@ -1591,6 +1590,8 @@ class DailymotionIE(InfoExtractor):
                 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
  
         def _real_extract(self, url):
+               htmlParser = HTMLParser.HTMLParser()
+               
                 # Extract id and simplified title from URL
                 mobj = re.match(self._VALID_URL, url)
                 if mobj is None:
@@ -1601,7 +1602,6 @@ class DailymotionIE(InfoExtractor):
                 self._downloader.increment_downloads()
                 video_id = mobj.group(1)
  
-               simple_title = mobj.group(2).decode('utf-8')
                 video_extension = 'flv'
  
                 # Retrieve video webpage to extract further information
@@ -1631,12 +1631,13 @@ class DailymotionIE(InfoExtractor):
  
                 video_url = mediaURL
  
-               mobj = re.search(r'(?im)<title>\s*(.+)\s*-\s*Video\s+Dailymotion</title>', webpage)
+               mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage)
                 if mobj is None:
                         self._downloader.trouble(u'ERROR: unable to extract title')
                         return
-               video_title = mobj.group(1).decode('utf-8')
+               video_title = htmlParser.unescape(mobj.group('title')).decode('utf-8')
                 video_title = sanitize_title(video_title)
+               simple_title = _simplify_title(video_title)
  
                 mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
                 if mobj is None: