Merge branch 'master' into prefer-webm
[youtube-dl.git] / youtube-dl
index 13d4276..e6768ac 100755 (executable)
@@ -4,10 +4,12 @@
 # Author: Danny Colligan
 # Author: Benjamin Johnson
 # Author: Vasyl' Vavrychuk
+# Author: Witold Baryluk
 # License: Public domain code
 import cookielib
 import ctypes
 import datetime
+import email.utils
 import gzip
 import htmlentitydefs
 import httplib
@@ -116,6 +118,14 @@ def sanitize_open(filename, open_mode):
                stream = open(filename, open_mode)
                return (stream, filename)
 
+def timeconvert(timestr):
+    """Convert RFC 2822 defined time string into system timestamp"""
+    timestamp = None
+    timetuple = email.utils.parsedate_tz(timestr)
+    if timetuple is not None:
+        timestamp = email.utils.mktime_tz(timetuple)
+    return timestamp
+
 class DownloadError(Exception):
        """Download Error exception.
 
@@ -189,6 +199,14 @@ class YoutubeDLHandler(urllib2.HTTPHandler):
                except zlib.error:
                        return zlib.decompress(data)
        
+       @staticmethod
+       def addinfourl_wrapper(stream, headers, url, code):
+               if hasattr(urllib2.addinfourl, 'getcode'):
+                       return urllib2.addinfourl(stream, headers, url, code)
+               ret = urllib2.addinfourl(stream, headers, url)
+               ret.code = code
+               return ret
+       
        def http_request(self, req):
                for h in std_headers:
                        if h in req.headers:
@@ -205,12 +223,12 @@ class YoutubeDLHandler(urllib2.HTTPHandler):
                # gzip
                if resp.headers.get('Content-encoding', '') == 'gzip':
                        gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
-                       resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
+                       resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
                        resp.msg = old_resp.msg
                # deflate
                if resp.headers.get('Content-encoding', '') == 'deflate':
                        gz = StringIO.StringIO(self.deflate(resp.read()))
-                       resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
+                       resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
                        resp.msg = old_resp.msg
                return resp
 
@@ -249,6 +267,7 @@ class FileDownloader(object):
        forcetitle:       Force printing title.
        forcethumbnail:   Force printing thumbnail URL.
        forcedescription: Force printing description.
+       forcefilename:    Force printing final filename.
        simulate:         Do not download the video files.
        format:           Video format code.
        format_limit:     Highest quality format to try.
@@ -264,6 +283,7 @@ class FileDownloader(object):
        logtostderr:      Log messages to stderr instead of stdout.
        consoletitle:     Display progress in console window's titlebar.
        nopart:           Do not use temporary .part files.
+       updatetime:       Use the Last-modified header to set output file timestamps.
        """
 
        params = None
@@ -429,6 +449,11 @@ class FileDownloader(object):
                        return filename
                return filename + u'.part'
 
+       def undo_temp_name(self, filename):
+               if filename.endswith(u'.part'):
+                       return filename[:-len(u'.part')]
+               return filename
+
        def try_rename(self, old_filename, new_filename):
                try:
                        if old_filename == new_filename:
@@ -436,6 +461,23 @@ class FileDownloader(object):
                        os.rename(old_filename, new_filename)
                except (IOError, OSError), err:
                        self.trouble(u'ERROR: unable to rename file')
+       
+       def try_utime(self, filename, last_modified_hdr):
+               """Try to set the last-modified time of the given file."""
+               if last_modified_hdr is None:
+                       return
+               if not os.path.isfile(filename):
+                       return
+               timestr = last_modified_hdr
+               if timestr is None:
+                       return
+               filetime = timeconvert(timestr)
+               if filetime is None:
+                       return
+               try:
+                       os.utime(filename,(time.time(), filetime))
+               except:
+                       pass
 
        def report_destination(self, filename):
                """Report destination filename."""
@@ -480,8 +522,21 @@ class FileDownloader(object):
                """Increment the ordinal that assigns a number to each file."""
                self._num_downloads += 1
 
+       def prepare_filename(self, info_dict):
+               """Generate the output filename."""
+               try:
+                       template_dict = dict(info_dict)
+                       template_dict['epoch'] = unicode(long(time.time()))
+                       template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
+                       filename = self.params['outtmpl'] % template_dict
+                       return filename
+               except (ValueError, KeyError), err:
+                       self.trouble(u'ERROR: invalid system charset or erroneous output template')
+                       return None
+
        def process_info(self, info_dict):
                """Process a single dictionary returned by an InfoExtractor."""
+               filename = self.prepare_filename(info_dict)
                # Do nothing else if in simulate mode
                if self.params.get('simulate', False):
                        # Forced printings
@@ -493,16 +548,12 @@ class FileDownloader(object):
                                print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
                        if self.params.get('forcedescription', False) and 'description' in info_dict:
                                print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
+                       if self.params.get('forcefilename', False) and filename is not None:
+                               print filename.encode(preferredencoding(), 'xmlcharrefreplace')
 
                        return
 
-               try:
-                       template_dict = dict(info_dict)
-                       template_dict['epoch'] = unicode(long(time.time()))
-                       template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
-                       filename = self.params['outtmpl'] % template_dict
-               except (ValueError, KeyError), err:
-                       self.trouble(u'ERROR: invalid system charset or erroneous output template')
+               if filename is None:
                        return
                if self.params.get('nooverwrites', False) and os.path.exists(filename):
                        self.to_stderr(u'WARNING: file exists and will be skipped')
@@ -698,6 +749,7 @@ class FileDownloader(object):
                        if stream is None:
                                try:
                                        (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
+                                       filename = self.undo_temp_name(tmpfilename)
                                        self.report_destination(filename)
                                except (OSError, IOError), err:
                                        self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
@@ -723,6 +775,11 @@ class FileDownloader(object):
                if data_len is not None and byte_counter != data_len:
                        raise ContentTooShortError(byte_counter, long(data_len))
                self.try_rename(tmpfilename, filename)
+
+               # Update file modification time
+               if self.params.get('updatetime', True):
+                       self.try_utime(filename, data.info().get('last-modified', None))
+
                return True
 
 class InfoExtractor(object):
@@ -799,13 +856,13 @@ class InfoExtractor(object):
 class YoutubeIE(InfoExtractor):
        """Information extractor for youtube.com."""
 
-       _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
+       _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
        _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
        _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
        _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
        _NETRC_MACHINE = 'youtube'
        # Listed in order of quality
-       _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
+       _available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13']
        _video_extensions = {
                '13': '3gp',
                '17': 'mp4',
@@ -1288,7 +1345,7 @@ class DailymotionIE(InfoExtractor):
                video_title = mobj.group(1).decode('utf-8')
                video_title = sanitize_title(video_title)
 
-               mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a>', webpage)
+               mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
                if mobj is None:
                        self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
                        return
@@ -2038,7 +2095,7 @@ class YahooSearchIE(InfoExtractor):
 class YoutubePlaylistIE(InfoExtractor):
        """Information Extractor for YouTube playlists."""
 
-       _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
+       _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/|p/)([^&]+).*'
        _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
        _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
        _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
@@ -2290,20 +2347,26 @@ if __name__ == '__main__':
                import getpass
                import optparse
 
-               # Function to update the program file with the latest version from bitbucket.org
+               # Function to update the program file with the latest version from the repository.
                def update_self(downloader, filename):
                        # Note: downloader only used for options
-                       if not os.access (filename, os.W_OK):
+                       if not os.access(filename, os.W_OK):
                                sys.exit('ERROR: no write permissions on %s' % filename)
 
                        downloader.to_screen('Updating to latest stable version...')
-                       latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
-                       latest_version = urllib.urlopen(latest_url).read().strip()
-                       prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
-                       newcontent = urllib.urlopen(prog_url).read()
-                       stream = open(filename, 'w')
-                       stream.write(newcontent)
-                       stream.close()
+                       try:
+                               latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
+                               latest_version = urllib.urlopen(latest_url).read().strip()
+                               prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
+                               newcontent = urllib.urlopen(prog_url).read()
+                       except (IOError, OSError), err:
+                               sys.exit('ERROR: unable to download latest version')
+                       try:
+                               stream = open(filename, 'w')
+                               stream.write(newcontent)
+                               stream.close()
+                       except (IOError, OSError), err:
+                               sys.exit('ERROR: unable to overwrite current version')
                        downloader.to_screen('Updated to version %s' % latest_version)
 
                # Parse command line
@@ -2330,7 +2393,8 @@ if __name__ == '__main__':
                parser.add_option('--playlist-end',
                                dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
                parser.add_option('--dump-user-agent',
-                               action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False)
+                               action='store_true', dest='dump_user_agent',
+                               help='display the current browser identification', default=False)
 
                authentication = optparse.OptionGroup(parser, 'Authentication Options')
                authentication.add_option('-u', '--username',
@@ -2360,13 +2424,19 @@ if __name__ == '__main__':
                verbosity.add_option('-e', '--get-title',
                                action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
                verbosity.add_option('--get-thumbnail',
-                               action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
+                               action='store_true', dest='getthumbnail',
+                               help='simulate, quiet but print thumbnail URL', default=False)
                verbosity.add_option('--get-description',
-                               action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
+                               action='store_true', dest='getdescription',
+                               help='simulate, quiet but print video description', default=False)
+               verbosity.add_option('--get-filename',
+                               action='store_true', dest='getfilename',
+                               help='simulate, quiet but print output filename', default=False)
                verbosity.add_option('--no-progress',
                                action='store_true', dest='noprogress', help='do not print progress bar', default=False)
                verbosity.add_option('--console-title',
-                               action='store_true', dest='consoletitle', help='display progress in console titlebar', default=False)
+                               action='store_true', dest='consoletitle',
+                               help='display progress in console titlebar', default=False)
                parser.add_option_group(verbosity)
 
                filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
@@ -2375,7 +2445,8 @@ if __name__ == '__main__':
                filesystem.add_option('-l', '--literal',
                                action='store_true', dest='useliteral', help='use literal title in file name', default=False)
                filesystem.add_option('-A', '--auto-number',
-                               action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False)
+                               action='store_true', dest='autonumber',
+                               help='number downloaded files starting from 00000', default=False)
                filesystem.add_option('-o', '--output',
                                dest='outtmpl', metavar='TEMPLATE', help='output filename template')
                filesystem.add_option('-a', '--batch-file',
@@ -2388,6 +2459,9 @@ if __name__ == '__main__':
                                dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
                filesystem.add_option('--no-part',
                                action='store_true', dest='nopart', help='do not use .part files', default=False)
+               filesystem.add_option('--no-mtime',
+                               action='store_false', dest='updatetime',
+                               help='do not use the Last-modified header to set the file modification time', default=True)
                parser.add_option_group(filesystem)
 
                (opts, args) = parser.parse_args()
@@ -2482,12 +2556,13 @@ if __name__ == '__main__':
                        'usenetrc': opts.usenetrc,
                        'username': opts.username,
                        'password': opts.password,
-                       'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
+                       'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
                        'forceurl': opts.geturl,
                        'forcetitle': opts.gettitle,
                        'forcethumbnail': opts.getthumbnail,
                        'forcedescription': opts.getdescription,
-                       'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
+                       'forcefilename': opts.getfilename,
+                       'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
                        'format': opts.format,
                        'format_limit': opts.format_limit,
                        'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
@@ -2511,6 +2586,7 @@ if __name__ == '__main__':
                        'logtostderr': opts.outtmpl == '-',
                        'consoletitle': opts.consoletitle,
                        'nopart': opts.nopart,
+                       'updatetime': opts.updatetime,
                        })
                fd.add_info_extractor(youtube_search_ie)
                fd.add_info_extractor(youtube_pl_ie)