Merge branch 'master' into prefer-webm
[youtube-dl.git] / youtube-dl
index 869a32b..e6768ac 100755 (executable)
@@ -4,10 +4,12 @@
 # Author: Danny Colligan
 # Author: Benjamin Johnson
 # Author: Vasyl' Vavrychuk
+# Author: Witold Baryluk
 # License: Public domain code
 import cookielib
 import ctypes
 import datetime
+import email.utils
 import gzip
 import htmlentitydefs
 import httplib
@@ -116,6 +118,14 @@ def sanitize_open(filename, open_mode):
                stream = open(filename, open_mode)
                return (stream, filename)
 
+def timeconvert(timestr):
+    """Return the system timestamp for an RFC 2822 time string, or None if it cannot be parsed."""
+    parsed = email.utils.parsedate_tz(timestr)
+    if parsed is None:
+        # parsedate_tz() reports a date it cannot parse by returning None.
+        return None
+    return email.utils.mktime_tz(parsed)
+
 class DownloadError(Exception):
        """Download Error exception.
 
@@ -257,6 +267,7 @@ class FileDownloader(object):
        forcetitle:       Force printing title.
        forcethumbnail:   Force printing thumbnail URL.
        forcedescription: Force printing description.
+       forcefilename:    Force printing final filename.
        simulate:         Do not download the video files.
        format:           Video format code.
        format_limit:     Highest quality format to try.
@@ -272,6 +283,7 @@ class FileDownloader(object):
        logtostderr:      Log messages to stderr instead of stdout.
        consoletitle:     Display progress in console window's titlebar.
        nopart:           Do not use temporary .part files.
+       updatetime:       Use the Last-modified header to set output file timestamps.
        """
 
        params = None
@@ -449,6 +461,23 @@ class FileDownloader(object):
                        os.rename(old_filename, new_filename)
                except (IOError, OSError), err:
                        self.trouble(u'ERROR: unable to rename file')
+       
+       def try_utime(self, filename, last_modified_hdr):
+               """Best-effort: set the file's mtime from a Last-modified header value.
+               Returns None in all cases. Does nothing when the header is None or
+               unparseable, or when filename is not an existing regular file."""
+               if last_modified_hdr is None or not os.path.isfile(filename):
+                       return
+               filetime = timeconvert(last_modified_hdr)
+               if filetime is None:
+                       return
+               try:
+                       # Access time is set to "now"; only the mtime comes from the header.
+                       os.utime(filename, (time.time(), filetime))
+               except (IOError, OSError, OverflowError, ValueError):
+                       # Timestamps are cosmetic: never fail a finished download over them,
+                       # but let KeyboardInterrupt/SystemExit through (bare except hid them).
+                       pass
 
        def report_destination(self, filename):
                """Report destination filename."""
@@ -493,8 +522,21 @@ class FileDownloader(object):
                """Increment the ordinal that assigns a number to each file."""
                self._num_downloads += 1
 
+       def prepare_filename(self, info_dict):
+               """Expand the 'outtmpl' template with info_dict; return None if it fails."""
+               try:
+                       fields = dict(info_dict)
+                       # Extra template keys added on top of the given info dictionary.
+                       fields['autonumber'] = unicode('%05d' % self._num_downloads)
+                       fields['epoch'] = unicode(long(time.time()))
+                       return self.params['outtmpl'] % fields
+               except (ValueError, KeyError):
+                       self.trouble(u'ERROR: invalid system charset or erroneous output template')
+               return None
+
        def process_info(self, info_dict):
                """Process a single dictionary returned by an InfoExtractor."""
+               filename = self.prepare_filename(info_dict)
                # Do nothing else if in simulate mode
                if self.params.get('simulate', False):
                        # Forced printings
@@ -506,16 +548,12 @@ class FileDownloader(object):
                                print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
                        if self.params.get('forcedescription', False) and 'description' in info_dict:
                                print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
+                       if self.params.get('forcefilename', False) and filename is not None:
+                               print filename.encode(preferredencoding(), 'xmlcharrefreplace')
 
                        return
 
-               try:
-                       template_dict = dict(info_dict)
-                       template_dict['epoch'] = unicode(long(time.time()))
-                       template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
-                       filename = self.params['outtmpl'] % template_dict
-               except (ValueError, KeyError), err:
-                       self.trouble(u'ERROR: invalid system charset or erroneous output template')
+               if filename is None:
                        return
                if self.params.get('nooverwrites', False) and os.path.exists(filename):
                        self.to_stderr(u'WARNING: file exists and will be skipped')
@@ -737,6 +775,11 @@ class FileDownloader(object):
                if data_len is not None and byte_counter != data_len:
                        raise ContentTooShortError(byte_counter, long(data_len))
                self.try_rename(tmpfilename, filename)
+
+               # Update file modification time
+               if self.params.get('updatetime', True):
+                       self.try_utime(filename, data.info().get('last-modified', None))
+
                return True
 
 class InfoExtractor(object):
@@ -813,13 +856,13 @@ class InfoExtractor(object):
 class YoutubeIE(InfoExtractor):
        """Information extractor for youtube.com."""
 
-       _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
+       _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
        _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
        _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
        _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
        _NETRC_MACHINE = 'youtube'
        # Listed in order of quality
-       _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
+       _available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13']
        _video_extensions = {
                '13': '3gp',
                '17': 'mp4',
@@ -2350,7 +2393,8 @@ if __name__ == '__main__':
                parser.add_option('--playlist-end',
                                dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
                parser.add_option('--dump-user-agent',
-                               action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False)
+                               action='store_true', dest='dump_user_agent',
+                               help='display the current browser identification', default=False)
 
                authentication = optparse.OptionGroup(parser, 'Authentication Options')
                authentication.add_option('-u', '--username',
@@ -2380,13 +2424,19 @@ if __name__ == '__main__':
                verbosity.add_option('-e', '--get-title',
                                action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
                verbosity.add_option('--get-thumbnail',
-                               action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
+                               action='store_true', dest='getthumbnail',
+                               help='simulate, quiet but print thumbnail URL', default=False)
                verbosity.add_option('--get-description',
-                               action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
+                               action='store_true', dest='getdescription',
+                               help='simulate, quiet but print video description', default=False)
+               verbosity.add_option('--get-filename',
+                               action='store_true', dest='getfilename',
+                               help='simulate, quiet but print output filename', default=False)
                verbosity.add_option('--no-progress',
                                action='store_true', dest='noprogress', help='do not print progress bar', default=False)
                verbosity.add_option('--console-title',
-                               action='store_true', dest='consoletitle', help='display progress in console titlebar', default=False)
+                               action='store_true', dest='consoletitle',
+                               help='display progress in console titlebar', default=False)
                parser.add_option_group(verbosity)
 
                filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
@@ -2395,7 +2445,8 @@ if __name__ == '__main__':
                filesystem.add_option('-l', '--literal',
                                action='store_true', dest='useliteral', help='use literal title in file name', default=False)
                filesystem.add_option('-A', '--auto-number',
-                               action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False)
+                               action='store_true', dest='autonumber',
+                               help='number downloaded files starting from 00000', default=False)
                filesystem.add_option('-o', '--output',
                                dest='outtmpl', metavar='TEMPLATE', help='output filename template')
                filesystem.add_option('-a', '--batch-file',
@@ -2408,6 +2459,9 @@ if __name__ == '__main__':
                                dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
                filesystem.add_option('--no-part',
                                action='store_true', dest='nopart', help='do not use .part files', default=False)
+               filesystem.add_option('--no-mtime',
+                               action='store_false', dest='updatetime',
+                               help='do not use the Last-modified header to set the file modification time', default=True)
                parser.add_option_group(filesystem)
 
                (opts, args) = parser.parse_args()
@@ -2502,12 +2556,13 @@ if __name__ == '__main__':
                        'usenetrc': opts.usenetrc,
                        'username': opts.username,
                        'password': opts.password,
-                       'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
+                       'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
                        'forceurl': opts.geturl,
                        'forcetitle': opts.gettitle,
                        'forcethumbnail': opts.getthumbnail,
                        'forcedescription': opts.getdescription,
-                       'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
+                       'forcefilename': opts.getfilename,
+                       'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
                        'format': opts.format,
                        'format_limit': opts.format_limit,
                        'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
@@ -2531,6 +2586,7 @@ if __name__ == '__main__':
                        'logtostderr': opts.outtmpl == '-',
                        'consoletitle': opts.consoletitle,
                        'nopart': opts.nopart,
+                       'updatetime': opts.updatetime,
                        })
                fd.add_info_extractor(youtube_search_ie)
                fd.add_info_extractor(youtube_pl_ie)