fixed PEP8 whitespace issues
[youtube-dl.git] / youtube-dl
index 617ac13..36ca6ba 100755 (executable)
@@ -38,7 +38,7 @@ except ImportError:
        from cgi import parse_qs
 
 std_headers = {
-       'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11',
+       'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
@@ -47,6 +47,7 @@ std_headers = {
 
 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
 
+
 def preferredencoding():
        """Get preferred encoding.
 
@@ -63,6 +64,7 @@ def preferredencoding():
                        yield pref
        return yield_preferredencoding().next()
 
+
 def htmlentity_transform(matchobj):
        """Transforms an HTML entity to a Unicode character.
 
@@ -89,11 +91,13 @@ def htmlentity_transform(matchobj):
        # Unknown entity in name, return its literal representation
        return (u'&%s;' % entity)
 
+
 def sanitize_title(utitle):
        """Sanitizes a video title so it could be used as part of a filename."""
        utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
        return utitle.replace(unicode(os.sep), u'%')
 
+
 def sanitize_open(filename, open_mode):
        """Try to open the given filename, and slightly tweak it if this fails.
 
@@ -120,13 +124,15 @@ def sanitize_open(filename, open_mode):
                stream = open(filename, open_mode)
                return (stream, filename)
 
+
 def timeconvert(timestr):
-    """Convert RFC 2822 defined time string into system timestamp"""
-    timestamp = None
-    timetuple = email.utils.parsedate_tz(timestr)
-    if timetuple is not None:
-        timestamp = email.utils.mktime_tz(timetuple)
-    return timestamp
+       """Convert RFC 2822 defined time string into system timestamp"""
+       timestamp = None
+       timetuple = email.utils.parsedate_tz(timestr)
+       if timetuple is not None:
+               timestamp = email.utils.mktime_tz(timetuple)
+       return timestamp
+
 
 class DownloadError(Exception):
        """Download Error exception.
@@ -137,6 +143,7 @@ class DownloadError(Exception):
        """
        pass
 
+
 class SameFileError(Exception):
        """Same File exception.
 
@@ -145,6 +152,7 @@ class SameFileError(Exception):
        """
        pass
 
+
 class PostProcessingError(Exception):
        """Post Processing exception.
 
@@ -153,6 +161,7 @@ class PostProcessingError(Exception):
        """
        pass
 
+
 class UnavailableVideoError(Exception):
        """Unavailable Format exception.
 
@@ -161,6 +170,7 @@ class UnavailableVideoError(Exception):
        """
        pass
 
+
 class ContentTooShortError(Exception):
        """Content Too Short exception.
 
@@ -176,6 +186,7 @@ class ContentTooShortError(Exception):
                self.downloaded = downloaded
                self.expected = expected
 
+
 class YoutubeDLHandler(urllib2.HTTPHandler):
        """Handler for HTTP requests and responses.
 
@@ -185,11 +196,11 @@ class YoutubeDLHandler(urllib2.HTTPHandler):
        a particular request, the original request in the program code only has
        to include the HTTP header "Youtubedl-No-Compression", which will be
        removed before making the real request.
-       
+
        Part of this code was copied from:
 
-         http://techknack.net/python-urllib2-handlers/
-         
+       http://techknack.net/python-urllib2-handlers/
+
        Andrew Rowls, the author of that code, agreed to release it to the
        public domain.
        """
@@ -200,7 +211,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler):
                        return zlib.decompress(data, -zlib.MAX_WBITS)
                except zlib.error:
                        return zlib.decompress(data)
-       
+
        @staticmethod
        def addinfourl_wrapper(stream, headers, url, code):
                if hasattr(urllib2.addinfourl, 'getcode'):
@@ -208,7 +219,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler):
                ret = urllib2.addinfourl(stream, headers, url)
                ret.code = code
                return ret
-       
+
        def http_request(self, req):
                for h in std_headers:
                        if h in req.headers:
@@ -234,6 +245,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler):
                        resp.msg = old_resp.msg
                return resp
 
+
 class FileDownloader(object):
        """File Downloader class.
 
@@ -325,7 +337,7 @@ class FileDownloader(object):
                else:
                        exponent = long(math.log(bytes, 1024.0))
                suffix = 'bkMGTPEZY'[exponent]
-               converted = float(bytes) / float(1024**exponent)
+               converted = float(bytes) / float(1024 ** exponent)
                return '%.2f%s' % (converted, suffix)
 
        @staticmethod
@@ -463,7 +475,7 @@ class FileDownloader(object):
                        os.rename(old_filename, new_filename)
                except (IOError, OSError), err:
                        self.trouble(u'ERROR: unable to rename file')
-       
+
        def try_utime(self, filename, last_modified_hdr):
                """Try to set the last-modified time of the given file."""
                if last_modified_hdr is None:
@@ -477,7 +489,7 @@ class FileDownloader(object):
                if filetime is None:
                        return
                try:
-                       os.utime(filename,(time.time(), filetime))
+                       os.utime(filename, (time.time(), filetime))
                except:
                        pass
 
@@ -680,7 +692,7 @@ class FileDownloader(object):
                # Request parameters in case of being able to resume
                if self.params.get('continuedl', False) and resume_len != 0:
                        self.report_resuming_byte(resume_len)
-                       request.add_header('Range','bytes=%d-' % resume_len)
+                       request.add_header('Range', 'bytes=%d-' % resume_len)
                        open_mode = 'ab'
 
                count = 0
@@ -706,7 +718,7 @@ class FileDownloader(object):
                                        else:
                                                # Examine the reported length
                                                if (content_length is not None and
-                                                   (resume_len - 100 < long(content_length) < resume_len + 100)):
+                                                               (resume_len - 100 < long(content_length) < resume_len + 100)):
                                                        # The file had already been fully downloaded.
                                                        # Explanation to the above condition: in issue #175 it was revealed that
                                                        # YouTube sometimes adds or removes a few bytes from the end of the file,
@@ -784,6 +796,7 @@ class FileDownloader(object):
 
                return True
 
+
 class InfoExtractor(object):
        """Information Extractor class.
 
@@ -855,10 +868,11 @@ class InfoExtractor(object):
                """Real extraction process. Redefine in subclasses."""
                pass
 
+
 class YoutubeIE(InfoExtractor):
        """Information extractor for youtube.com."""
 
-       _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
+       _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
        _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
        _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
        _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
@@ -1009,7 +1023,7 @@ class YoutubeIE(InfoExtractor):
                self.report_video_info_webpage_download(video_id)
                for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
                        video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
-                                          % (video_id, el_type))
+                                       % (video_id, el_type))
                        request = urllib2.Request(video_info_url)
                        try:
                                video_info_webpage = urllib2.urlopen(request).read()
@@ -1056,7 +1070,7 @@ class YoutubeIE(InfoExtractor):
 
                # upload date
                upload_date = u'NA'
-               mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
+               mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
                if mobj is not None:
                        upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
                        format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
@@ -1079,8 +1093,10 @@ class YoutubeIE(InfoExtractor):
                # Decide which formats to download
                req_format = self._downloader.params.get('format', None)
 
-               if 'fmt_url_map' in video_info:
-                       url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
+               if 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
+                       url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
+                       url_data = [dict(pairStr.split('=') for pairStr in uds.split('&')) for uds in url_data_strs]
+                       url_map = dict((ud['itag'], urllib.unquote(ud['url'])) for ud in url_data)
                        format_limit = self._downloader.params.get('format_limit', None)
                        if format_limit is not None and format_limit in self._available_formats:
                                format_list = self._available_formats[self._available_formats.index(format_limit):]
@@ -1369,6 +1385,7 @@ class DailymotionIE(InfoExtractor):
                except UnavailableVideoError:
                        self._downloader.trouble(u'\nERROR: unable to download video')
 
+
 class GoogleIE(InfoExtractor):
        """Information extractor for video.google.com."""
 
@@ -1462,7 +1479,6 @@ class GoogleIE(InfoExtractor):
                else:   # we need something to pass to process_info
                        video_thumbnail = ''
 
-
                try:
                        # Process video information
                        self._downloader.process_info({
@@ -1662,7 +1678,8 @@ class YahooIE(InfoExtractor):
                        self._downloader.trouble(u'ERROR: unable to extract video description')
                        return
                video_description = mobj.group(1).decode('utf-8')
-               if not video_description: video_description = 'No description available.'
+               if not video_description:
+                       video_description = 'No description available.'
 
                # Extract video height and width
                mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
@@ -1683,8 +1700,8 @@ class YahooIE(InfoExtractor):
                yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
                yv_bitrate = '700'  # according to Wikipedia this is hard-coded
                request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
-                                         '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
-                                         '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
+                               '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
+                               '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
                try:
                        self.report_download_webpage(video_id)
                        webpage = urllib2.urlopen(request).read()
@@ -1777,11 +1794,11 @@ class GenericIE(InfoExtractor):
                        return
 
                video_url = urllib.unquote(mobj.group(1))
-               video_id  = os.path.basename(video_url)
+               video_id = os.path.basename(video_url)
 
                # here's a fun little line of code for you:
                video_extension = os.path.splitext(video_id)[1][1:]
-               video_id        = os.path.splitext(video_id)[0]
+               video_id = os.path.splitext(video_id)[0]
 
                # it's tempting to parse this further, but you would
                # have to take into account all the variations like
@@ -1854,7 +1871,7 @@ class YoutubeSearchIE(InfoExtractor):
 
                prefix, query = query.split(':')
                prefix = prefix[8:]
-               query  = query.encode('utf-8')
+               query = query.encode('utf-8')
                if prefix == '':
                        self._download_n_results(query, 1)
                        return
@@ -1868,7 +1885,7 @@ class YoutubeSearchIE(InfoExtractor):
                                        self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
                                        return
                                elif n > self._max_youtube_results:
-                                       self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
+                                       self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
                                        n = self._max_youtube_results
                                self._download_n_results(query, n)
                                return
@@ -1912,6 +1929,7 @@ class YoutubeSearchIE(InfoExtractor):
 
                        pagenum = pagenum + 1
 
+
 class GoogleSearchIE(InfoExtractor):
        """Information Extractor for Google Video search queries."""
        _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
@@ -1945,7 +1963,7 @@ class GoogleSearchIE(InfoExtractor):
 
                prefix, query = query.split(':')
                prefix = prefix[8:]
-               query  = query.encode('utf-8')
+               query = query.encode('utf-8')
                if prefix == '':
                        self._download_n_results(query, 1)
                        return
@@ -1959,7 +1977,7 @@ class GoogleSearchIE(InfoExtractor):
                                        self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
                                        return
                                elif n > self._max_google_results:
-                                       self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n))
+                                       self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
                                        n = self._max_google_results
                                self._download_n_results(query, n)
                                return
@@ -2003,6 +2021,7 @@ class GoogleSearchIE(InfoExtractor):
 
                        pagenum = pagenum + 1
 
+
 class YahooSearchIE(InfoExtractor):
        """Information Extractor for Yahoo! Video search queries."""
        _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
@@ -2036,7 +2055,7 @@ class YahooSearchIE(InfoExtractor):
 
                prefix, query = query.split(':')
                prefix = prefix[8:]
-               query  = query.encode('utf-8')
+               query = query.encode('utf-8')
                if prefix == '':
                        self._download_n_results(query, 1)
                        return
@@ -2050,7 +2069,7 @@ class YahooSearchIE(InfoExtractor):
                                        self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
                                        return
                                elif n > self._max_yahoo_results:
-                                       self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n))
+                                       self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
                                        n = self._max_yahoo_results
                                self._download_n_results(query, n)
                                return
@@ -2094,6 +2113,7 @@ class YahooSearchIE(InfoExtractor):
 
                        pagenum = pagenum + 1
 
+
 class YoutubePlaylistIE(InfoExtractor):
        """Information Extractor for YouTube playlists."""
 
@@ -2170,6 +2190,7 @@ class YoutubePlaylistIE(InfoExtractor):
                        self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
                return
 
+
 class YoutubeUserIE(InfoExtractor):
        """Information Extractor for YouTube users."""
 
@@ -2191,7 +2212,7 @@ class YoutubeUserIE(InfoExtractor):
        def report_download_page(self, username, start_index):
                """Report attempt to download user page."""
                self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
-                                          (username, start_index, start_index + self._GDATA_PAGE_SIZE))
+                               (username, start_index, start_index + self._GDATA_PAGE_SIZE))
 
        def _real_initialize(self):
                self._youtube_ie.initialize()
@@ -2253,9 +2274,9 @@ class YoutubeUserIE(InfoExtractor):
                        video_ids = video_ids[playliststart:]
                else:
                        video_ids = video_ids[playliststart:playlistend]
-                       
+
                self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
-                                          (username, all_ids_count, len(video_ids)))
+                               (username, all_ids_count, len(video_ids)))
 
                for video_id in video_ids:
                        self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
@@ -2340,6 +2361,7 @@ class DepositFilesIE(InfoExtractor):
                except UnavailableVideoError, err:
                        self._downloader.trouble(u'ERROR: unable to download file')
 
+
 class FacebookIE(InfoExtractor):
        """Information Extractor for Facebook"""
 
@@ -2508,7 +2530,7 @@ class FacebookIE(InfoExtractor):
                # description
                video_description = 'No description available.'
                if (self._downloader.params.get('forcedescription', False) and
-                   'description' in video_info):
+                       'description' in video_info):
                        video_description = video_info['description']
 
                url_map = video_info['video_urls']
@@ -2563,6 +2585,7 @@ class FacebookIE(InfoExtractor):
                        except UnavailableVideoError, err:
                                self._downloader.trouble(u'\nERROR: unable to download video')
 
+
 class PostProcessor(object):
        """Post Processor class.
 
@@ -2609,6 +2632,7 @@ class PostProcessor(object):
                """
                return information # by default, do nothing
 
+
 class FFmpegExtractAudioPP(PostProcessor):
 
        def __init__(self, downloader=None, preferredcodec=None):
@@ -2620,8 +2644,8 @@ class FFmpegExtractAudioPP(PostProcessor):
        @staticmethod
        def get_audio_codec(path):
                try:
-                       handle = subprocess.Popen(['ffprobe', '-show_streams', path],
-                                       stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
+                       cmd = ['ffprobe', '-show_streams', '--', path]
+                       handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
                        output = handle.communicate()[0]
                        if handle.wait() != 0:
                                return None
@@ -2638,8 +2662,8 @@ class FFmpegExtractAudioPP(PostProcessor):
        @staticmethod
        def run_ffmpeg(path, out_path, codec, more_opts):
                try:
-                       ret = subprocess.call(['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + [out_path],
-                                       stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
+                       cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
+                       ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
                        return (ret == 0)
                except (IOError, OSError):
                        return False
@@ -2723,7 +2747,7 @@ if __name__ == '__main__':
                # Parse command line
                parser = optparse.OptionParser(
                        usage='Usage: %prog [options] url...',
-                       version='2011.02.25',
+                       version='2011.08.04',
                        conflict_handler='resolve',
                )