Bump version number
[youtube-dl.git] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # License: Public domain code
6 import htmlentitydefs
7 import httplib
8 import locale
9 import math
10 import netrc
11 import os
12 import os.path
13 import re
14 import socket
15 import string
16 import subprocess
17 import sys
18 import time
19 import urllib
20 import urllib2
21 import urlparse
22
# Default HTTP headers sent with every request. They imitate a desktop
# Firefox browser so sites serve the same content a regular user would get.
std_headers = {
	'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
	'Accept-Language': 'en-us,en;q=0.5',
}

# Characters considered safe for "simplified" titles: ASCII letters and
# digits. NOTE: str.decode() is Python 2 only — this produces a unicode string.
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
31
def preferredencoding():
	"""Get preferred encoding.

	Returns the best encoding scheme for the system, based on
	locale.getpreferredencoding() and some further tweaks.

	The name returned is validated by actually encoding a small
	string with it; if the locale reports an unusable or unknown
	codec, 'UTF-8' is used as a safe fallback.
	"""
	# The original implementation built a one-shot generator and called
	# its (Python 2 only) .next() method; a plain try/return is simpler
	# and behaves identically.
	try:
		pref = locale.getpreferredencoding()
		u'TEST'.encode(pref)
	except Exception:
		pref = 'UTF-8'
	return pref
47
class DownloadError(Exception):
	"""Download Error exception.

	This exception may be thrown by FileDownloader objects if they are not
	configured to continue on errors. They will contain the appropriate
	error message.

	Raised by FileDownloader.trouble() unless the 'ignoreerrors'
	option is set.
	"""
	pass
56
class SameFileError(Exception):
	"""Same File exception.

	This exception will be thrown by FileDownloader objects if they detect
	multiple files would have to be downloaded to the same file on disk.

	Raised by FileDownloader.download() when more than one URL is given
	together with a fixed (non-templated) output filename.
	"""
	pass
64
class PostProcessingError(Exception):
	"""Post Processing exception.

	This exception may be raised by PostProcessor's .run() method to
	indicate an error in the postprocessing task. It is caught and
	reported by FileDownloader.process_info().
	"""
	pass
72
class UnavailableFormatError(Exception):
	"""Unavailable Format exception.

	This exception will be thrown when a video is requested
	in a format that is not available for that video.

	Raised by FileDownloader.process_info() when verifying the video URL
	or downloading the data fails with an OS/HTTP-level error.
	"""
	pass
80
class ContentTooShortError(Exception):
	"""Content Too Short exception.

	This exception may be raised by FileDownloader objects when a file they
	download is too small for what the server announced first, indicating
	the connection was probably interrupted.

	Attributes (both in bytes):
	downloaded -- number of bytes actually received
	expected   -- number of bytes announced by the server
	"""
	# Both in bytes
	downloaded = None
	expected = None

	def __init__(self, downloaded, expected):
		# Call the base constructor so str(err) and err.args carry a
		# useful message instead of being empty.
		Exception.__init__(self, 'Content too short: %s bytes downloaded, %s expected' % (downloaded, expected))
		self.downloaded = downloaded
		self.expected = expected
95
96 class FileDownloader(object):
97         """File Downloader class.
98
99         File downloader objects are the ones responsible of downloading the
100         actual video file and writing it to disk if the user has requested
101         it, among some other tasks. In most cases there should be one per
102         program. As, given a video URL, the downloader doesn't know how to
103         extract all the needed information, task that InfoExtractors do, it
104         has to pass the URL to one of them.
105
106         For this, file downloader objects have a method that allows
107         InfoExtractors to be registered in a given order. When it is passed
108         a URL, the file downloader handles it to the first InfoExtractor it
109         finds that reports being able to handle it. The InfoExtractor extracts
110         all the information about the video or videos the URL refers to, and
111         asks the FileDownloader to process the video information, possibly
112         downloading the video.
113
114         File downloaders accept a lot of parameters. In order not to saturate
115         the object constructor with arguments, it receives a dictionary of
116         options instead. These options are available through the params
117         attribute for the InfoExtractors to use. The FileDownloader also
118         registers itself as the downloader in charge for the InfoExtractors
119         that are added to it, so this is a "mutual registration".
120
121         Available options:
122
123         username:       Username for authentication purposes.
124         password:       Password for authentication purposes.
125         usenetrc:       Use netrc for authentication instead.
126         quiet:          Do not print messages to stdout.
127         forceurl:       Force printing final URL.
128         forcetitle:     Force printing title.
129         simulate:       Do not download the video files.
130         format:         Video format code.
131         outtmpl:        Template for output names.
132         ignoreerrors:   Do not stop on download errors.
133         ratelimit:      Download speed limit, in bytes/sec.
134         nooverwrites:   Prevent overwriting files.
135         continuedl:     Try to continue downloads if possible.
136         """
137
138         params = None
139         _ies = []
140         _pps = []
141         _download_retcode = None
142
143         def __init__(self, params):
144                 """Create a FileDownloader object with the given options."""
145                 self._ies = []
146                 self._pps = []
147                 self._download_retcode = 0
148                 self.params = params
149         
150         @staticmethod
151         def pmkdir(filename):
152                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
153                 components = filename.split(os.sep)
154                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
155                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
156                 for dir in aggregate:
157                         if not os.path.exists(dir):
158                                 os.mkdir(dir)
159         
160         @staticmethod
161         def format_bytes(bytes):
162                 if bytes is None:
163                         return 'N/A'
164                 if type(bytes) is str:
165                         bytes = float(bytes)
166                 if bytes == 0.0:
167                         exponent = 0
168                 else:
169                         exponent = long(math.log(bytes, 1024.0))
170                 suffix = 'bkMGTPEZY'[exponent]
171                 converted = float(bytes) / float(1024**exponent)
172                 return '%.2f%s' % (converted, suffix)
173
174         @staticmethod
175         def calc_percent(byte_counter, data_len):
176                 if data_len is None:
177                         return '---.-%'
178                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
179
180         @staticmethod
181         def calc_eta(start, now, total, current):
182                 if total is None:
183                         return '--:--'
184                 dif = now - start
185                 if current == 0 or dif < 0.001: # One millisecond
186                         return '--:--'
187                 rate = float(current) / dif
188                 eta = long((float(total) - float(current)) / rate)
189                 (eta_mins, eta_secs) = divmod(eta, 60)
190                 if eta_mins > 99:
191                         return '--:--'
192                 return '%02d:%02d' % (eta_mins, eta_secs)
193
194         @staticmethod
195         def calc_speed(start, now, bytes):
196                 dif = now - start
197                 if bytes == 0 or dif < 0.001: # One millisecond
198                         return '%10s' % '---b/s'
199                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
200
201         @staticmethod
202         def best_block_size(elapsed_time, bytes):
203                 new_min = max(bytes / 2.0, 1.0)
204                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
205                 if elapsed_time < 0.001:
206                         return long(new_max)
207                 rate = bytes / elapsed_time
208                 if rate > new_max:
209                         return long(new_max)
210                 if rate < new_min:
211                         return long(new_min)
212                 return long(rate)
213
214         @staticmethod
215         def parse_bytes(bytestr):
216                 """Parse a string indicating a byte quantity into a long integer."""
217                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
218                 if matchobj is None:
219                         return None
220                 number = float(matchobj.group(1))
221                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
222                 return long(round(number * multiplier))
223
224         @staticmethod
225         def verify_url(url):
226                 """Verify a URL is valid and data could be downloaded. Return real data URL."""
227                 request = urllib2.Request(url, None, std_headers)
228                 data = urllib2.urlopen(request)
229                 data.read(1)
230                 url = data.geturl()
231                 data.close()
232                 return url
233
234         def add_info_extractor(self, ie):
235                 """Add an InfoExtractor object to the end of the list."""
236                 self._ies.append(ie)
237                 ie.set_downloader(self)
238         
239         def add_post_processor(self, pp):
240                 """Add a PostProcessor object to the end of the chain."""
241                 self._pps.append(pp)
242                 pp.set_downloader(self)
243         
244         def to_stdout(self, message, skip_eol=False):
245                 """Print message to stdout if not in quiet mode."""
246                 if not self.params.get('quiet', False):
247                         print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
248                         sys.stdout.flush()
249         
250         def to_stderr(self, message):
251                 """Print message to stderr."""
252                 print >>sys.stderr, message.encode(preferredencoding())
253         
254         def fixed_template(self):
255                 """Checks if the output template is fixed."""
256                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
257
258         def trouble(self, message=None):
259                 """Determine action to take when a download problem appears.
260
261                 Depending on if the downloader has been configured to ignore
262                 download errors or not, this method may throw an exception or
263                 not when errors are found, after printing the message.
264                 """
265                 if message is not None:
266                         self.to_stderr(message)
267                 if not self.params.get('ignoreerrors', False):
268                         raise DownloadError(message)
269                 self._download_retcode = 1
270
271         def slow_down(self, start_time, byte_counter):
272                 """Sleep if the download speed is over the rate limit."""
273                 rate_limit = self.params.get('ratelimit', None)
274                 if rate_limit is None or byte_counter == 0:
275                         return
276                 now = time.time()
277                 elapsed = now - start_time
278                 if elapsed <= 0.0:
279                         return
280                 speed = float(byte_counter) / elapsed
281                 if speed > rate_limit:
282                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
283
284         def report_destination(self, filename):
285                 """Report destination filename."""
286                 self.to_stdout(u'[download] Destination: %s' % filename)
287         
288         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
289                 """Report download progress."""
290                 self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
291                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
292
293         def report_resuming_byte(self, resume_len):
294                 """Report attemtp to resume at given byte."""
295                 self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
296         
297         def report_file_already_downloaded(self, file_name):
298                 """Report file has already been fully downloaded."""
299                 self.to_stdout(u'[download] %s has already been downloaded' % file_name)
300         
301         def report_unable_to_resume(self):
302                 """Report it was impossible to resume download."""
303                 self.to_stdout(u'[download] Unable to resume')
304         
305         def report_finish(self):
306                 """Report download finished."""
307                 self.to_stdout(u'')
308
309         def process_info(self, info_dict):
310                 """Process a single dictionary returned by an InfoExtractor."""
311                 # Do nothing else if in simulate mode
312                 if self.params.get('simulate', False):
313                         try:
314                                 info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
315                         except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
316                                 raise UnavailableFormatError
317
318                         # Forced printings
319                         if self.params.get('forcetitle', False):
320                                 print info_dict['title'].encode(preferredencoding())
321                         if self.params.get('forceurl', False):
322                                 print info_dict['url'].encode(preferredencoding())
323
324                         return
325                         
326                 try:
327                         template_dict = dict(info_dict)
328                         template_dict['epoch'] = unicode(long(time.time()))
329                         filename = self.params['outtmpl'] % template_dict
330                 except (ValueError, KeyError), err:
331                         self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
332                 if self.params.get('nooverwrites', False) and os.path.exists(filename):
333                         self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
334                         return
335
336                 try:
337                         self.pmkdir(filename)
338                 except (OSError, IOError), err:
339                         self.trouble('ERROR: unable to create directories: %s' % str(err))
340                         return
341
342                 try:
343                         success = self._do_download(filename, info_dict['url'].encode('utf-8'))
344                 except (OSError, IOError), err:
345                         raise UnavailableFormatError
346                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
347                         self.trouble('ERROR: unable to download video data: %s' % str(err))
348                         return
349                 except (ContentTooShortError, ), err:
350                         self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
351                         return
352
353                 if success:
354                         try:
355                                 self.post_process(filename, info_dict)
356                         except (PostProcessingError), err:
357                                 self.trouble('ERROR: postprocessing: %s' % str(err))
358                                 return
359
360         def download(self, url_list):
361                 """Download a given list of URLs."""
362                 if len(url_list) > 1 and self.fixed_template():
363                         raise SameFileError(self.params['outtmpl'])
364
365                 for url in url_list:
366                         suitable_found = False
367                         for ie in self._ies:
368                                 # Go to next InfoExtractor if not suitable
369                                 if not ie.suitable(url):
370                                         continue
371
372                                 # Suitable InfoExtractor found
373                                 suitable_found = True
374
375                                 # Extract information from URL and process it
376                                 ie.extract(url)
377
378                                 # Suitable InfoExtractor had been found; go to next URL
379                                 break
380
381                         if not suitable_found:
382                                 self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
383
384                 return self._download_retcode
385
386         def post_process(self, filename, ie_info):
387                 """Run the postprocessing chain on the given file."""
388                 info = dict(ie_info)
389                 info['filepath'] = filename
390                 for pp in self._pps:
391                         info = pp.run(info)
392                         if info is None:
393                                 break
394         
395         def _download_with_rtmpdump(self, filename, url):
396                 self.report_destination(filename)
397
398                 # Check for rtmpdump first
399                 try:
400                         subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
401                 except (OSError, IOError):
402                         self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
403                         return False
404
405                 # Download using rtmpdump. rtmpdump returns exit code 2 when
406                 # the connection was interrumpted and resuming appears to be
407                 # possible. This is part of rtmpdump's normal usage, AFAIK.
408                 retval = subprocess.call(['rtmpdump', '-q', '-r', url, '-o', filename] + [[], ['-e']][self.params.get('continuedl', False)])
409                 while retval == 2:
410                         self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename), skip_eol=True)
411                         time.sleep(2.0) # This seems to be needed
412                         retval = subprocess.call(['rtmpdump', '-q', '-e', '-r', url, '-o', filename])
413                 if retval == 0:
414                         self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
415                         return True
416                 else:
417                         self.trouble('ERROR: rtmpdump exited with code %d' % retval)
418                         return False
419
420         def _do_download(self, filename, url):
421                 # Attempt to download using rtmpdump
422                 if url.startswith('rtmp'):
423                         return self._download_with_rtmpdump(filename, url)
424
425                 stream = None
426                 open_mode = 'wb'
427                 basic_request = urllib2.Request(url, None, std_headers)
428                 request = urllib2.Request(url, None, std_headers)
429
430                 # Establish possible resume length
431                 if os.path.isfile(filename):
432                         resume_len = os.path.getsize(filename)
433                 else:
434                         resume_len = 0
435
436                 # Request parameters in case of being able to resume
437                 if self.params.get('continuedl', False) and resume_len != 0:
438                         self.report_resuming_byte(resume_len)
439                         request.add_header('Range','bytes=%d-' % resume_len)
440                         open_mode = 'ab'
441
442                 # Establish connection
443                 try:
444                         data = urllib2.urlopen(request)
445                 except (urllib2.HTTPError, ), err:
446                         if err.code != 416: #  416 is 'Requested range not satisfiable'
447                                 raise
448                         # Unable to resume
449                         data = urllib2.urlopen(basic_request)
450                         content_length = data.info()['Content-Length']
451
452                         if content_length is not None and long(content_length) == resume_len:
453                                 # Because the file had already been fully downloaded
454                                 self.report_file_already_downloaded(filename)
455                                 return True
456                         else:
457                                 # Because the server didn't let us
458                                 self.report_unable_to_resume()
459                                 open_mode = 'wb'
460
461                 data_len = data.info().get('Content-length', None)
462                 data_len_str = self.format_bytes(data_len)
463                 byte_counter = 0
464                 block_size = 1024
465                 start = time.time()
466                 while True:
467                         # Download and write
468                         before = time.time()
469                         data_block = data.read(block_size)
470                         after = time.time()
471                         data_block_len = len(data_block)
472                         if data_block_len == 0:
473                                 break
474                         byte_counter += data_block_len
475
476                         # Open file just in time
477                         if stream is None:
478                                 try:
479                                         stream = open(filename, open_mode)
480                                         self.report_destination(filename)
481                                 except (OSError, IOError), err:
482                                         self.trouble('ERROR: unable to open for writing: %s' % str(err))
483                                         return False
484                         stream.write(data_block)
485                         block_size = self.best_block_size(after - before, data_block_len)
486
487                         # Progress message
488                         percent_str = self.calc_percent(byte_counter, data_len)
489                         eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
490                         speed_str = self.calc_speed(start, time.time(), byte_counter)
491                         self.report_progress(percent_str, data_len_str, speed_str, eta_str)
492
493                         # Apply rate limit
494                         self.slow_down(start, byte_counter)
495
496                 self.report_finish()
497                 if data_len is not None and str(byte_counter) != data_len:
498                         raise ContentTooShortError(byte_counter, long(data_len))
499                 return True
500
class InfoExtractor(object):
	"""Base class for information extractors.

	An information extractor takes a URL and produces, for each video the
	URL refers to, a dictionary of metadata which it hands over to its
	FileDownloader. Each dictionary must provide these fields:

	id:             Video identifier.
	url:            Final video URL.
	uploader:       Nickname of the video uploader.
	title:          Literal title.
	stitle:         Simplified title.
	ext:            Video filename extension.

	Concrete extractors override _real_initialize() and _real_extract(),
	plus the suitable() static method, and are normally instantiated and
	registered with the main downloader.
	"""

	# One-time initialization flag and the downloader in charge.
	_ready = False
	_downloader = None

	def __init__(self, downloader=None):
		"""Create the extractor, optionally attaching a downloader."""
		self._ready = False
		self.set_downloader(downloader)

	@staticmethod
	def suitable(url):
		"""Return True when this IE can handle the given URL (base: never)."""
		return False

	def initialize(self):
		"""Perform one-time setup (authentication, etc.) on first use."""
		if self._ready:
			return
		self._real_initialize()
		self._ready = True

	def extract(self, url):
		"""Initialize if needed, then extract and return the URL's information."""
		self.initialize()
		return self._real_extract(url)

	def set_downloader(self, downloader):
		"""Attach the FileDownloader this IE reports to."""
		self._downloader = downloader

	def _real_initialize(self):
		"""Actual setup work; subclasses override this."""
		pass

	def _real_extract(self, url):
		"""Actual extraction work; subclasses override this."""
		pass
561
562 class YoutubeIE(InfoExtractor):
563         """Information extractor for youtube.com."""
564
565         _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
566         _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
567         _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
568         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
569         _NETRC_MACHINE = 'youtube'
570         _available_formats = ['37', '22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
571         _video_extensions = {
572                 '13': '3gp',
573                 '17': 'mp4',
574                 '18': 'mp4',
575                 '22': 'mp4',
576                 '37': 'mp4',
577         }
578
	@staticmethod
	def suitable(url):
		"""Return True if url matches the recognized YouTube URL patterns."""
		return (re.match(YoutubeIE._VALID_URL, url) is not None)
582
583         @staticmethod
584         def htmlentity_transform(matchobj):
585                 """Transforms an HTML entity to a Unicode character."""
586                 entity = matchobj.group(1)
587
588                 # Known non-numeric HTML entity
589                 if entity in htmlentitydefs.name2codepoint:
590                         return unichr(htmlentitydefs.name2codepoint[entity])
591
592                 # Unicode character
593                 mobj = re.match(ur'(?u)#(x?\d+)', entity)
594                 if mobj is not None:
595                         numstr = mobj.group(1)
596                         if numstr.startswith(u'x'):
597                                 base = 16
598                                 numstr = u'0%s' % numstr
599                         else:
600                                 base = 10
601                         return unichr(long(numstr, base))
602
603                 # Unknown entity in name, return its literal representation
604                 return (u'&%s;' % entity)
605
606         def report_lang(self):
607                 """Report attempt to set language."""
608                 self._downloader.to_stdout(u'[youtube] Setting language')
609
610         def report_login(self):
611                 """Report attempt to log in."""
612                 self._downloader.to_stdout(u'[youtube] Logging in')
613         
614         def report_age_confirmation(self):
615                 """Report attempt to confirm age."""
616                 self._downloader.to_stdout(u'[youtube] Confirming age')
617         
618         def report_video_info_webpage_download(self, video_id):
619                 """Report attempt to download video info webpage."""
620                 self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
621         
622         def report_information_extraction(self, video_id):
623                 """Report attempt to extract video information."""
624                 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
625         
	def report_unavailable_format(self, video_id, format):
		"""Report that the requested video format is not available."""
		self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
629         
630         def report_rtmp_download(self):
631                 """Indicate the download will use the RTMP protocol."""
632                 self._downloader.to_stdout(u'[youtube] RTMP download detected')
633         
	def _real_initialize(self):
		"""Prepare the YouTube session: set language, log in, confirm age.

		Credentials come from the downloader params ('username'/'password')
		or, with 'usenetrc', from the user's .netrc file. Language and
		login failures only emit warnings and return; an age-confirmation
		failure is reported through trouble(). Does nothing when no
		downloader is attached.
		"""
		if self._downloader is None:
			return

		username = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					# netrc authenticators() returns (login, account, password)
					username = info[0]
					password = info[2]
				else:
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				# Best-effort: a broken/missing .netrc aborts initialization
				# with a warning instead of crashing the download.
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		# Set language
		# NOTE(review): fetching _LANG_URL is done only for its cookie side
		# effect (the response body is discarded) — presumably it pins the
		# interface language so later page scraping is predictable; confirm.
		request = urllib2.Request(self._LANG_URL, None, std_headers)
		try:
			self.report_lang()
			urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
			return

		# No authentication to be performed
		if username is None:
			return

		# Log in
		login_form = {
				'current_form': 'loginForm',
				'next':         '/',
				'action_login': 'Log In',
				'username':     username,
				'password':     password,
				}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			# If the login form is still present in the response, the
			# credentials were rejected.
			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return

		# Confirm age
		age_form = {
				'next_url':             '/',
				'action_confirm':       'Confirm',
				}
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
		try:
			self.report_age_confirmation()
			age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return

703         def _real_extract(self, url):
704                 # Extract video id from URL
705                 mobj = re.match(self._VALID_URL, url)
706                 if mobj is None:
707                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
708                         return
709                 video_id = mobj.group(2)
710
711                 # Downloader parameters
712                 best_quality = False
713                 format_param = None
714                 quality_index = 0
715                 if self._downloader is not None:
716                         params = self._downloader.params
717                         format_param = params.get('format', None)
718                         if format_param == '0':
719                                 format_param = self._available_formats[quality_index]
720                                 best_quality = True
721
722                 while True:
723                         # Extension
724                         video_extension = self._video_extensions.get(format_param, 'flv')
725
726                         # Get video info
727                         video_info_url = 'http://www.youtube.com/get_video_info?&video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en' % video_id
728                         request = urllib2.Request(video_info_url, None, std_headers)
729                         try:
730                                 self.report_video_info_webpage_download(video_id)
731                                 video_info_webpage = urllib2.urlopen(request).read()
732                                 video_info = urlparse.parse_qs(video_info_webpage)
733                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
734                                 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
735                                 return
736                         self.report_information_extraction(video_id)
737
738                         # "t" param
739                         if 'token' not in video_info:
740                                 # Attempt to see if YouTube has issued an error message
741                                 if 'reason' not in video_info:
742                                         self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
743                                         stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
744                                         stream.write(video_info_webpage)
745                                         stream.close()
746                                 else:
747                                         reason = urllib.unquote_plus(video_info['reason'][0])
748                                         self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
749                                 return
750                         token = urllib.unquote_plus(video_info['token'][0])
751                         video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
752                         if format_param is not None:
753                                 video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
754
755                         # Check possible RTMP download
756                         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
757                                 self.report_rtmp_download()
758                                 video_real_url = video_info['conn'][0]
759
760                         # uploader
761                         if 'author' not in video_info:
762                                 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
763                                 return
764                         video_uploader = urllib.unquote_plus(video_info['author'][0])
765
766                         # title
767                         if 'title' not in video_info:
768                                 self._downloader.trouble(u'ERROR: unable to extract video title')
769                                 return
770                         video_title = urllib.unquote_plus(video_info['title'][0])
771                         video_title = video_title.decode('utf-8')
772                         video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
773                         video_title = video_title.replace(os.sep, u'%')
774
775                         # simplified title
776                         simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
777                         simple_title = simple_title.strip(ur'_')
778
779                         try:
780                                 # Process video information
781                                 self._downloader.process_info({
782                                         'id':           video_id.decode('utf-8'),
783                                         'url':          video_real_url.decode('utf-8'),
784                                         'uploader':     video_uploader.decode('utf-8'),
785                                         'title':        video_title,
786                                         'stitle':       simple_title,
787                                         'ext':          video_extension.decode('utf-8'),
788                                 })
789
790                                 return
791
792                         except UnavailableFormatError, err:
793                                 if best_quality:
794                                         if quality_index == len(self._available_formats) - 1:
795                                                 # I don't ever expect this to happen
796                                                 self._downloader.trouble(u'ERROR: no known formats available for video')
797                                                 return
798                                         else:
799                                                 self.report_unavailable_format(video_id, format_param)
800                                                 quality_index += 1
801                                                 format_param = self._available_formats[quality_index]
802                                                 continue
803                                 else: 
804                                         self._downloader.trouble('ERROR: format not available for video')
805                                         return
806
807
808 class MetacafeIE(InfoExtractor):
809         """Information Extractor for metacafe.com."""
810
811         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
812         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
813         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
814         _youtube_ie = None
815
816         def __init__(self, youtube_ie, downloader=None):
817                 InfoExtractor.__init__(self, downloader)
818                 self._youtube_ie = youtube_ie
819
820         @staticmethod
821         def suitable(url):
822                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
823
824         def report_disclaimer(self):
825                 """Report disclaimer retrieval."""
826                 self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
827
828         def report_age_confirmation(self):
829                 """Report attempt to confirm age."""
830                 self._downloader.to_stdout(u'[metacafe] Confirming age')
831         
832         def report_download_webpage(self, video_id):
833                 """Report webpage download."""
834                 self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
835         
836         def report_extraction(self, video_id):
837                 """Report information extraction."""
838                 self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
839
840         def _real_initialize(self):
841                 # Retrieve disclaimer
842                 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
843                 try:
844                         self.report_disclaimer()
845                         disclaimer = urllib2.urlopen(request).read()
846                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
847                         self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
848                         return
849
850                 # Confirm age
851                 disclaimer_form = {
852                         'filters': '0',
853                         'submit': "Continue - I'm over 18",
854                         }
855                 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
856                 try:
857                         self.report_age_confirmation()
858                         disclaimer = urllib2.urlopen(request).read()
859                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
860                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
861                         return
862         
863         def _real_extract(self, url):
864                 # Extract id and simplified title from URL
865                 mobj = re.match(self._VALID_URL, url)
866                 if mobj is None:
867                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
868                         return
869
870                 video_id = mobj.group(1)
871
872                 # Check if video comes from YouTube
873                 mobj2 = re.match(r'^yt-(.*)$', video_id)
874                 if mobj2 is not None:
875                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
876                         return
877
878                 simple_title = mobj.group(2).decode('utf-8')
879                 video_extension = 'flv'
880
881                 # Retrieve video webpage to extract further information
882                 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
883                 try:
884                         self.report_download_webpage(video_id)
885                         webpage = urllib2.urlopen(request).read()
886                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
887                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
888                         return
889
890                 # Extract URL, uploader and title from webpage
891                 self.report_extraction(video_id)
892                 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
893                 if mobj is None:
894                         self._downloader.trouble(u'ERROR: unable to extract media URL')
895                         return
896                 mediaURL = urllib.unquote(mobj.group(1))
897
898                 #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
899                 #if mobj is None:
900                 #       self._downloader.trouble(u'ERROR: unable to extract gdaKey')
901                 #       return
902                 #gdaKey = mobj.group(1)
903                 #
904                 #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
905
906                 video_url = mediaURL
907
908                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
909                 if mobj is None:
910                         self._downloader.trouble(u'ERROR: unable to extract title')
911                         return
912                 video_title = mobj.group(1).decode('utf-8')
913
914                 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
915                 if mobj is None:
916                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
917                         return
918                 video_uploader = mobj.group(1)
919
920                 try:
921                         # Process video information
922                         self._downloader.process_info({
923                                 'id':           video_id.decode('utf-8'),
924                                 'url':          video_url.decode('utf-8'),
925                                 'uploader':     video_uploader.decode('utf-8'),
926                                 'title':        video_title,
927                                 'stitle':       simple_title,
928                                 'ext':          video_extension.decode('utf-8'),
929                         })
930                 except UnavailableFormatError:
931                         self._downloader.trouble(u'ERROR: format not available for video')
932
933
934 class YoutubeSearchIE(InfoExtractor):
935         """Information Extractor for YouTube search queries."""
936         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
937         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
938         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
939         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
940         _youtube_ie = None
941         _max_youtube_results = 1000
942
943         def __init__(self, youtube_ie, downloader=None):
944                 InfoExtractor.__init__(self, downloader)
945                 self._youtube_ie = youtube_ie
946         
947         @staticmethod
948         def suitable(url):
949                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
950
951         def report_download_page(self, query, pagenum):
952                 """Report attempt to download playlist page with given number."""
953                 self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
954
955         def _real_initialize(self):
956                 self._youtube_ie.initialize()
957         
958         def _real_extract(self, query):
959                 mobj = re.match(self._VALID_QUERY, query)
960                 if mobj is None:
961                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
962                         return
963
964                 prefix, query = query.split(':')
965                 prefix = prefix[8:]
966                 if prefix == '':
967                         self._download_n_results(query, 1)
968                         return
969                 elif prefix == 'all':
970                         self._download_n_results(query, self._max_youtube_results)
971                         return
972                 else:
973                         try:
974                                 n = long(prefix)
975                                 if n <= 0:
976                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
977                                         return
978                                 elif n > self._max_youtube_results:
979                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
980                                         n = self._max_youtube_results
981                                 self._download_n_results(query, n)
982                                 return
983                         except ValueError: # parsing prefix as integer fails
984                                 self._download_n_results(query, 1)
985                                 return
986
987         def _download_n_results(self, query, n):
988                 """Downloads a specified number of results for a query"""
989
990                 video_ids = []
991                 already_seen = set()
992                 pagenum = 1
993
994                 while True:
995                         self.report_download_page(query, pagenum)
996                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
997                         request = urllib2.Request(result_url, None, std_headers)
998                         try:
999                                 page = urllib2.urlopen(request).read()
1000                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1001                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1002                                 return
1003
1004                         # Extract video identifiers
1005                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1006                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1007                                 if video_id not in already_seen:
1008                                         video_ids.append(video_id)
1009                                         already_seen.add(video_id)
1010                                         if len(video_ids) == n:
1011                                                 # Specified n videos reached
1012                                                 for id in video_ids:
1013                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1014                                                 return
1015
1016                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1017                                 for id in video_ids:
1018                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1019                                 return
1020
1021                         pagenum = pagenum + 1
1022
1023 class YoutubePlaylistIE(InfoExtractor):
1024         """Information Extractor for YouTube playlists."""
1025
1026         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*'
1027         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1028         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1029         _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s'
1030         _youtube_ie = None
1031
1032         def __init__(self, youtube_ie, downloader=None):
1033                 InfoExtractor.__init__(self, downloader)
1034                 self._youtube_ie = youtube_ie
1035         
1036         @staticmethod
1037         def suitable(url):
1038                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1039
1040         def report_download_page(self, playlist_id, pagenum):
1041                 """Report attempt to download playlist page with given number."""
1042                 self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1043
1044         def _real_initialize(self):
1045                 self._youtube_ie.initialize()
1046         
1047         def _real_extract(self, url):
1048                 # Extract playlist id
1049                 mobj = re.match(self._VALID_URL, url)
1050                 if mobj is None:
1051                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1052                         return
1053
1054                 # Download playlist pages
1055                 playlist_id = mobj.group(1)
1056                 video_ids = []
1057                 pagenum = 1
1058
1059                 while True:
1060                         self.report_download_page(playlist_id, pagenum)
1061                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1062                         try:
1063                                 page = urllib2.urlopen(request).read()
1064                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1065                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1066                                 return
1067
1068                         # Extract video identifiers
1069                         ids_in_page = []
1070                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1071                                 if mobj.group(1) not in ids_in_page:
1072                                         ids_in_page.append(mobj.group(1))
1073                         video_ids.extend(ids_in_page)
1074
1075                         if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page:
1076                                 break
1077                         pagenum = pagenum + 1
1078
1079                 for id in video_ids:
1080                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1081                 return
1082
1083 class YoutubeUserIE(InfoExtractor):
1084         """Information Extractor for YouTube users."""
1085
1086         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
1087         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1088         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
1089         _youtube_ie = None
1090
1091         def __init__(self, youtube_ie, downloader=None):
1092                 InfoExtractor.__init__(self, downloader)
1093                 self._youtube_ie = youtube_ie
1094         
1095         @staticmethod
1096         def suitable(url):
1097                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
1098
1099         def report_download_page(self, username):
1100                 """Report attempt to download user page."""
1101                 self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))
1102
1103         def _real_initialize(self):
1104                 self._youtube_ie.initialize()
1105         
1106         def _real_extract(self, url):
1107                 # Extract username
1108                 mobj = re.match(self._VALID_URL, url)
1109                 if mobj is None:
1110                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1111                         return
1112
1113                 # Download user page
1114                 username = mobj.group(1)
1115                 video_ids = []
1116                 pagenum = 1
1117
1118                 self.report_download_page(username)
1119                 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
1120                 try:
1121                         page = urllib2.urlopen(request).read()
1122                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1123                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1124                         return
1125
1126                 # Extract video identifiers
1127                 ids_in_page = []
1128
1129                 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1130                         if mobj.group(1) not in ids_in_page:
1131                                 ids_in_page.append(mobj.group(1))
1132                 video_ids.extend(ids_in_page)
1133
1134                 for id in video_ids:
1135                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1136                 return
1137
class PostProcessor(object):
	"""Base class for post-download processing steps.

	Instances are registered on a FileDownloader through its
	add_post_processor() method. After each successful download the
	downloader walks its chain of PostProcessors, calling run() on each
	one: first with an initial info dictionary, then with whatever the
	previous processor returned. The chain stops as soon as a processor
	returns None or the end of the chain is reached.

	Like InfoExtractor objects, PostProcessors keep a back-reference to
	the downloader they belong to ("mutual registration").
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Attach this post processor to the given downloader."""
		self._downloader = downloader

	def run(self, information):
		"""Process one finished download.

		``information`` is an InfoExtractor-style dictionary with an
		extra "filepath" key naming the downloaded file. Returning None
		halts the postprocessing chain; returning a dictionary (possibly
		this one, after modification) passes it to the next processor.
		Implementations may raise PostProcessingError to signal failure
		to the calling downloader.

		This base implementation passes the information through untouched.
		"""
		return information

1183         
1184 ### MAIN PROGRAM ###
1185 if __name__ == '__main__':
1186         try:
1187                 # Modules needed only when running the main program
1188                 import getpass
1189                 import optparse
1190
1191                 # Function to update the program file with the latest version from bitbucket.org
1192                 def update_self(downloader, filename):
1193                         # Note: downloader only used for options
1194                         if not os.access (filename, os.W_OK):
1195                                 sys.exit('ERROR: no write permissions on %s' % filename)
1196
1197                         downloader.to_stdout('Updating to latest stable version...')
1198                         latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
1199                         latest_version = urllib.urlopen(latest_url).read().strip()
1200                         prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
1201                         newcontent = urllib.urlopen(prog_url).read()
1202                         stream = open(filename, 'w')
1203                         stream.write(newcontent)
1204                         stream.close()
1205                         downloader.to_stdout('Updated to version %s' % latest_version)
1206
1207                 # General configuration
1208                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
1209                 urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
1210                 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
1211
1212                 # Parse command line
1213                 parser = optparse.OptionParser(
1214                         usage='Usage: %prog [options] url...',
1215                         version='2010.01.05',
1216                         conflict_handler='resolve',
1217                 )
1218
1219                 parser.add_option('-h', '--help',
1220                                 action='help', help='print this help text and exit')
1221                 parser.add_option('-v', '--version',
1222                                 action='version', help='print program version and exit')
1223                 parser.add_option('-U', '--update',
1224                                 action='store_true', dest='update_self', help='update this program to latest stable version')
1225                 parser.add_option('-i', '--ignore-errors',
1226                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
1227                 parser.add_option('-r', '--rate-limit',
1228                                 dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
1229
1230                 authentication = optparse.OptionGroup(parser, 'Authentication Options')
1231                 authentication.add_option('-u', '--username',
1232                                 dest='username', metavar='UN', help='account username')
1233                 authentication.add_option('-p', '--password',
1234                                 dest='password', metavar='PW', help='account password')
1235                 authentication.add_option('-n', '--netrc',
1236                                 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
1237                 parser.add_option_group(authentication)
1238
1239                 video_format = optparse.OptionGroup(parser, 'Video Format Options')
1240                 video_format.add_option('-f', '--format',
1241                                 action='store', dest='format', metavar='FMT', help='video format code')
1242                 video_format.add_option('-b', '--best-quality',
1243                                 action='store_const', dest='format', help='download the best quality video possible', const='0')
1244                 video_format.add_option('-m', '--mobile-version',
1245                                 action='store_const', dest='format', help='alias for -f 17', const='17')
1246                 video_format.add_option('-d', '--high-def',
1247                                 action='store_const', dest='format', help='alias for -f 22', const='22')
1248                 parser.add_option_group(video_format)
1249
1250                 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
1251                 verbosity.add_option('-q', '--quiet',
1252                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
1253                 verbosity.add_option('-s', '--simulate',
1254                                 action='store_true', dest='simulate', help='do not download video', default=False)
1255                 verbosity.add_option('-g', '--get-url',
1256                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
1257                 verbosity.add_option('-e', '--get-title',
1258                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
1259                 parser.add_option_group(verbosity)
1260
1261                 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
1262                 filesystem.add_option('-t', '--title',
1263                                 action='store_true', dest='usetitle', help='use title in file name', default=False)
1264                 filesystem.add_option('-l', '--literal',
1265                                 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
1266                 filesystem.add_option('-o', '--output',
1267                                 dest='outtmpl', metavar='TPL', help='output filename template')
1268                 filesystem.add_option('-a', '--batch-file',
1269                                 dest='batchfile', metavar='F', help='file containing URLs to download')
1270                 filesystem.add_option('-w', '--no-overwrites',
1271                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
1272                 filesystem.add_option('-c', '--continue',
1273                                 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
1274                 parser.add_option_group(filesystem)
1275
1276                 (opts, args) = parser.parse_args()
1277         
1278                 # Batch file verification
1279                 batchurls = []
1280                 if opts.batchfile is not None:
1281                         try:
1282                                 batchurls = open(opts.batchfile, 'r').readlines()
1283                                 batchurls = [x.strip() for x in batchurls]
1284                                 batchurls = [x for x in batchurls if len(x) > 0]
1285                         except IOError:
1286                                 sys.exit(u'ERROR: batch file could not be read')
1287                 all_urls = batchurls + args
1288
		# Conflicting, missing and erroneous options.
		# Each parser.error() call prints the message and exits the program.
		# .netrc and explicit credentials are mutually exclusive.
		if opts.usenetrc and (opts.username is not None or opts.password is not None):
			parser.error(u'using .netrc conflicts with giving username/password')
		# A password on its own is useless; a username must accompany it.
		if opts.password is not None and opts.username is None:
			parser.error(u'account username missing')
		# -o fully determines the filename, so -t/-l would be silently ignored.
		if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
			parser.error(u'using output template conflicts with using title or literal title')
		if opts.usetitle and opts.useliteral:
			parser.error(u'using title conflicts with using literal title')
		# Username given but no password: prompt interactively (no echo).
		if opts.username is not None and opts.password is None:
			opts.password = getpass.getpass(u'Type account password and press return:')
		# Replace the textual rate limit (e.g. "50k", "44.6m") with its
		# numeric byte value; parse_bytes returns None on malformed input.
		if opts.ratelimit is not None:
			numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
			if numeric_limit is None:
				parser.error(u'invalid rate limit specified')
			opts.ratelimit = numeric_limit
1305
1306                 # Information extractors
1307                 youtube_ie = YoutubeIE()
1308                 metacafe_ie = MetacafeIE(youtube_ie)
1309                 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
1310                 youtube_user_ie = YoutubeUserIE(youtube_ie)
1311                 youtube_search_ie = YoutubeSearchIE(youtube_ie)
1312
1313                 # File downloader
1314                 fd = FileDownloader({
1315                         'usenetrc': opts.usenetrc,
1316                         'username': opts.username,
1317                         'password': opts.password,
1318                         'quiet': (opts.quiet or opts.geturl or opts.gettitle),
1319                         'forceurl': opts.geturl,
1320                         'forcetitle': opts.gettitle,
1321                         'simulate': (opts.simulate or opts.geturl or opts.gettitle),
1322                         'format': opts.format,
1323                         'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
1324                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
1325                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
1326                                 or u'%(id)s.%(ext)s'),
1327                         'ignoreerrors': opts.ignoreerrors,
1328                         'ratelimit': opts.ratelimit,
1329                         'nooverwrites': opts.nooverwrites,
1330                         'continuedl': opts.continue_dl,
1331                         })
1332                 fd.add_info_extractor(youtube_search_ie)
1333                 fd.add_info_extractor(youtube_pl_ie)
1334                 fd.add_info_extractor(youtube_user_ie)
1335                 fd.add_info_extractor(metacafe_ie)
1336                 fd.add_info_extractor(youtube_ie)
1337
1338                 # Update version
1339                 if opts.update_self:
1340                         update_self(fd, sys.argv[0])
1341
1342                 # Maybe do nothing
1343                 if len(all_urls) < 1:
1344                         if not opts.update_self:
1345                                 parser.error(u'you must provide at least one URL')
1346                         else:
1347                                 sys.exit()
1348                 retcode = fd.download(all_urls)
1349                 sys.exit(retcode)
1350
	except DownloadError:
		# Presumably the failure was already reported downstream (DownloadError
		# "will contain the appropriate error message" per its docstring -- TODO
		# confirm it is printed before raising); just signal failure via status 1.
		sys.exit(1)
	except SameFileError:
		# Two or more URLs would resolve to the same output filename.
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		# Ctrl-C: exit with a short message instead of a traceback.
		sys.exit(u'\nERROR: Interrupted by user')