youtube-dl

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 __author__  = (
   5         'Ricardo Garcia Gonzalez',
   6         'Danny Colligan',
   7         'Benjamin Johnson',
   8         'Vasyl\' Vavrychuk',
   9         'Witold Baryluk',
  10         'Paweł Paprota',
  11         'Gergely Imreh',
  12         'Rogério Brito',
  13         'Philipp Hagemeister',
  14         'Sören Schulze',
  15         )
  16
  17 __license__ = 'Public Domain'
  18 __version__ = '2011.09.06-phihag'
  19
  20 UPDATE_URL = 'https://raw.github.com/phihag/youtube-dl/master/youtube-dl'
  21
  22 import cookielib
  23 import datetime
  24 import gzip
  25 import htmlentitydefs
  26 import httplib
  27 import locale
  28 import math
  29 import netrc
  30 import os
  31 import os.path
  32 import re
  33 import socket
  34 import string
  35 import subprocess
  36 import sys
  37 import time
  38 import urllib
  39 import urllib2
  40 import warnings
  41 import zlib
  42
  43 if os.name == 'nt':
  44         import ctypes
  45
  46 try:
  47         import email.utils
  48 except ImportError: # Python 2.4
  49         import email.Utils
  50 try:
  51         import cStringIO as StringIO
  52 except ImportError:
  53         import StringIO
  54
  55 # parse_qs was moved from the cgi module to the urlparse module recently.
  56 try:
  57         from urlparse import parse_qs
  58 except ImportError:
  59         from cgi import parse_qs
  60
  61 try:
  62         import lxml.etree
  63 except ImportError:
  64         pass # Handled below
  65
  66 std_headers = {
  67         'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
  68         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
  69         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  70         'Accept-Encoding': 'gzip, deflate',
  71         'Accept-Language': 'en-us,en;q=0.5',
  72 }
  73
  74 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
  75
  76 try:
  77         import json
  78 except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
  79         import re
  80         class json(object):
  81                 @staticmethod
  82                 def loads(s):
  83                         s = s.decode('UTF-8')
  84                         def raiseError(msg, i):
  85                                 raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
  86                         def skipSpace(i, expectMore=True):
  87                                 while i < len(s) and s[i] in ' \t\r\n':
  88                                         i += 1
  89                                 if expectMore:
  90                                         if i >= len(s):
  91                                                 raiseError('Premature end', i)
  92                                 return i
  93                         def decodeEscape(match):
  94                                 esc = match.group(1)
  95                                 _STATIC = {
  96                                         '"': '"',
  97                                         '\\': '\\',
  98                                         '/': '/',
  99                                         'b': unichr(0x8),
 100                                         'f': unichr(0xc),
 101                                         'n': '\n',
 102                                         'r': '\r',
 103                                         't': '\t',
 104                                 }
 105                                 if esc in _STATIC:
 106                                         return _STATIC[esc]
 107                                 if esc[0] == 'u':
 108                                         if len(esc) == 1+4:
 109                                                 return unichr(int(esc[1:5], 16))
 110                                         if len(esc) == 5+6 and esc[5:7] == '\\u':
 111                                                 hi = int(esc[1:5], 16)
 112                                                 low = int(esc[7:11], 16)
 113                                                 return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
 114                                 raise ValueError('Unknown escape ' + str(esc))
 115                         def parseString(i):
 116                                 i += 1
 117                                 e = i
 118                                 while True:
 119                                         e = s.index('"', e)
 120                                         bslashes = 0
 121                                         while s[e-bslashes-1] == '\\':
 122                                                 bslashes += 1
 123                                         if bslashes % 2 == 1:
 124                                                 e += 1
 125                                                 continue
 126                                         break
 127                                 rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)')
 128                                 stri = rexp.sub(decodeEscape, s[i:e])
 129                                 return (e+1,stri)
 130                         def parseObj(i):
 131                                 i += 1
 132                                 res = {}
 133                                 i = skipSpace(i)
 134                                 if s[i] == '}': # Empty dictionary
 135                                         return (i+1,res)
 136                                 while True:
 137                                         if s[i] != '"':
 138                                                 raiseError('Expected a string object key', i)
 139                                         i,key = parseString(i)
 140                                         i = skipSpace(i)
 141                                         if i >= len(s) or s[i] != ':':
 142                                                 raiseError('Expected a colon', i)
 143                                         i,val = parse(i+1)
 144                                         res[key] = val
 145                                         i = skipSpace(i)
 146                                         if s[i] == '}':
 147                                                 return (i+1, res)
 148                                         if s[i] != ',':
 149                                                 raiseError('Expected comma or closing curly brace', i)
 150                                         i = skipSpace(i+1)
 151                         def parseArray(i):
 152                                 res = []
 153                                 i = skipSpace(i+1)
 154                                 if s[i] == ']': # Empty array
 155                                         return (i+1,res)
 156                                 while True:
 157                                         i,val = parse(i)
 158                                         res.append(val)
 159                                         i = skipSpace(i) # Raise exception if premature end
 160                                         if s[i] == ']':
 161                                                 return (i+1, res)
 162                                         if s[i] != ',':
 163                                                 raiseError('Expected a comma or closing bracket', i)
 164                                         i = skipSpace(i+1)
 165                         def parseDiscrete(i):
 166                                 for k,v in {'true': True, 'false': False, 'null': None}.items():
 167                                         if s.startswith(k, i):
 168                                                 return (i+len(k), v)
 169                                 raiseError('Not a boolean (or null)', i)
 170                         def parseNumber(i):
 171                                 mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:])
 172                                 if mobj is None:
 173                                         raiseError('Not a number', i)
 174                                 nums = mobj.group(1)
 175                                 if '.' in nums or 'e' in nums or 'E' in nums:
 176                                         return (i+len(nums), float(nums))
 177                                 return (i+len(nums), int(nums))
 178                         CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
 179                         def parse(i):
 180                                 i = skipSpace(i)
 181                                 i,res = CHARMAP.get(s[i], parseNumber)(i)
 182                                 i = skipSpace(i, False)
 183                                 return (i,res)
 184                         i,res = parse(0)
 185                         if i < len(s):
 186                                 raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
 187                         return res
 188
 189 def preferredencoding():
 190         """Get preferred encoding.
 191
 192         Returns the best encoding scheme for the system, based on
 193         locale.getpreferredencoding() and some further tweaks.
 194         """
 195         def yield_preferredencoding():
 196                 try:
 197                         pref = locale.getpreferredencoding()
 198                         u'TEST'.encode(pref)
 199                 except:
 200                         pref = 'UTF-8'
 201                 while True:
 202                         yield pref
 203         return yield_preferredencoding().next()
 204
 205
 206 def htmlentity_transform(matchobj):
 207         """Transforms an HTML entity to a Unicode character.
 208
 209         This function receives a match object and is intended to be used with
 210         the re.sub() function.
 211         """
 212         entity = matchobj.group(1)
 213
 214         # Known non-numeric HTML entity
 215         if entity in htmlentitydefs.name2codepoint:
 216                 return unichr(htmlentitydefs.name2codepoint[entity])
 217
 218         # Unicode character
 219         mobj = re.match(ur'(?u)#(x?\d+)', entity)
 220         if mobj is not None:
 221                 numstr = mobj.group(1)
 222                 if numstr.startswith(u'x'):
 223                         base = 16
 224                         numstr = u'0%s' % numstr
 225                 else:
 226                         base = 10
 227                 return unichr(long(numstr, base))
 228
 229         # Unknown entity in name, return its literal representation
 230         return (u'&%s;' % entity)
 231
 232
 233 def sanitize_title(utitle):
 234         """Sanitizes a video title so it could be used as part of a filename."""
 235         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
 236         return utitle.replace(unicode(os.sep), u'%')
 237
 238
 239 def sanitize_open(filename, open_mode):
 240         """Try to open the given filename, and slightly tweak it if this fails.
 241
 242         Attempts to open the given filename. If this fails, it tries to change
 243         the filename slightly, step by step, until it's either able to open it
 244         or it fails and raises a final exception, like the standard open()
 245         function.
 246
 247         It returns the tuple (stream, definitive_file_name).
 248         """
 249         try:
 250                 if filename == u'-':
 251                         if sys.platform == 'win32':
 252                                 import msvcrt
 253                                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 254                         return (sys.stdout, filename)
 255                 stream = open(filename, open_mode)
 256                 return (stream, filename)
 257         except (IOError, OSError), err:
 258                 # In case of error, try to remove win32 forbidden chars
 259                 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
 260
 261                 # An exception here should be caught in the caller
 262                 stream = open(filename, open_mode)
 263                 return (stream, filename)
 264
 265
 266 def timeconvert(timestr):
 267         """Convert RFC 2822 defined time string into system timestamp"""
 268         timestamp = None
 269         timetuple = email.utils.parsedate_tz(timestr)
 270         if timetuple is not None:
 271                 timestamp = email.utils.mktime_tz(timetuple)
 272         return timestamp
 273
 274
 275 class DownloadError(Exception):
 276         """Download Error exception.
 277
 278         This exception may be thrown by FileDownloader objects if they are not
 279         configured to continue on errors. They will contain the appropriate
 280         error message.
 281         """
 282         pass
 283
 284
 285 class SameFileError(Exception):
 286         """Same File exception.
 287
 288         This exception will be thrown by FileDownloader objects if they detect
 289         multiple files would have to be downloaded to the same file on disk.
 290         """
 291         pass
 292
 293
 294 class PostProcessingError(Exception):
 295         """Post Processing exception.
 296
 297         This exception may be raised by PostProcessor's .run() method to
 298         indicate an error in the postprocessing task.
 299         """
 300         pass
 301
 302
 303 class UnavailableVideoError(Exception):
 304         """Unavailable Format exception.
 305
 306         This exception will be thrown when a video is requested
 307         in a format that is not available for that video.
 308         """
 309         pass
 310
 311
 312 class ContentTooShortError(Exception):
 313         """Content Too Short exception.
 314
 315         This exception may be raised by FileDownloader objects when a file they
 316         download is too small for what the server announced first, indicating
 317         the connection was probably interrupted.
 318         """
 319         # Both in bytes
 320         downloaded = None
 321         expected = None
 322
 323         def __init__(self, downloaded, expected):
 324                 self.downloaded = downloaded
 325                 self.expected = expected
 326
 327
 328 class YoutubeDLHandler(urllib2.HTTPHandler):
 329         """Handler for HTTP requests and responses.
 330
 331         This class, when installed with an OpenerDirector, automatically adds
 332         the standard headers to every HTTP request and handles gzipped and
 333         deflated responses from web servers. If compression is to be avoided in
 334         a particular request, the original request in the program code only has
 335         to include the HTTP header "Youtubedl-No-Compression", which will be
 336         removed before making the real request.
 337
 338         Part of this code was copied from:
 339
 340         http://techknack.net/python-urllib2-handlers/
 341
 342         Andrew Rowls, the author of that code, agreed to release it to the
 343         public domain.
 344         """
 345
 346         @staticmethod
 347         def deflate(data):
 348                 try:
 349                         return zlib.decompress(data, -zlib.MAX_WBITS)
 350                 except zlib.error:
 351                         return zlib.decompress(data)
 352
 353         @staticmethod
 354         def addinfourl_wrapper(stream, headers, url, code):
 355                 if hasattr(urllib2.addinfourl, 'getcode'):
 356                         return urllib2.addinfourl(stream, headers, url, code)
 357                 ret = urllib2.addinfourl(stream, headers, url)
 358                 ret.code = code
 359                 return ret
 360
 361         def http_request(self, req):
 362                 for h in std_headers:
 363                         if h in req.headers:
 364                                 del req.headers[h]
 365                         req.add_header(h, std_headers[h])
 366                 if 'Youtubedl-no-compression' in req.headers:
 367                         if 'Accept-encoding' in req.headers:
 368                                 del req.headers['Accept-encoding']
 369                         del req.headers['Youtubedl-no-compression']
 370                 return req
 371
 372         def http_response(self, req, resp):
 373                 old_resp = resp
 374                 # gzip
 375                 if resp.headers.get('Content-encoding', '') == 'gzip':
 376                         gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
 377                         resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 378                         resp.msg = old_resp.msg
 379                 # deflate
 380                 if resp.headers.get('Content-encoding', '') == 'deflate':
 381                         gz = StringIO.StringIO(self.deflate(resp.read()))
 382                         resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 383                         resp.msg = old_resp.msg
 384                 return resp
 385
 386
 387 class FileDownloader(object):
 388         """File Downloader class.
 389
 390         File downloader objects are the ones responsible of downloading the
 391         actual video file and writing it to disk if the user has requested
 392         it, among some other tasks. In most cases there should be one per
 393         program. As, given a video URL, the downloader doesn't know how to
 394         extract all the needed information, task that InfoExtractors do, it
 395         has to pass the URL to one of them.
 396
 397         For this, file downloader objects have a method that allows
 398         InfoExtractors to be registered in a given order. When it is passed
 399         a URL, the file downloader handles it to the first InfoExtractor it
 400         finds that reports being able to handle it. The InfoExtractor extracts
 401         all the information about the video or videos the URL refers to, and
 402         asks the FileDownloader to process the video information, possibly
 403         downloading the video.
 404
 405         File downloaders accept a lot of parameters. In order not to saturate
 406         the object constructor with arguments, it receives a dictionary of
 407         options instead. These options are available through the params
 408         attribute for the InfoExtractors to use. The FileDownloader also
 409         registers itself as the downloader in charge for the InfoExtractors
 410         that are added to it, so this is a "mutual registration".
 411
 412         Available options:
 413
 414         username:         Username for authentication purposes.
 415         password:         Password for authentication purposes.
 416         usenetrc:         Use netrc for authentication instead.
 417         quiet:            Do not print messages to stdout.
 418         forceurl:         Force printing final URL.
 419         forcetitle:       Force printing title.
 420         forcethumbnail:   Force printing thumbnail URL.
 421         forcedescription: Force printing description.
 422         forcefilename:    Force printing final filename.
 423         simulate:         Do not download the video files.
 424         format:           Video format code.
 425         format_limit:     Highest quality format to try.
 426         outtmpl:          Template for output names.
 427         ignoreerrors:     Do not stop on download errors.
 428         ratelimit:        Download speed limit, in bytes/sec.
 429         nooverwrites:     Prevent overwriting files.
 430         retries:          Number of times to retry for HTTP error 5xx
 431         continuedl:       Try to continue downloads if possible.
 432         noprogress:       Do not print the progress bar.
 433         playliststart:    Playlist item to start at.
 434         playlistend:      Playlist item to end at.
 435         logtostderr:      Log messages to stderr instead of stdout.
 436         consoletitle:     Display progress in console window's titlebar.
 437         nopart:           Do not use temporary .part files.
 438         updatetime:       Use the Last-modified header to set output file timestamps.
 439         writedescription: Write the video description to a .description file
 440         writeinfojson:    Write the video description to a .info.json file
 441         """
 442
 443         params = None
 444         _ies = []
 445         _pps = []
 446         _download_retcode = None
 447         _num_downloads = None
 448         _screen_file = None
 449
 450         def __init__(self, params):
 451                 """Create a FileDownloader object with the given options."""
 452                 self._ies = []
 453                 self._pps = []
 454                 self._download_retcode = 0
 455                 self._num_downloads = 0
 456                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 457                 self.params = params
 458
 459         @staticmethod
 460         def format_bytes(bytes):
 461                 if bytes is None:
 462                         return 'N/A'
 463                 if type(bytes) is str:
 464                         bytes = float(bytes)
 465                 if bytes == 0.0:
 466                         exponent = 0
 467                 else:
 468                         exponent = long(math.log(bytes, 1024.0))
 469                 suffix = 'bkMGTPEZY'[exponent]
 470                 converted = float(bytes) / float(1024 ** exponent)
 471                 return '%.2f%s' % (converted, suffix)
 472
 473         @staticmethod
 474         def calc_percent(byte_counter, data_len):
 475                 if data_len is None:
 476                         return '---.-%'
 477                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 478
 479         @staticmethod
 480         def calc_eta(start, now, total, current):
 481                 if total is None:
 482                         return '--:--'
 483                 dif = now - start
 484                 if current == 0 or dif < 0.001: # One millisecond
 485                         return '--:--'
 486                 rate = float(current) / dif
 487                 eta = long((float(total) - float(current)) / rate)
 488                 (eta_mins, eta_secs) = divmod(eta, 60)
 489                 if eta_mins > 99:
 490                         return '--:--'
 491                 return '%02d:%02d' % (eta_mins, eta_secs)
 492
 493         @staticmethod
 494         def calc_speed(start, now, bytes):
 495                 dif = now - start
 496                 if bytes == 0 or dif < 0.001: # One millisecond
 497                         return '%10s' % '---b/s'
 498                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 499
 500         @staticmethod
 501         def best_block_size(elapsed_time, bytes):
 502                 new_min = max(bytes / 2.0, 1.0)
 503                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 504                 if elapsed_time < 0.001:
 505                         return long(new_max)
 506                 rate = bytes / elapsed_time
 507                 if rate > new_max:
 508                         return long(new_max)
 509                 if rate < new_min:
 510                         return long(new_min)
 511                 return long(rate)
 512
 513         @staticmethod
 514         def parse_bytes(bytestr):
 515                 """Parse a string indicating a byte quantity into a long integer."""
 516                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 517                 if matchobj is None:
 518                         return None
 519                 number = float(matchobj.group(1))
 520                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 521                 return long(round(number * multiplier))
 522
 523         def add_info_extractor(self, ie):
 524                 """Add an InfoExtractor object to the end of the list."""
 525                 self._ies.append(ie)
 526                 ie.set_downloader(self)
 527
 528         def add_post_processor(self, pp):
 529                 """Add a PostProcessor object to the end of the chain."""
 530                 self._pps.append(pp)
 531                 pp.set_downloader(self)
 532
 533         def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
 534                 """Print message to stdout if not in quiet mode."""
 535                 try:
 536                         if not self.params.get('quiet', False):
 537                                 terminator = [u'\n', u''][skip_eol]
 538                                 print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
 539                         self._screen_file.flush()
 540                 except (UnicodeEncodeError), err:
 541                         if not ignore_encoding_errors:
 542                                 raise
 543
 544         def to_stderr(self, message):
 545                 """Print message to stderr."""
 546                 print >>sys.stderr, message.encode(preferredencoding())
 547
 548         def to_cons_title(self, message):
 549                 """Set console/terminal window title to message."""
 550                 if not self.params.get('consoletitle', False):
 551                         return
 552                 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 553                         # c_wchar_p() might not be necessary if `message` is
 554                         # already of type unicode()
 555                         ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 556                 elif 'TERM' in os.environ:
 557                         sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
 558
 559         def fixed_template(self):
 560                 """Checks if the output template is fixed."""
 561                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
 562
 563         def trouble(self, message=None):
 564                 """Determine action to take when a download problem appears.
 565
 566                 Depending on if the downloader has been configured to ignore
 567                 download errors or not, this method may throw an exception or
 568                 not when errors are found, after printing the message.
 569                 """
 570                 if message is not None:
 571                         self.to_stderr(message)
 572                 if not self.params.get('ignoreerrors', False):
 573                         raise DownloadError(message)
 574                 self._download_retcode = 1
 575
 576         def slow_down(self, start_time, byte_counter):
 577                 """Sleep if the download speed is over the rate limit."""
 578                 rate_limit = self.params.get('ratelimit', None)
 579                 if rate_limit is None or byte_counter == 0:
 580                         return
 581                 now = time.time()
 582                 elapsed = now - start_time
 583                 if elapsed <= 0.0:
 584                         return
 585                 speed = float(byte_counter) / elapsed
 586                 if speed > rate_limit:
 587                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 588
 589         def temp_name(self, filename):
 590                 """Returns a temporary filename for the given filename."""
 591                 if self.params.get('nopart', False) or filename == u'-' or \
 592                                 (os.path.exists(filename) and not os.path.isfile(filename)):
 593                         return filename
 594                 return filename + u'.part'
 595
 596         def undo_temp_name(self, filename):
 597                 if filename.endswith(u'.part'):
 598                         return filename[:-len(u'.part')]
 599                 return filename
 600
 601         def try_rename(self, old_filename, new_filename):
 602                 try:
 603                         if old_filename == new_filename:
 604                                 return
 605                         os.rename(old_filename, new_filename)
 606                 except (IOError, OSError), err:
 607                         self.trouble(u'ERROR: unable to rename file')
 608
 609         def try_utime(self, filename, last_modified_hdr):
 610                 """Try to set the last-modified time of the given file."""
 611                 if last_modified_hdr is None:
 612                         return
 613                 if not os.path.isfile(filename):
 614                         return
 615                 timestr = last_modified_hdr
 616                 if timestr is None:
 617                         return
 618                 filetime = timeconvert(timestr)
 619                 if filetime is None:
 620                         return
 621                 try:
 622                         os.utime(filename, (time.time(), filetime))
 623                 except:
 624                         pass
 625
 626         def report_writedescription(self, descfn):
 627                 """ Report that the description file is being written """
 628                 self.to_screen(u'[info] Writing video description to: %s' % descfn, ignore_encoding_errors=True)
 629
 630         def report_writeinfojson(self, infofn):
 631                 """ Report that the metadata file has been written """
 632                 self.to_screen(u'[info] Video description metadata as JSON to: %s' % infofn, ignore_encoding_errors=True)
 633
 634         def report_destination(self, filename):
 635                 """Report destination filename."""
 636                 self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
 637
 638         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 639                 """Report download progress."""
 640                 if self.params.get('noprogress', False):
 641                         return
 642                 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
 643                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 644                 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 645                                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 646
 647         def report_resuming_byte(self, resume_len):
 648                 """Report attempt to resume at given byte."""
 649                 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 650
 651         def report_retry(self, count, retries):
 652                 """Report retry in case of HTTP error 5xx"""
 653                 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 654
 655         def report_file_already_downloaded(self, file_name):
 656                 """Report file has already been fully downloaded."""
 657                 try:
 658                         self.to_screen(u'[download] %s has already been downloaded' % file_name)
 659                 except (UnicodeEncodeError), err:
 660                         self.to_screen(u'[download] The file has already been downloaded')
 661
 662         def report_unable_to_resume(self):
 663                 """Report it was impossible to resume download."""
 664                 self.to_screen(u'[download] Unable to resume')
 665
 666         def report_finish(self):
 667                 """Report download finished."""
 668                 if self.params.get('noprogress', False):
 669                         self.to_screen(u'[download] Download completed')
 670                 else:
 671                         self.to_screen(u'')
 672
 673         def increment_downloads(self):
 674                 """Increment the ordinal that assigns a number to each file."""
 675                 self._num_downloads += 1
 676
 677         def prepare_filename(self, info_dict):
 678                 """Generate the output filename."""
 679                 try:
 680                         template_dict = dict(info_dict)
 681                         template_dict['epoch'] = unicode(long(time.time()))
 682                         template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
 683                         filename = self.params['outtmpl'] % template_dict
 684                         return filename
 685                 except (ValueError, KeyError), err:
 686                         self.trouble(u'ERROR: invalid system charset or erroneous output template')
 687                         return None
 688
 689         def process_info(self, info_dict):
 690                 """Process a single dictionary returned by an InfoExtractor."""
 691                 filename = self.prepare_filename(info_dict)
 692                 # Do nothing else if in simulate mode
 693                 if self.params.get('simulate', False):
 694                         # Forced printings
 695                         if self.params.get('forcetitle', False):
 696                                 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
 697                         if self.params.get('forceurl', False):
 698                                 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
 699                         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 700                                 print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
 701                         if self.params.get('forcedescription', False) and 'description' in info_dict:
 702                                 print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
 703                         if self.params.get('forcefilename', False) and filename is not None:
 704                                 print filename.encode(preferredencoding(), 'xmlcharrefreplace')
 705
 706                         return
 707
 708                 if filename is None:
 709                         return
 710                 if self.params.get('nooverwrites', False) and os.path.exists(filename):
 711                         self.to_stderr(u'WARNING: file exists and will be skipped')
 712                         return
 713
 714                 try:
 715                         dn = os.path.dirname(filename)
 716                         if dn != '' and not os.path.exists(dn):
 717                                 os.makedirs(dn)
 718                 except (OSError, IOError), err:
 719                         self.trouble(u'ERROR: unable to create directories: %s' % str(err))
 720                         return
 721
 722                 if self.params.get('writedescription', False):
 723                         try:
 724                                 descfn = filename + '.description'
 725                                 self.report_writedescription(descfn)
 726                                 descfile = open(descfn, 'wb')
 727                                 try:
 728                                         descfile.write(info_dict['description'].encode('utf-8'))
 729                                 finally:
 730                                         descfile.close()
 731                         except (OSError, IOError):
 732                                 self.trouble(u'ERROR: Cannot write description file: %s' % str(descfn))
 733                                 return
 734
 735                 if self.params.get('writeinfojson', False):
 736                         infofn = filename + '.info.json'
 737                         self.report_writeinfojson(infofn)
 738                         try:
 739                                 json.dump
 740                         except (NameError,AttributeError):
 741                                 self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
 742                                 return
 743                         try:
 744                                 infof = open(infofn, 'wb')
 745                                 try:
 746                                         json.dump(info_dict, infof)
 747                                 finally:
 748                                         infof.close()
 749                         except (OSError, IOError):
 750                                 self.trouble(u'ERROR: Cannot write metadata to JSON file: %s' % str(infofn))
 751                                 return
 752
 753                 try:
 754                         success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
 755                 except (OSError, IOError), err:
 756                         raise UnavailableVideoError
 757                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 758                         self.trouble(u'ERROR: unable to download video data: %s' % str(err))
 759                         return
 760                 except (ContentTooShortError, ), err:
 761                         self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 762                         return
 763
 764                 if success:
 765                         try:
 766                                 self.post_process(filename, info_dict)
 767                         except (PostProcessingError), err:
 768                                 self.trouble(u'ERROR: postprocessing: %s' % str(err))
 769                                 return
 770
 771         def download(self, url_list):
 772                 """Download a given list of URLs."""
 773                 if len(url_list) > 1 and self.fixed_template():
 774                         raise SameFileError(self.params['outtmpl'])
 775
 776                 for url in url_list:
 777                         suitable_found = False
 778                         for ie in self._ies:
 779                                 # Go to next InfoExtractor if not suitable
 780                                 if not ie.suitable(url):
 781                                         continue
 782
 783                                 # Suitable InfoExtractor found
 784                                 suitable_found = True
 785
 786                                 # Extract information from URL and process it
 787                                 ie.extract(url)
 788
 789                                 # Suitable InfoExtractor had been found; go to next URL
 790                                 break
 791
 792                         if not suitable_found:
 793                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
 794
 795                 return self._download_retcode
 796
 797         def post_process(self, filename, ie_info):
 798                 """Run the postprocessing chain on the given file."""
 799                 info = dict(ie_info)
 800                 info['filepath'] = filename
 801                 for pp in self._pps:
 802                         info = pp.run(info)
 803                         if info is None:
 804                                 break
 805
 806         def _download_with_rtmpdump(self, filename, url, player_url):
 807                 self.report_destination(filename)
 808                 tmpfilename = self.temp_name(filename)
 809
 810                 # Check for rtmpdump first
 811                 try:
 812                         subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 813                 except (OSError, IOError):
 814                         self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
 815                         return False
 816
 817                 # Download using rtmpdump. rtmpdump returns exit code 2 when
 818                 # the connection was interrumpted and resuming appears to be
 819                 # possible. This is part of rtmpdump's normal usage, AFAIK.
 820                 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
 821                 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
 822                 while retval == 2 or retval == 1:
 823                         prevsize = os.path.getsize(tmpfilename)
 824                         self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 825                         time.sleep(5.0) # This seems to be needed
 826                         retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 827                         cursize = os.path.getsize(tmpfilename)
 828                         if prevsize == cursize and retval == 1:
 829                                 break
 830                 if retval == 0:
 831                         self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
 832                         self.try_rename(tmpfilename, filename)
 833                         return True
 834                 else:
 835                         self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
 836                         return False
 837
 838         def _do_download(self, filename, url, player_url):
 839                 # Check file already present
 840                 if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
 841                         self.report_file_already_downloaded(filename)
 842                         return True
 843
 844                 # Attempt to download using rtmpdump
 845                 if url.startswith('rtmp'):
 846                         return self._download_with_rtmpdump(filename, url, player_url)
 847
 848                 tmpfilename = self.temp_name(filename)
 849                 stream = None
 850                 open_mode = 'wb'
 851
 852                 # Do not include the Accept-Encoding header
 853                 headers = {'Youtubedl-no-compression': 'True'}
 854                 basic_request = urllib2.Request(url, None, headers)
 855                 request = urllib2.Request(url, None, headers)
 856
 857                 # Establish possible resume length
 858                 if os.path.isfile(tmpfilename):
 859                         resume_len = os.path.getsize(tmpfilename)
 860                 else:
 861                         resume_len = 0
 862
 863                 # Request parameters in case of being able to resume
 864                 if self.params.get('continuedl', False) and resume_len != 0:
 865                         self.report_resuming_byte(resume_len)
 866                         request.add_header('Range', 'bytes=%d-' % resume_len)
 867                         open_mode = 'ab'
 868
 869                 count = 0
 870                 retries = self.params.get('retries', 0)
 871                 while count <= retries:
 872                         # Establish connection
 873                         try:
 874                                 data = urllib2.urlopen(request)
 875                                 break
 876                         except (urllib2.HTTPError, ), err:
 877                                 if (err.code < 500 or err.code >= 600) and err.code != 416:
 878                                         # Unexpected HTTP error
 879                                         raise
 880                                 elif err.code == 416:
 881                                         # Unable to resume (requested range not satisfiable)
 882                                         try:
 883                                                 # Open the connection again without the range header
 884                                                 data = urllib2.urlopen(basic_request)
 885                                                 content_length = data.info()['Content-Length']
 886                                         except (urllib2.HTTPError, ), err:
 887                                                 if err.code < 500 or err.code >= 600:
 888                                                         raise
 889                                         else:
 890                                                 # Examine the reported length
 891                                                 if (content_length is not None and
 892                                                                 (resume_len - 100 < long(content_length) < resume_len + 100)):
 893                                                         # The file had already been fully downloaded.
 894                                                         # Explanation to the above condition: in issue #175 it was revealed that
 895                                                         # YouTube sometimes adds or removes a few bytes from the end of the file,
 896                                                         # changing the file size slightly and causing problems for some users. So
 897                                                         # I decided to implement a suggested change and consider the file
 898                                                         # completely downloaded if the file size differs less than 100 bytes from
 899                                                         # the one in the hard drive.
 900                                                         self.report_file_already_downloaded(filename)
 901                                                         self.try_rename(tmpfilename, filename)
 902                                                         return True
 903                                                 else:
 904                                                         # The length does not match, we start the download over
 905                                                         self.report_unable_to_resume()
 906                                                         open_mode = 'wb'
 907                                                         break
 908                         # Retry
 909                         count += 1
 910                         if count <= retries:
 911                                 self.report_retry(count, retries)
 912
 913                 if count > retries:
 914                         self.trouble(u'ERROR: giving up after %s retries' % retries)
 915                         return False
 916
 917                 data_len = data.info().get('Content-length', None)
 918                 if data_len is not None:
 919                         data_len = long(data_len) + resume_len
 920                 data_len_str = self.format_bytes(data_len)
 921                 byte_counter = 0 + resume_len
 922                 block_size = 1024
 923                 start = time.time()
 924                 while True:
 925                         # Download and write
 926                         before = time.time()
 927                         data_block = data.read(block_size)
 928                         after = time.time()
 929                         if len(data_block) == 0:
 930                                 break
 931                         byte_counter += len(data_block)
 932
 933                         # Open file just in time
 934                         if stream is None:
 935                                 try:
 936                                         (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 937                                         assert stream is not None
 938                                         filename = self.undo_temp_name(tmpfilename)
 939                                         self.report_destination(filename)
 940                                 except (OSError, IOError), err:
 941                                         self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
 942                                         return False
 943                         try:
 944                                 stream.write(data_block)
 945                         except (IOError, OSError), err:
 946                                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
 947                                 return False
 948                         block_size = self.best_block_size(after - before, len(data_block))
 949
 950                         # Progress message
 951                         percent_str = self.calc_percent(byte_counter, data_len)
 952                         eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
 953                         speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 954                         self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 955
 956                         # Apply rate limit
 957                         self.slow_down(start, byte_counter - resume_len)
 958
 959                 if stream is None:
 960                         self.trouble(u'\nERROR: Did not get any data blocks')
 961                         return False
 962                 stream.close()
 963                 self.report_finish()
 964                 if data_len is not None and byte_counter != data_len:
 965                         raise ContentTooShortError(byte_counter, long(data_len))
 966                 self.try_rename(tmpfilename, filename)
 967
 968                 # Update file modification time
 969                 if self.params.get('updatetime', True):
 970                         self.try_utime(filename, data.info().get('last-modified', None))
 971
 972                 return True
 973
 974
 975 class InfoExtractor(object):
 976         """Information Extractor class.
 977
 978         Information extractors are the classes that, given a URL, extract
 979         information from the video (or videos) the URL refers to. This
 980         information includes the real video URL, the video title and simplified
 981         title, author and others. The information is stored in a dictionary
 982         which is then passed to the FileDownloader. The FileDownloader
 983         processes this information possibly downloading the video to the file
 984         system, among other possible outcomes. The dictionaries must include
 985         the following fields:
 986
 987         id:             Video identifier.
 988         url:            Final video URL.
 989         uploader:       Nickname of the video uploader.
 990         title:          Literal title.
 991         stitle:         Simplified title.
 992         ext:            Video filename extension.
 993         format:         Video format.
 994         player_url:     SWF Player URL (may be None).
 995
 996         The following fields are optional. Their primary purpose is to allow
 997         youtube-dl to serve as the backend for a video search function, such
 998         as the one in youtube2mp3.  They are only used when their respective
 999         forced printing functions are called:
1000
1001         thumbnail:      Full URL to a video thumbnail image.
1002         description:    One-line video description.
1003
1004         Subclasses of this one should re-define the _real_initialize() and
1005         _real_extract() methods, as well as the suitable() static method.
1006         Probably, they should also be instantiated and added to the main
1007         downloader.
1008         """
1009
1010         _ready = False
1011         _downloader = None
1012
1013         def __init__(self, downloader=None):
1014                 """Constructor. Receives an optional downloader."""
1015                 self._ready = False
1016                 self.set_downloader(downloader)
1017
1018         @staticmethod
1019         def suitable(url):
1020                 """Receives a URL and returns True if suitable for this IE."""
1021                 return False
1022
1023         def initialize(self):
1024                 """Initializes an instance (authentication, etc)."""
1025                 if not self._ready:
1026                         self._real_initialize()
1027                         self._ready = True
1028
1029         def extract(self, url):
1030                 """Extracts URL information and returns it in list of dicts."""
1031                 self.initialize()
1032                 return self._real_extract(url)
1033
1034         def set_downloader(self, downloader):
1035                 """Sets the downloader for this IE."""
1036                 self._downloader = downloader
1037
1038         def _real_initialize(self):
1039                 """Real initialization process. Redefine in subclasses."""
1040                 pass
1041
1042         def _real_extract(self, url):
1043                 """Real extraction process. Redefine in subclasses."""
1044                 pass
1045
1046
1047 class YoutubeIE(InfoExtractor):
1048         """Information extractor for youtube.com."""
1049
1050         _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
1051         _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
1052         _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
1053         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
1054         _NETRC_MACHINE = 'youtube'
1055         # Listed in order of quality
1056         _available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13']
1057         _video_extensions = {
1058                 '13': '3gp',
1059                 '17': 'mp4',
1060                 '18': 'mp4',
1061                 '22': 'mp4',
1062                 '37': 'mp4',
1063                 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
1064                 '43': 'webm',
1065                 '45': 'webm',
1066         }
1067
1068         @staticmethod
1069         def suitable(url):
1070                 return (re.match(YoutubeIE._VALID_URL, url) is not None)
1071
1072         def report_lang(self):
1073                 """Report attempt to set language."""
1074                 self._downloader.to_screen(u'[youtube] Setting language')
1075
1076         def report_login(self):
1077                 """Report attempt to log in."""
1078                 self._downloader.to_screen(u'[youtube] Logging in')
1079
1080         def report_age_confirmation(self):
1081                 """Report attempt to confirm age."""
1082                 self._downloader.to_screen(u'[youtube] Confirming age')
1083
1084         def report_video_webpage_download(self, video_id):
1085                 """Report attempt to download video webpage."""
1086                 self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
1087
1088         def report_video_info_webpage_download(self, video_id):
1089                 """Report attempt to download video info webpage."""
1090                 self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
1091
1092         def report_information_extraction(self, video_id):
1093                 """Report attempt to extract video information."""
1094                 self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
1095
1096         def report_unavailable_format(self, video_id, format):
1097                 """Report extracted video URL."""
1098                 self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
1099
1100         def report_rtmp_download(self):
1101                 """Indicate the download will use the RTMP protocol."""
1102                 self._downloader.to_screen(u'[youtube] RTMP download detected')
1103
1104         def _real_initialize(self):
1105                 if self._downloader is None:
1106                         return
1107
1108                 username = None
1109                 password = None
1110                 downloader_params = self._downloader.params
1111
1112                 # Attempt to use provided username and password or .netrc data
1113                 if downloader_params.get('username', None) is not None:
1114                         username = downloader_params['username']
1115                         password = downloader_params['password']
1116                 elif downloader_params.get('usenetrc', False):
1117                         try:
1118                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
1119                                 if info is not None:
1120                                         username = info[0]
1121                                         password = info[2]
1122                                 else:
1123                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
1124                         except (IOError, netrc.NetrcParseError), err:
1125                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
1126                                 return
1127
1128                 # Set language
1129                 request = urllib2.Request(self._LANG_URL)
1130                 try:
1131                         self.report_lang()
1132                         urllib2.urlopen(request).read()
1133                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1134                         self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
1135                         return
1136
1137                 # No authentication to be performed
1138                 if username is None:
1139                         return
1140
1141                 # Log in
1142                 login_form = {
1143                                 'current_form': 'loginForm',
1144                                 'next':         '/',
1145                                 'action_login': 'Log In',
1146                                 'username':     username,
1147                                 'password':     password,
1148                                 }
1149                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
1150                 try:
1151                         self.report_login()
1152                         login_results = urllib2.urlopen(request).read()
1153                         if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
1154                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
1155                                 return
1156                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1157                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
1158                         return
1159
1160                 # Confirm age
1161                 age_form = {
1162                                 'next_url':             '/',
1163                                 'action_confirm':       'Confirm',
1164                                 }
1165                 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
1166                 try:
1167                         self.report_age_confirmation()
1168                         age_results = urllib2.urlopen(request).read()
1169                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1170                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
1171                         return
1172
1173         def _real_extract(self, url):
1174                 # Extract video id from URL
1175                 mobj = re.match(self._VALID_URL, url)
1176                 if mobj is None:
1177                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1178                         return
1179                 video_id = mobj.group(2)
1180
1181                 # Get video webpage
1182                 self.report_video_webpage_download(video_id)
1183                 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id)
1184                 try:
1185                         video_webpage = urllib2.urlopen(request).read()
1186                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1187                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
1188                         return
1189
1190                 # Attempt to extract SWF player URL
1191                 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1192                 if mobj is not None:
1193                         player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1194                 else:
1195                         player_url = None
1196
1197                 # Get video info
1198                 self.report_video_info_webpage_download(video_id)
1199                 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1200                         video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1201                                         % (video_id, el_type))
1202                         request = urllib2.Request(video_info_url)
1203                         try:
1204                                 video_info_webpage = urllib2.urlopen(request).read()
1205                                 video_info = parse_qs(video_info_webpage)
1206                                 if 'token' in video_info:
1207                                         break
1208                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1209                                 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
1210                                 return
1211                 if 'token' not in video_info:
1212                         if 'reason' in video_info:
1213                                 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
1214                         else:
1215                                 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
1216                         return
1217
1218                 # Start extracting information
1219                 self.report_information_extraction(video_id)
1220
1221                 # uploader
1222                 if 'author' not in video_info:
1223                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1224                         return
1225                 video_uploader = urllib.unquote_plus(video_info['author'][0])
1226
1227                 # title
1228                 if 'title' not in video_info:
1229                         self._downloader.trouble(u'ERROR: unable to extract video title')
1230                         return
1231                 video_title = urllib.unquote_plus(video_info['title'][0])
1232                 video_title = video_title.decode('utf-8')
1233                 video_title = sanitize_title(video_title)
1234
1235                 # simplified title
1236                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1237                 simple_title = simple_title.strip(ur'_')
1238
1239                 # thumbnail image
1240                 if 'thumbnail_url' not in video_info:
1241                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
1242                         video_thumbnail = ''
1243                 else:   # don't panic if we can't find it
1244                         video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
1245
1246                 # upload date
1247                 upload_date = u'NA'
1248                 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1249                 if mobj is not None:
1250                         upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1251                         format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
1252                         for expression in format_expressions:
1253                                 try:
1254                                         upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
1255                                 except:
1256                                         pass
1257
1258                 # description
1259                 try:
1260                         lxml.etree
1261                 except NameError:
1262                         video_description = u'No description available.'
1263                         if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False):
1264                                 mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
1265                                 if mobj is not None:
1266                                         video_description = mobj.group(1).decode('utf-8')
1267                 else:
1268                         html_parser = lxml.etree.HTMLParser(encoding='utf-8')
1269                         vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
1270                         video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
1271                         # TODO use another parser
1272
1273                 # token
1274                 video_token = urllib.unquote_plus(video_info['token'][0])
1275
1276                 # Decide which formats to download
1277                 req_format = self._downloader.params.get('format', None)
1278
1279                 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1280                         self.report_rtmp_download()
1281                         video_url_list = [(None, video_info['conn'][0])]
1282                 elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1283                         url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
1284                         url_data = [parse_qs(uds) for uds in url_data_strs]
1285                         url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
1286                         url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data)
1287
1288                         format_limit = self._downloader.params.get('format_limit', None)
1289                         if format_limit is not None and format_limit in self._available_formats:
1290                                 format_list = self._available_formats[self._available_formats.index(format_limit):]
1291                         else:
1292                                 format_list = self._available_formats
1293                         existing_formats = [x for x in format_list if x in url_map]
1294                         if len(existing_formats) == 0:
1295                                 self._downloader.trouble(u'ERROR: no known formats available for video')
1296                                 return
1297                         if req_format is None:
1298                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1299                         elif req_format == '-1':
1300                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1301                         else:
1302                                 # Specific format
1303                                 if req_format not in url_map:
1304                                         self._downloader.trouble(u'ERROR: requested format not available')
1305                                         return
1306                                 video_url_list = [(req_format, url_map[req_format])] # Specific format
1307                 else:
1308                         self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')
1309                         return
1310
1311                 for format_param, video_real_url in video_url_list:
1312                         # At this point we have a new video
1313                         self._downloader.increment_downloads()
1314
1315                         # Extension
1316                         video_extension = self._video_extensions.get(format_param, 'flv')
1317
1318                         try:
1319                                 # Process video information
1320                                 self._downloader.process_info({
1321                                         'id':           video_id.decode('utf-8'),
1322                                         'url':          video_real_url.decode('utf-8'),
1323                                         'uploader':     video_uploader.decode('utf-8'),
1324                                         'upload_date':  upload_date,
1325                                         'title':        video_title,
1326                                         'stitle':       simple_title,
1327                                         'ext':          video_extension.decode('utf-8'),
1328                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
1329                                         'thumbnail':    video_thumbnail.decode('utf-8'),
1330                                         'description':  video_description,
1331                                         'player_url':   player_url,
1332                                 })
1333                         except UnavailableVideoError, err:
1334                                 self._downloader.trouble(u'\nERROR: unable to download video')
1335
1336
1337 class MetacafeIE(InfoExtractor):
1338         """Information Extractor for metacafe.com."""
1339
1340         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
1341         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
1342         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
1343         _youtube_ie = None
1344
1345         def __init__(self, youtube_ie, downloader=None):
1346                 InfoExtractor.__init__(self, downloader)
1347                 self._youtube_ie = youtube_ie
1348
1349         @staticmethod
1350         def suitable(url):
1351                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
1352
1353         def report_disclaimer(self):
1354                 """Report disclaimer retrieval."""
1355                 self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
1356
1357         def report_age_confirmation(self):
1358                 """Report attempt to confirm age."""
1359                 self._downloader.to_screen(u'[metacafe] Confirming age')
1360
1361         def report_download_webpage(self, video_id):
1362                 """Report webpage download."""
1363                 self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
1364
1365         def report_extraction(self, video_id):
1366                 """Report information extraction."""
1367                 self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
1368
1369         def _real_initialize(self):
1370                 # Retrieve disclaimer
1371                 request = urllib2.Request(self._DISCLAIMER)
1372                 try:
1373                         self.report_disclaimer()
1374                         disclaimer = urllib2.urlopen(request).read()
1375                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1376                         self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
1377                         return
1378
1379                 # Confirm age
1380                 disclaimer_form = {
1381                         'filters': '0',
1382                         'submit': "Continue - I'm over 18",
1383                         }
1384                 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
1385                 try:
1386                         self.report_age_confirmation()
1387                         disclaimer = urllib2.urlopen(request).read()
1388                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1389                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
1390                         return
1391
1392         def _real_extract(self, url):
1393                 # Extract id and simplified title from URL
1394                 mobj = re.match(self._VALID_URL, url)
1395                 if mobj is None:
1396                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1397                         return
1398
1399                 video_id = mobj.group(1)
1400
1401                 # Check if video comes from YouTube
1402                 mobj2 = re.match(r'^yt-(.*)$', video_id)
1403                 if mobj2 is not None:
1404                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1405                         return
1406
1407                 # At this point we have a new video
1408                 self._downloader.increment_downloads()
1409
1410                 simple_title = mobj.group(2).decode('utf-8')
1411
1412                 # Retrieve video webpage to extract further information
1413                 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1414                 try:
1415                         self.report_download_webpage(video_id)
1416                         webpage = urllib2.urlopen(request).read()
1417                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1418                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1419                         return
1420
1421                 # Extract URL, uploader and title from webpage
1422                 self.report_extraction(video_id)
1423                 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
1424                 if mobj is not None:
1425                         mediaURL = urllib.unquote(mobj.group(1))
1426                         video_extension = mediaURL[-3:]
1427
1428                         # Extract gdaKey if available
1429                         mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1430                         if mobj is None:
1431                                 video_url = mediaURL
1432                         else:
1433                                 gdaKey = mobj.group(1)
1434                                 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
1435                 else:
1436                         mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
1437                         if mobj is None:
1438                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1439                                 return
1440                         vardict = parse_qs(mobj.group(1))
1441                         if 'mediaData' not in vardict:
1442                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1443                                 return
1444                         mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
1445                         if mobj is None:
1446                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1447                                 return
1448                         mediaURL = mobj.group(1).replace('\\/', '/')
1449                         video_extension = mediaURL[-3:]
1450                         video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
1451
1452                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
1453                 if mobj is None:
1454                         self._downloader.trouble(u'ERROR: unable to extract title')
1455                         return
1456                 video_title = mobj.group(1).decode('utf-8')
1457                 video_title = sanitize_title(video_title)
1458
1459                 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
1460                 if mobj is None:
1461                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1462                         return
1463                 video_uploader = mobj.group(1)
1464
1465                 try:
1466                         # Process video information
1467                         self._downloader.process_info({
1468                                 'id':           video_id.decode('utf-8'),
1469                                 'url':          video_url.decode('utf-8'),
1470                                 'uploader':     video_uploader.decode('utf-8'),
1471                                 'upload_date':  u'NA',
1472                                 'title':        video_title,
1473                                 'stitle':       simple_title,
1474                                 'ext':          video_extension.decode('utf-8'),
1475                                 'format':       u'NA',
1476                                 'player_url':   None,
1477                         })
1478                 except UnavailableVideoError:
1479                         self._downloader.trouble(u'\nERROR: unable to download video')
1480
1481
1482 class DailymotionIE(InfoExtractor):
1483         """Information Extractor for Dailymotion"""
1484
1485         _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1486
1487         def __init__(self, downloader=None):
1488                 InfoExtractor.__init__(self, downloader)
1489
1490         @staticmethod
1491         def suitable(url):
1492                 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1493
1494         def report_download_webpage(self, video_id):
1495                 """Report webpage download."""
1496                 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
1497
1498         def report_extraction(self, video_id):
1499                 """Report information extraction."""
1500                 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
1501
1502         def _real_initialize(self):
1503                 return
1504
1505         def _real_extract(self, url):
1506                 # Extract id and simplified title from URL
1507                 mobj = re.match(self._VALID_URL, url)
1508                 if mobj is None:
1509                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1510                         return
1511
1512                 # At this point we have a new video
1513                 self._downloader.increment_downloads()
1514                 video_id = mobj.group(1)
1515
1516                 simple_title = mobj.group(2).decode('utf-8')
1517                 video_extension = 'flv'
1518
1519                 # Retrieve video webpage to extract further information
1520                 request = urllib2.Request(url)
1521                 try:
1522                         self.report_download_webpage(video_id)
1523                         webpage = urllib2.urlopen(request).read()
1524                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1525                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1526                         return
1527
1528                 # Extract URL, uploader and title from webpage
1529                 self.report_extraction(video_id)
1530                 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1531                 if mobj is None:
1532                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1533                         return
1534                 mediaURL = urllib.unquote(mobj.group(1))
1535
1536                 # if needed add http://www.dailymotion.com/ if relative URL
1537
1538                 video_url = mediaURL
1539
1540                 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1541                 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1542                 if mobj is None:
1543                         self._downloader.trouble(u'ERROR: unable to extract title')
1544                         return
1545                 video_title = mobj.group(1).decode('utf-8')
1546                 video_title = sanitize_title(video_title)
1547
1548                 mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
1549                 if mobj is None:
1550                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1551                         return
1552                 video_uploader = mobj.group(1)
1553
1554                 try:
1555                         # Process video information
1556                         self._downloader.process_info({
1557                                 'id':           video_id.decode('utf-8'),
1558                                 'url':          video_url.decode('utf-8'),
1559                                 'uploader':     video_uploader.decode('utf-8'),
1560                                 'upload_date':  u'NA',
1561                                 'title':        video_title,
1562                                 'stitle':       simple_title,
1563                                 'ext':          video_extension.decode('utf-8'),
1564                                 'format':       u'NA',
1565                                 'player_url':   None,
1566                         })
1567                 except UnavailableVideoError:
1568                         self._downloader.trouble(u'\nERROR: unable to download video')
1569
1570
1571 class GoogleIE(InfoExtractor):
1572         """Information extractor for video.google.com."""
1573
1574         _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1575
1576         def __init__(self, downloader=None):
1577                 InfoExtractor.__init__(self, downloader)
1578
1579         @staticmethod
1580         def suitable(url):
1581                 return (re.match(GoogleIE._VALID_URL, url) is not None)
1582
1583         def report_download_webpage(self, video_id):
1584                 """Report webpage download."""
1585                 self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
1586
1587         def report_extraction(self, video_id):
1588                 """Report information extraction."""
1589                 self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
1590
1591         def _real_initialize(self):
1592                 return
1593
1594         def _real_extract(self, url):
1595                 # Extract id from URL
1596                 mobj = re.match(self._VALID_URL, url)
1597                 if mobj is None:
1598                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1599                         return
1600
1601                 # At this point we have a new video
1602                 self._downloader.increment_downloads()
1603                 video_id = mobj.group(1)
1604
1605                 video_extension = 'mp4'
1606
1607                 # Retrieve video webpage to extract further information
1608                 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1609                 try:
1610                         self.report_download_webpage(video_id)
1611                         webpage = urllib2.urlopen(request).read()
1612                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1613                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1614                         return
1615
1616                 # Extract URL, uploader, and title from webpage
1617                 self.report_extraction(video_id)
1618                 mobj = re.search(r"download_url:'([^']+)'", webpage)
1619                 if mobj is None:
1620                         video_extension = 'flv'
1621                         mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1622                 if mobj is None:
1623                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1624                         return
1625                 mediaURL = urllib.unquote(mobj.group(1))
1626                 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1627                 mediaURL = mediaURL.replace('\\x26', '\x26')
1628
1629                 video_url = mediaURL
1630
1631                 mobj = re.search(r'<title>(.*)</title>', webpage)
1632                 if mobj is None:
1633                         self._downloader.trouble(u'ERROR: unable to extract title')
1634                         return
1635                 video_title = mobj.group(1).decode('utf-8')
1636                 video_title = sanitize_title(video_title)
1637                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1638
1639                 # Extract video description
1640                 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1641                 if mobj is None:
1642                         self._downloader.trouble(u'ERROR: unable to extract video description')
1643                         return
1644                 video_description = mobj.group(1).decode('utf-8')
1645                 if not video_description:
1646                         video_description = 'No description available.'
1647
1648                 # Extract video thumbnail
1649                 if self._downloader.params.get('forcethumbnail', False):
1650                         request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1651                         try:
1652                                 webpage = urllib2.urlopen(request).read()
1653                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1654                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1655                                 return
1656                         mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1657                         if mobj is None:
1658                                 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1659                                 return
1660                         video_thumbnail = mobj.group(1)
1661                 else:   # we need something to pass to process_info
1662                         video_thumbnail = ''
1663
1664                 try:
1665                         # Process video information
1666                         self._downloader.process_info({
1667                                 'id':           video_id.decode('utf-8'),
1668                                 'url':          video_url.decode('utf-8'),
1669                                 'uploader':     u'NA',
1670                                 'upload_date':  u'NA',
1671                                 'title':        video_title,
1672                                 'stitle':       simple_title,
1673                                 'ext':          video_extension.decode('utf-8'),
1674                                 'format':       u'NA',
1675                                 'player_url':   None,
1676                         })
1677                 except UnavailableVideoError:
1678                         self._downloader.trouble(u'\nERROR: unable to download video')
1679
1680
1681 class PhotobucketIE(InfoExtractor):
1682         """Information extractor for photobucket.com."""
1683
1684         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1685
1686         def __init__(self, downloader=None):
1687                 InfoExtractor.__init__(self, downloader)
1688
1689         @staticmethod
1690         def suitable(url):
1691                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1692
1693         def report_download_webpage(self, video_id):
1694                 """Report webpage download."""
1695                 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1696
1697         def report_extraction(self, video_id):
1698                 """Report information extraction."""
1699                 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1700
1701         def _real_initialize(self):
1702                 return
1703
1704         def _real_extract(self, url):
1705                 # Extract id from URL
1706                 mobj = re.match(self._VALID_URL, url)
1707                 if mobj is None:
1708                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1709                         return
1710
1711                 # At this point we have a new video
1712                 self._downloader.increment_downloads()
1713                 video_id = mobj.group(1)
1714
1715                 video_extension = 'flv'
1716
1717                 # Retrieve video webpage to extract further information
1718                 request = urllib2.Request(url)
1719                 try:
1720                         self.report_download_webpage(video_id)
1721                         webpage = urllib2.urlopen(request).read()
1722                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1723                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1724                         return
1725
1726                 # Extract URL, uploader, and title from webpage
1727                 self.report_extraction(video_id)
1728                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1729                 if mobj is None:
1730                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1731                         return
1732                 mediaURL = urllib.unquote(mobj.group(1))
1733
1734                 video_url = mediaURL
1735
1736                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1737                 if mobj is None:
1738                         self._downloader.trouble(u'ERROR: unable to extract title')
1739                         return
1740                 video_title = mobj.group(1).decode('utf-8')
1741                 video_title = sanitize_title(video_title)
1742                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1743
1744                 video_uploader = mobj.group(2).decode('utf-8')
1745
1746                 try:
1747                         # Process video information
1748                         self._downloader.process_info({
1749                                 'id':           video_id.decode('utf-8'),
1750                                 'url':          video_url.decode('utf-8'),
1751                                 'uploader':     video_uploader,
1752                                 'upload_date':  u'NA',
1753                                 'title':        video_title,
1754                                 'stitle':       simple_title,
1755                                 'ext':          video_extension.decode('utf-8'),
1756                                 'format':       u'NA',
1757                                 'player_url':   None,
1758                         })
1759                 except UnavailableVideoError:
1760                         self._downloader.trouble(u'\nERROR: unable to download video')
1761
1762
1763 class YahooIE(InfoExtractor):
1764         """Information extractor for video.yahoo.com."""
1765
1766         # _VALID_URL matches all Yahoo! Video URLs
1767         # _VPAGE_URL matches only the extractable '/watch/' URLs
1768         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1769         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1770
1771         def __init__(self, downloader=None):
1772                 InfoExtractor.__init__(self, downloader)
1773
1774         @staticmethod
1775         def suitable(url):
1776                 return (re.match(YahooIE._VALID_URL, url) is not None)
1777
1778         def report_download_webpage(self, video_id):
1779                 """Report webpage download."""
1780                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1781
1782         def report_extraction(self, video_id):
1783                 """Report information extraction."""
1784                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1785
1786         def _real_initialize(self):
1787                 return
1788
1789         def _real_extract(self, url, new_video=True):
1790                 # Extract ID from URL
1791                 mobj = re.match(self._VALID_URL, url)
1792                 if mobj is None:
1793                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1794                         return
1795
1796                 # At this point we have a new video
1797                 self._downloader.increment_downloads()
1798                 video_id = mobj.group(2)
1799                 video_extension = 'flv'
1800
1801                 # Rewrite valid but non-extractable URLs as
1802                 # extractable English language /watch/ URLs
1803                 if re.match(self._VPAGE_URL, url) is None:
1804                         request = urllib2.Request(url)
1805                         try:
1806                                 webpage = urllib2.urlopen(request).read()
1807                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1808                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1809                                 return
1810
1811                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1812                         if mobj is None:
1813                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1814                                 return
1815                         yahoo_id = mobj.group(1)
1816
1817                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1818                         if mobj is None:
1819                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1820                                 return
1821                         yahoo_vid = mobj.group(1)
1822
1823                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1824                         return self._real_extract(url, new_video=False)
1825
1826                 # Retrieve video webpage to extract further information
1827                 request = urllib2.Request(url)
1828                 try:
1829                         self.report_download_webpage(video_id)
1830                         webpage = urllib2.urlopen(request).read()
1831                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1832                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1833                         return
1834
1835                 # Extract uploader and title from webpage
1836                 self.report_extraction(video_id)
1837                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1838                 if mobj is None:
1839                         self._downloader.trouble(u'ERROR: unable to extract video title')
1840                         return
1841                 video_title = mobj.group(1).decode('utf-8')
1842                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1843
1844                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1845                 if mobj is None:
1846                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1847                         return
1848                 video_uploader = mobj.group(1).decode('utf-8')
1849
1850                 # Extract video thumbnail
1851                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1852                 if mobj is None:
1853                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1854                         return
1855                 video_thumbnail = mobj.group(1).decode('utf-8')
1856
1857                 # Extract video description
1858                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1859                 if mobj is None:
1860                         self._downloader.trouble(u'ERROR: unable to extract video description')
1861                         return
1862                 video_description = mobj.group(1).decode('utf-8')
1863                 if not video_description:
1864                         video_description = 'No description available.'
1865
1866                 # Extract video height and width
1867                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1868                 if mobj is None:
1869                         self._downloader.trouble(u'ERROR: unable to extract video height')
1870                         return
1871                 yv_video_height = mobj.group(1)
1872
1873                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1874                 if mobj is None:
1875                         self._downloader.trouble(u'ERROR: unable to extract video width')
1876                         return
1877                 yv_video_width = mobj.group(1)
1878
1879                 # Retrieve video playlist to extract media URL
1880                 # I'm not completely sure what all these options are, but we
1881                 # seem to need most of them, otherwise the server sends a 401.
1882                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1883                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1884                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1885                                 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1886                                 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1887                 try:
1888                         self.report_download_webpage(video_id)
1889                         webpage = urllib2.urlopen(request).read()
1890                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1891                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1892                         return
1893
1894                 # Extract media URL from playlist XML
1895                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1896                 if mobj is None:
1897                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1898                         return
1899                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1900                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1901
1902                 try:
1903                         # Process video information
1904                         self._downloader.process_info({
1905                                 'id':           video_id.decode('utf-8'),
1906                                 'url':          video_url,
1907                                 'uploader':     video_uploader,
1908                                 'upload_date':  u'NA',
1909                                 'title':        video_title,
1910                                 'stitle':       simple_title,
1911                                 'ext':          video_extension.decode('utf-8'),
1912                                 'thumbnail':    video_thumbnail.decode('utf-8'),
1913                                 'description':  video_description,
1914                                 'thumbnail':    video_thumbnail,
1915                                 'description':  video_description,
1916                                 'player_url':   None,
1917                         })
1918                 except UnavailableVideoError:
1919                         self._downloader.trouble(u'\nERROR: unable to download video')
1920
1921
1922 class VimeoIE(InfoExtractor):
1923         """Information extractor for vimeo.com."""
1924
1925         # _VALID_URL matches Vimeo URLs
1926         _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
1927
1928         def __init__(self, downloader=None):
1929                 InfoExtractor.__init__(self, downloader)
1930
1931         @staticmethod
1932         def suitable(url):
1933                 return (re.match(VimeoIE._VALID_URL, url) is not None)
1934
1935         def report_download_webpage(self, video_id):
1936                 """Report webpage download."""
1937                 self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
1938
1939         def report_extraction(self, video_id):
1940                 """Report information extraction."""
1941                 self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
1942
1943         def _real_initialize(self):
1944                 return
1945
1946         def _real_extract(self, url, new_video=True):
1947                 # Extract ID from URL
1948                 mobj = re.match(self._VALID_URL, url)
1949                 if mobj is None:
1950                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1951                         return
1952
1953                 # At this point we have a new video
1954                 self._downloader.increment_downloads()
1955                 video_id = mobj.group(1)
1956
1957                 # Retrieve video webpage to extract further information
1958                 request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
1959                 try:
1960                         self.report_download_webpage(video_id)
1961                         webpage = urllib2.urlopen(request).read()
1962                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1963                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1964                         return
1965
1966                 # Now we begin extracting as much information as we can from what we
1967                 # retrieved. First we extract the information common to all extractors,
1968                 # and latter we extract those that are Vimeo specific.
1969                 self.report_extraction(video_id)
1970
1971                 # Extract title
1972                 mobj = re.search(r'<caption>(.*?)</caption>', webpage)
1973                 if mobj is None:
1974                         self._downloader.trouble(u'ERROR: unable to extract video title')
1975                         return
1976                 video_title = mobj.group(1).decode('utf-8')
1977                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1978
1979                 # Extract uploader
1980                 mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
1981                 if mobj is None:
1982                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1983                         return
1984                 video_uploader = mobj.group(1).decode('utf-8')
1985
1986                 # Extract video thumbnail
1987                 mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
1988                 if mobj is None:
1989                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1990                         return
1991                 video_thumbnail = mobj.group(1).decode('utf-8')
1992
1993                 # # Extract video description
1994                 # mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
1995                 # if mobj is None:
1996                 #       self._downloader.trouble(u'ERROR: unable to extract video description')
1997                 #       return
1998                 # video_description = mobj.group(1).decode('utf-8')
1999                 # if not video_description: video_description = 'No description available.'
2000                 video_description = 'Foo.'
2001
2002                 # Vimeo specific: extract request signature
2003                 mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
2004                 if mobj is None:
2005                         self._downloader.trouble(u'ERROR: unable to extract request signature')
2006                         return
2007                 sig = mobj.group(1).decode('utf-8')
2008
2009                 # Vimeo specific: Extract request signature expiration
2010                 mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
2011                 if mobj is None:
2012                         self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
2013                         return
2014                 sig_exp = mobj.group(1).decode('utf-8')
2015
2016                 video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id, sig, sig_exp)
2017
2018                 try:
2019                         # Process video information
2020                         self._downloader.process_info({
2021                                 'id':           video_id.decode('utf-8'),
2022                                 'url':          video_url,
2023                                 'uploader':     video_uploader,
2024                                 'upload_date':  u'NA',
2025                                 'title':        video_title,
2026                                 'stitle':       simple_title,
2027                                 'ext':          u'mp4',
2028                                 'thumbnail':    video_thumbnail.decode('utf-8'),
2029                                 'description':  video_description,
2030                                 'thumbnail':    video_thumbnail,
2031                                 'description':  video_description,
2032                                 'player_url':   None,
2033                         })
2034                 except UnavailableVideoError:
2035                         self._downloader.trouble(u'ERROR: unable to download video')
2036
2037
2038 class GenericIE(InfoExtractor):
2039         """Generic last-resort information extractor."""
2040
2041         def __init__(self, downloader=None):
2042                 InfoExtractor.__init__(self, downloader)
2043
2044         @staticmethod
2045         def suitable(url):
2046                 return True
2047
2048         def report_download_webpage(self, video_id):
2049                 """Report webpage download."""
2050                 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
2051                 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
2052
2053         def report_extraction(self, video_id):
2054                 """Report information extraction."""
2055                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
2056
2057         def _real_initialize(self):
2058                 return
2059
2060         def _real_extract(self, url):
2061                 # At this point we have a new video
2062                 self._downloader.increment_downloads()
2063
2064                 video_id = url.split('/')[-1]
2065                 request = urllib2.Request(url)
2066                 try:
2067                         self.report_download_webpage(video_id)
2068                         webpage = urllib2.urlopen(request).read()
2069                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2070                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
2071                         return
2072                 except ValueError, err:
2073                         # since this is the last-resort InfoExtractor, if
2074                         # this error is thrown, it'll be thrown here
2075                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2076                         return
2077
2078                 self.report_extraction(video_id)
2079                 # Start with something easy: JW Player in SWFObject
2080                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
2081                 if mobj is None:
2082                         # Broaden the search a little bit
2083                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
2084                 if mobj is None:
2085                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2086                         return
2087
2088                 # It's possible that one of the regexes
2089                 # matched, but returned an empty group:
2090                 if mobj.group(1) is None:
2091                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2092                         return
2093
2094                 video_url = urllib.unquote(mobj.group(1))
2095                 video_id = os.path.basename(video_url)
2096
2097                 # here's a fun little line of code for you:
2098                 video_extension = os.path.splitext(video_id)[1][1:]
2099                 video_id = os.path.splitext(video_id)[0]
2100
2101                 # it's tempting to parse this further, but you would
2102                 # have to take into account all the variations like
2103                 #   Video Title - Site Name
2104                 #   Site Name | Video Title
2105                 #   Video Title - Tagline | Site Name
2106                 # and so on and so forth; it's just not practical
2107                 mobj = re.search(r'<title>(.*)</title>', webpage)
2108                 if mobj is None:
2109                         self._downloader.trouble(u'ERROR: unable to extract title')
2110                         return
2111                 video_title = mobj.group(1).decode('utf-8')
2112                 video_title = sanitize_title(video_title)
2113                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2114
2115                 # video uploader is domain name
2116                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
2117                 if mobj is None:
2118                         self._downloader.trouble(u'ERROR: unable to extract title')
2119                         return
2120                 video_uploader = mobj.group(1).decode('utf-8')
2121
2122                 try:
2123                         # Process video information
2124                         self._downloader.process_info({
2125                                 'id':           video_id.decode('utf-8'),
2126                                 'url':          video_url.decode('utf-8'),
2127                                 'uploader':     video_uploader,
2128                                 'upload_date':  u'NA',
2129                                 'title':        video_title,
2130                                 'stitle':       simple_title,
2131                                 'ext':          video_extension.decode('utf-8'),
2132                                 'format':       u'NA',
2133                                 'player_url':   None,
2134                         })
2135                 except UnavailableVideoError, err:
2136                         self._downloader.trouble(u'\nERROR: unable to download video')
2137
2138
2139 class YoutubeSearchIE(InfoExtractor):
2140         """Information Extractor for YouTube search queries."""
2141         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
2142         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
2143         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
2144         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2145         _youtube_ie = None
2146         _max_youtube_results = 1000
2147
2148         def __init__(self, youtube_ie, downloader=None):
2149                 InfoExtractor.__init__(self, downloader)
2150                 self._youtube_ie = youtube_ie
2151
2152         @staticmethod
2153         def suitable(url):
2154                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
2155
2156         def report_download_page(self, query, pagenum):
2157                 """Report attempt to download playlist page with given number."""
2158                 query = query.decode(preferredencoding())
2159                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
2160
2161         def _real_initialize(self):
2162                 self._youtube_ie.initialize()
2163
2164         def _real_extract(self, query):
2165                 mobj = re.match(self._VALID_QUERY, query)
2166                 if mobj is None:
2167                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2168                         return
2169
2170                 prefix, query = query.split(':')
2171                 prefix = prefix[8:]
2172                 query = query.encode('utf-8')
2173                 if prefix == '':
2174                         self._download_n_results(query, 1)
2175                         return
2176                 elif prefix == 'all':
2177                         self._download_n_results(query, self._max_youtube_results)
2178                         return
2179                 else:
2180                         try:
2181                                 n = long(prefix)
2182                                 if n <= 0:
2183                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2184                                         return
2185                                 elif n > self._max_youtube_results:
2186                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
2187                                         n = self._max_youtube_results
2188                                 self._download_n_results(query, n)
2189                                 return
2190                         except ValueError: # parsing prefix as integer fails
2191                                 self._download_n_results(query, 1)
2192                                 return
2193
2194         def _download_n_results(self, query, n):
2195                 """Downloads a specified number of results for a query"""
2196
2197                 video_ids = []
2198                 already_seen = set()
2199                 pagenum = 1
2200
2201                 while True:
2202                         self.report_download_page(query, pagenum)
2203                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2204                         request = urllib2.Request(result_url)
2205                         try:
2206                                 page = urllib2.urlopen(request).read()
2207                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2208                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2209                                 return
2210
2211                         # Extract video identifiers
2212                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2213                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
2214                                 if video_id not in already_seen:
2215                                         video_ids.append(video_id)
2216                                         already_seen.add(video_id)
2217                                         if len(video_ids) == n:
2218                                                 # Specified n videos reached
2219                                                 for id in video_ids:
2220                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2221                                                 return
2222
2223                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2224                                 for id in video_ids:
2225                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2226                                 return
2227
2228                         pagenum = pagenum + 1
2229
2230
2231 class GoogleSearchIE(InfoExtractor):
2232         """Information Extractor for Google Video search queries."""
2233         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
2234         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
2235         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
2236         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
2237         _google_ie = None
2238         _max_google_results = 1000
2239
2240         def __init__(self, google_ie, downloader=None):
2241                 InfoExtractor.__init__(self, downloader)
2242                 self._google_ie = google_ie
2243
2244         @staticmethod
2245         def suitable(url):
2246                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
2247
2248         def report_download_page(self, query, pagenum):
2249                 """Report attempt to download playlist page with given number."""
2250                 query = query.decode(preferredencoding())
2251                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
2252
2253         def _real_initialize(self):
2254                 self._google_ie.initialize()
2255
2256         def _real_extract(self, query):
2257                 mobj = re.match(self._VALID_QUERY, query)
2258                 if mobj is None:
2259                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2260                         return
2261
2262                 prefix, query = query.split(':')
2263                 prefix = prefix[8:]
2264                 query = query.encode('utf-8')
2265                 if prefix == '':
2266                         self._download_n_results(query, 1)
2267                         return
2268                 elif prefix == 'all':
2269                         self._download_n_results(query, self._max_google_results)
2270                         return
2271                 else:
2272                         try:
2273                                 n = long(prefix)
2274                                 if n <= 0:
2275                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2276                                         return
2277                                 elif n > self._max_google_results:
2278                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
2279                                         n = self._max_google_results
2280                                 self._download_n_results(query, n)
2281                                 return
2282                         except ValueError: # parsing prefix as integer fails
2283                                 self._download_n_results(query, 1)
2284                                 return
2285
2286         def _download_n_results(self, query, n):
2287                 """Downloads a specified number of results for a query"""
2288
2289                 video_ids = []
2290                 already_seen = set()
2291                 pagenum = 1
2292
2293                 while True:
2294                         self.report_download_page(query, pagenum)
2295                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2296                         request = urllib2.Request(result_url)
2297                         try:
2298                                 page = urllib2.urlopen(request).read()
2299                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2300                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2301                                 return
2302
2303                         # Extract video identifiers
2304                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2305                                 video_id = mobj.group(1)
2306                                 if video_id not in already_seen:
2307                                         video_ids.append(video_id)
2308                                         already_seen.add(video_id)
2309                                         if len(video_ids) == n:
2310                                                 # Specified n videos reached
2311                                                 for id in video_ids:
2312                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2313                                                 return
2314
2315                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2316                                 for id in video_ids:
2317                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2318                                 return
2319
2320                         pagenum = pagenum + 1
2321
2322
2323 class YahooSearchIE(InfoExtractor):
2324         """Information Extractor for Yahoo! Video search queries."""
2325         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
2326         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
2327         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
2328         _MORE_PAGES_INDICATOR = r'\s*Next'
2329         _yahoo_ie = None
2330         _max_yahoo_results = 1000
2331
2332         def __init__(self, yahoo_ie, downloader=None):
2333                 InfoExtractor.__init__(self, downloader)
2334                 self._yahoo_ie = yahoo_ie
2335
2336         @staticmethod
2337         def suitable(url):
2338                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
2339
2340         def report_download_page(self, query, pagenum):
2341                 """Report attempt to download playlist page with given number."""
2342                 query = query.decode(preferredencoding())
2343                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
2344
2345         def _real_initialize(self):
2346                 self._yahoo_ie.initialize()
2347
2348         def _real_extract(self, query):
2349                 mobj = re.match(self._VALID_QUERY, query)
2350                 if mobj is None:
2351                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2352                         return
2353
2354                 prefix, query = query.split(':')
2355                 prefix = prefix[8:]
2356                 query = query.encode('utf-8')
2357                 if prefix == '':
2358                         self._download_n_results(query, 1)
2359                         return
2360                 elif prefix == 'all':
2361                         self._download_n_results(query, self._max_yahoo_results)
2362                         return
2363                 else:
2364                         try:
2365                                 n = long(prefix)
2366                                 if n <= 0:
2367                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2368                                         return
2369                                 elif n > self._max_yahoo_results:
2370                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
2371                                         n = self._max_yahoo_results
2372                                 self._download_n_results(query, n)
2373                                 return
2374                         except ValueError: # parsing prefix as integer fails
2375                                 self._download_n_results(query, 1)
2376                                 return
2377
2378         def _download_n_results(self, query, n):
2379                 """Downloads a specified number of results for a query"""
2380
2381                 video_ids = []
2382                 already_seen = set()
2383                 pagenum = 1
2384
2385                 while True:
2386                         self.report_download_page(query, pagenum)
2387                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2388                         request = urllib2.Request(result_url)
2389                         try:
2390                                 page = urllib2.urlopen(request).read()
2391                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2392                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2393                                 return
2394
2395                         # Extract video identifiers
2396                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2397                                 video_id = mobj.group(1)
2398                                 if video_id not in already_seen:
2399                                         video_ids.append(video_id)
2400                                         already_seen.add(video_id)
2401                                         if len(video_ids) == n:
2402                                                 # Specified n videos reached
2403                                                 for id in video_ids:
2404                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2405                                                 return
2406
2407                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2408                                 for id in video_ids:
2409                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2410                                 return
2411
2412                         pagenum = pagenum + 1
2413
2414
2415 class YoutubePlaylistIE(InfoExtractor):
2416         """Information Extractor for YouTube playlists."""
2417
2418         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
2419         _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
2420         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2421         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2422         _youtube_ie = None
2423
2424         def __init__(self, youtube_ie, downloader=None):
2425                 InfoExtractor.__init__(self, downloader)
2426                 self._youtube_ie = youtube_ie
2427
2428         @staticmethod
2429         def suitable(url):
2430                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
2431
2432         def report_download_page(self, playlist_id, pagenum):
2433                 """Report attempt to download playlist page with given number."""
2434                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2435
2436         def _real_initialize(self):
2437                 self._youtube_ie.initialize()
2438
2439         def _real_extract(self, url):
2440                 # Extract playlist id
2441                 mobj = re.match(self._VALID_URL, url)
2442                 if mobj is None:
2443                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2444                         return
2445
2446                 # Single video case
2447                 if mobj.group(3) is not None:
2448                         self._youtube_ie.extract(mobj.group(3))
2449                         return
2450
2451                 # Download playlist pages
2452                 # prefix is 'p' as default for playlists but there are other types that need extra care
2453                 playlist_prefix = mobj.group(1)
2454                 if playlist_prefix == 'a':
2455                         playlist_access = 'artist'
2456                 else:
2457                         playlist_prefix = 'p'
2458                         playlist_access = 'view_play_list'
2459                 playlist_id = mobj.group(2)
2460                 video_ids = []
2461                 pagenum = 1
2462
2463                 while True:
2464                         self.report_download_page(playlist_id, pagenum)
2465                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
2466                         try:
2467                                 page = urllib2.urlopen(request).read()
2468                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2469                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2470                                 return
2471
2472                         # Extract video identifiers
2473                         ids_in_page = []
2474                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2475                                 if mobj.group(1) not in ids_in_page:
2476                                         ids_in_page.append(mobj.group(1))
2477                         video_ids.extend(ids_in_page)
2478
2479                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2480                                 break
2481                         pagenum = pagenum + 1
2482
2483                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2484                 playlistend = self._downloader.params.get('playlistend', -1)
2485                 video_ids = video_ids[playliststart:playlistend]
2486
2487                 for id in video_ids:
2488                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2489                 return
2490
2491
2492 class YoutubeUserIE(InfoExtractor):
2493         """Information Extractor for YouTube users."""
2494
2495         _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
2496         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2497         _GDATA_PAGE_SIZE = 50
2498         _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2499         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2500         _youtube_ie = None
2501
2502         def __init__(self, youtube_ie, downloader=None):
2503                 InfoExtractor.__init__(self, downloader)
2504                 self._youtube_ie = youtube_ie
2505
2506         @staticmethod
2507         def suitable(url):
2508                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2509
2510         def report_download_page(self, username, start_index):
2511                 """Report attempt to download user page."""
2512                 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
2513                                 (username, start_index, start_index + self._GDATA_PAGE_SIZE))
2514
2515         def _real_initialize(self):
2516                 self._youtube_ie.initialize()
2517
2518         def _real_extract(self, url):
2519                 # Extract username
2520                 mobj = re.match(self._VALID_URL, url)
2521                 if mobj is None:
2522                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2523                         return
2524
2525                 username = mobj.group(1)
2526
2527                 # Download video ids using YouTube Data API. Result size per
2528                 # query is limited (currently to 50 videos) so we need to query
2529                 # page by page until there are no video ids - it means we got
2530                 # all of them.
2531
2532                 video_ids = []
2533                 pagenum = 0
2534
2535                 while True:
2536                         start_index = pagenum * self._GDATA_PAGE_SIZE + 1
2537                         self.report_download_page(username, start_index)
2538
2539                         request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
2540
2541                         try:
2542                                 page = urllib2.urlopen(request).read()
2543                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2544                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2545                                 return
2546
2547                         # Extract video identifiers
2548                         ids_in_page = []
2549
2550                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2551                                 if mobj.group(1) not in ids_in_page:
2552                                         ids_in_page.append(mobj.group(1))
2553
2554                         video_ids.extend(ids_in_page)
2555
2556                         # A little optimization - if current page is not
2557                         # "full", ie. does not contain PAGE_SIZE video ids then
2558                         # we can assume that this page is the last one - there
2559                         # are no more ids on further pages - no need to query
2560                         # again.
2561
2562                         if len(ids_in_page) < self._GDATA_PAGE_SIZE:
2563                                 break
2564
2565                         pagenum += 1
2566
2567                 all_ids_count = len(video_ids)
2568                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2569                 playlistend = self._downloader.params.get('playlistend', -1)
2570
2571                 if playlistend == -1:
2572                         video_ids = video_ids[playliststart:]
2573                 else:
2574                         video_ids = video_ids[playliststart:playlistend]
2575
2576                 self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
2577                                 (username, all_ids_count, len(video_ids)))
2578
2579                 for video_id in video_ids:
2580                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2581
2582
2583 class DepositFilesIE(InfoExtractor):
2584         """Information extractor for depositfiles.com"""
2585
2586         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
2587
2588         def __init__(self, downloader=None):
2589                 InfoExtractor.__init__(self, downloader)
2590
2591         @staticmethod
2592         def suitable(url):
2593                 return (re.match(DepositFilesIE._VALID_URL, url) is not None)
2594
2595         def report_download_webpage(self, file_id):
2596                 """Report webpage download."""
2597                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2598
2599         def report_extraction(self, file_id):
2600                 """Report information extraction."""
2601                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2602
2603         def _real_initialize(self):
2604                 return
2605
2606         def _real_extract(self, url):
2607                 # At this point we have a new file
2608                 self._downloader.increment_downloads()
2609
2610                 file_id = url.split('/')[-1]
2611                 # Rebuild url in english locale
2612                 url = 'http://depositfiles.com/en/files/' + file_id
2613
2614                 # Retrieve file webpage with 'Free download' button pressed
2615                 free_download_indication = { 'gateway_result' : '1' }
2616                 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
2617                 try:
2618                         self.report_download_webpage(file_id)
2619                         webpage = urllib2.urlopen(request).read()
2620                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2621                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2622                         return
2623
2624                 # Search for the real file URL
2625                 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2626                 if (mobj is None) or (mobj.group(1) is None):
2627                         # Try to figure out reason of the error.
2628                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2629                         if (mobj is not None) and (mobj.group(1) is not None):
2630                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2631                                 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2632                         else:
2633                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2634                         return
2635
2636                 file_url = mobj.group(1)
2637                 file_extension = os.path.splitext(file_url)[1][1:]
2638
2639                 # Search for file title
2640                 mobj = re.search(r'<b title="(.*?)">', webpage)
2641                 if mobj is None:
2642                         self._downloader.trouble(u'ERROR: unable to extract title')
2643                         return
2644                 file_title = mobj.group(1).decode('utf-8')
2645
2646                 try:
2647                         # Process file information
2648                         self._downloader.process_info({
2649                                 'id':           file_id.decode('utf-8'),
2650                                 'url':          file_url.decode('utf-8'),
2651                                 'uploader':     u'NA',
2652                                 'upload_date':  u'NA',
2653                                 'title':        file_title,
2654                                 'stitle':       file_title,
2655                                 'ext':          file_extension.decode('utf-8'),
2656                                 'format':       u'NA',
2657                                 'player_url':   None,
2658                         })
2659                 except UnavailableVideoError, err:
2660                         self._downloader.trouble(u'ERROR: unable to download file')
2661
2662
2663 class FacebookIE(InfoExtractor):
2664         """Information Extractor for Facebook"""
2665
2666         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2667         _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2668         _NETRC_MACHINE = 'facebook'
2669         _available_formats = ['highqual', 'lowqual']
2670         _video_extensions = {
2671                 'highqual': 'mp4',
2672                 'lowqual': 'mp4',
2673         }
2674
2675         def __init__(self, downloader=None):
2676                 InfoExtractor.__init__(self, downloader)
2677
2678         @staticmethod
2679         def suitable(url):
2680                 return (re.match(FacebookIE._VALID_URL, url) is not None)
2681
2682         def _reporter(self, message):
2683                 """Add header and report message."""
2684                 self._downloader.to_screen(u'[facebook] %s' % message)
2685
2686         def report_login(self):
2687                 """Report attempt to log in."""
2688                 self._reporter(u'Logging in')
2689
2690         def report_video_webpage_download(self, video_id):
2691                 """Report attempt to download video webpage."""
2692                 self._reporter(u'%s: Downloading video webpage' % video_id)
2693
2694         def report_information_extraction(self, video_id):
2695                 """Report attempt to extract video information."""
2696                 self._reporter(u'%s: Extracting video information' % video_id)
2697
2698         def _parse_page(self, video_webpage):
2699                 """Extract video information from page"""
2700                 # General data
2701                 data = {'title': r'class="video_title datawrap">(.*?)</',
2702                         'description': r'<div class="datawrap">(.*?)</div>',
2703                         'owner': r'\("video_owner_name", "(.*?)"\)',
2704                         'upload_date': r'data-date="(.*?)"',
2705                         'thumbnail':  r'\("thumb_url", "(?P<THUMB>.*?)"\)',
2706                         }
2707                 video_info = {}
2708                 for piece in data.keys():
2709                         mobj = re.search(data[piece], video_webpage)
2710                         if mobj is not None:
2711                                 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2712
2713                 # Video urls
2714                 video_urls = {}
2715                 for fmt in self._available_formats:
2716                         mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
2717                         if mobj is not None:
2718                                 # URL is in a Javascript segment inside an escaped Unicode format within
2719                                 # the generally utf-8 page
2720                                 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2721                 video_info['video_urls'] = video_urls
2722
2723                 return video_info
2724
2725         def _real_initialize(self):
2726                 if self._downloader is None:
2727                         return
2728
2729                 useremail = None
2730                 password = None
2731                 downloader_params = self._downloader.params
2732
2733                 # Attempt to use provided username and password or .netrc data
2734                 if downloader_params.get('username', None) is not None:
2735                         useremail = downloader_params['username']
2736                         password = downloader_params['password']
2737                 elif downloader_params.get('usenetrc', False):
2738                         try:
2739                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
2740                                 if info is not None:
2741                                         useremail = info[0]
2742                                         password = info[2]
2743                                 else:
2744                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
2745                         except (IOError, netrc.NetrcParseError), err:
2746                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
2747                                 return
2748
2749                 if useremail is None:
2750                         return
2751
2752                 # Log in
2753                 login_form = {
2754                         'email': useremail,
2755                         'pass': password,
2756                         'login': 'Log+In'
2757                         }
2758                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
2759                 try:
2760                         self.report_login()
2761                         login_results = urllib2.urlopen(request).read()
2762                         if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
2763                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
2764                                 return
2765                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2766                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
2767                         return
2768
2769         def _real_extract(self, url):
2770                 mobj = re.match(self._VALID_URL, url)
2771                 if mobj is None:
2772                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2773                         return
2774                 video_id = mobj.group('ID')
2775
2776                 # Get video webpage
2777                 self.report_video_webpage_download(video_id)
2778                 request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
2779                 try:
2780                         page = urllib2.urlopen(request)
2781                         video_webpage = page.read()
2782                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2783                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
2784                         return
2785
2786                 # Start extracting information
2787                 self.report_information_extraction(video_id)
2788
2789                 # Extract information
2790                 video_info = self._parse_page(video_webpage)
2791
2792                 # uploader
2793                 if 'owner' not in video_info:
2794                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
2795                         return
2796                 video_uploader = video_info['owner']
2797
2798                 # title
2799                 if 'title' not in video_info:
2800                         self._downloader.trouble(u'ERROR: unable to extract video title')
2801                         return
2802                 video_title = video_info['title']
2803                 video_title = video_title.decode('utf-8')
2804                 video_title = sanitize_title(video_title)
2805
2806                 # simplified title
2807                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2808                 simple_title = simple_title.strip(ur'_')
2809
2810                 # thumbnail image
2811                 if 'thumbnail' not in video_info:
2812                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
2813                         video_thumbnail = ''
2814                 else:
2815                         video_thumbnail = video_info['thumbnail']
2816
2817                 # upload date
2818                 upload_date = u'NA'
2819                 if 'upload_date' in video_info:
2820                         upload_time = video_info['upload_date']
2821                         timetuple = email.utils.parsedate_tz(upload_time)
2822                         if timetuple is not None:
2823                                 try:
2824                                         upload_date = time.strftime('%Y%m%d', timetuple[0:9])
2825                                 except:
2826                                         pass
2827
2828                 # description
2829                 video_description = video_info.get('description', 'No description available.')
2830
2831                 url_map = video_info['video_urls']
2832                 if len(url_map.keys()) > 0:
2833                         # Decide which formats to download
2834                         req_format = self._downloader.params.get('format', None)
2835                         format_limit = self._downloader.params.get('format_limit', None)
2836
2837                         if format_limit is not None and format_limit in self._available_formats:
2838                                 format_list = self._available_formats[self._available_formats.index(format_limit):]
2839                         else:
2840                                 format_list = self._available_formats
2841                         existing_formats = [x for x in format_list if x in url_map]
2842                         if len(existing_formats) == 0:
2843                                 self._downloader.trouble(u'ERROR: no known formats available for video')
2844                                 return
2845                         if req_format is None:
2846                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
2847                         elif req_format == '-1':
2848                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
2849                         else:
2850                                 # Specific format
2851                                 if req_format not in url_map:
2852                                         self._downloader.trouble(u'ERROR: requested format not available')
2853                                         return
2854                                 video_url_list = [(req_format, url_map[req_format])] # Specific format
2855
2856                 for format_param, video_real_url in video_url_list:
2857
2858                         # At this point we have a new video
2859                         self._downloader.increment_downloads()
2860
2861                         # Extension
2862                         video_extension = self._video_extensions.get(format_param, 'mp4')
2863
2864                         try:
2865                                 # Process video information
2866                                 self._downloader.process_info({
2867                                         'id':           video_id.decode('utf-8'),
2868                                         'url':          video_real_url.decode('utf-8'),
2869                                         'uploader':     video_uploader.decode('utf-8'),
2870                                         'upload_date':  upload_date,
2871                                         'title':        video_title,
2872                                         'stitle':       simple_title,
2873                                         'ext':          video_extension.decode('utf-8'),
2874                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
2875                                         'thumbnail':    video_thumbnail.decode('utf-8'),
2876                                         'description':  video_description.decode('utf-8'),
2877                                         'player_url':   None,
2878                                 })
2879                         except UnavailableVideoError, err:
2880                                 self._downloader.trouble(u'\nERROR: unable to download video')
2881
2882 class BlipTVIE(InfoExtractor):
2883         """Information extractor for blip.tv"""
2884
2885         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
2886         _URL_EXT = r'^.*\.([a-z0-9]+)$'
2887
2888         @staticmethod
2889         def suitable(url):
2890                 return (re.match(BlipTVIE._VALID_URL, url) is not None)
2891
2892         def report_extraction(self, file_id):
2893                 """Report information extraction."""
2894                 self._downloader.to_screen(u'[blip.tv] %s: Extracting information' % file_id)
2895
2896         def _simplify_title(self, title):
2897                 res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
2898                 res = res.strip(ur'_')
2899                 return res
2900
2901         def _real_extract(self, url):
2902                 mobj = re.match(self._VALID_URL, url)
2903                 if mobj is None:
2904                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2905                         return
2906
2907                 if '?' in url:
2908                         cchar = '&'
2909                 else:
2910                         cchar = '?'
2911                 json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
2912                 request = urllib2.Request(json_url)
2913                 self.report_extraction(mobj.group(1))
2914                 try:
2915                         json_code = urllib2.urlopen(request).read()
2916                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2917                         self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
2918                         return
2919                 try:
2920                         json_data = json.loads(json_code)
2921                         if 'Post' in json_data:
2922                                 data = json_data['Post']
2923                         else:
2924                                 data = json_data
2925
2926                         upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
2927                         video_url = data['media']['url']
2928                         umobj = re.match(self._URL_EXT, video_url)
2929                         if umobj is None:
2930                                 raise ValueError('Can not determine filename extension')
2931                         ext = umobj.group(1)
2932
2933                         self._downloader.increment_downloads()
2934
2935                         info = {
2936                                 'id': data['item_id'],
2937                                 'url': video_url,
2938                                 'uploader': data['display_name'],
2939                                 'upload_date': upload_date,
2940                                 'title': data['title'],
2941                                 'stitle': self._simplify_title(data['title']),
2942                                 'ext': ext,
2943                                 'format': data['media']['mimeType'],
2944                                 'thumbnail': data['thumbnailUrl'],
2945                                 'description': data['description'],
2946                                 'player_url': data['embedUrl']
2947                         }
2948                 except (ValueError,KeyError), err:
2949                         self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
2950                         return
2951
2952                 try:
2953                         self._downloader.process_info(info)
2954                 except UnavailableVideoError, err:
2955                         self._downloader.trouble(u'\nERROR: unable to download video')
2956
2957
2958 class MyVideoIE(InfoExtractor):
2959         """Information Extractor for myvideo.de."""
2960
2961         _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
2962
2963         def __init__(self, downloader=None):
2964                 InfoExtractor.__init__(self, downloader)
2965
2966         @staticmethod
2967         def suitable(url):
2968                 return (re.match(MyVideoIE._VALID_URL, url) is not None)
2969
2970         def report_download_webpage(self, video_id):
2971                 """Report webpage download."""
2972                 self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
2973
2974         def report_extraction(self, video_id):
2975                 """Report information extraction."""
2976                 self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)
2977
2978         def _real_initialize(self):
2979                 return
2980
2981         def _real_extract(self,url):
2982                 mobj = re.match(self._VALID_URL, url)
2983                 if mobj is None:
2984                         self._download.trouble(u'ERROR: invalid URL: %s' % url)
2985                         return
2986
2987                 video_id = mobj.group(1)
2988                 simple_title = mobj.group(2).decode('utf-8')
2989                 # should actually not be necessary
2990                 simple_title = sanitize_title(simple_title)
2991                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', simple_title)
2992
2993                 # Get video webpage
2994                 request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id)
2995                 try:
2996                         self.report_download_webpage(video_id)
2997                         webpage = urllib2.urlopen(request).read()
2998                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2999                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
3000                         return
3001
3002                 self.report_extraction(video_id)
3003                 mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
3004                                  webpage)
3005                 if mobj is None:
3006                         self._downloader.trouble(u'ERROR: unable to extract media URL')
3007                         return
3008                 video_url = mobj.group(1) + ('/%s.flv' % video_id)
3009
3010                 mobj = re.search('<title>([^<]+)</title>', webpage)
3011                 if mobj is None:
3012                         self._downloader.trouble(u'ERROR: unable to extract title')
3013                         return
3014
3015                 video_title = mobj.group(1)
3016                 video_title = sanitize_title(video_title)
3017
3018                 try:
3019                         print(video_url)
3020                         self._downloader.process_info({
3021                                 'id':           video_id,
3022                                 'url':          video_url,
3023                                 'uploader':     u'NA',
3024                                 'upload_date':  u'NA',
3025                                 'title':        video_title,
3026                                 'stitle':       simple_title,
3027                                 'ext':          u'flv',
3028                                 'format':       u'NA',
3029                                 'player_url':   None,
3030                         })
3031                 except UnavailableVideoError:
3032                         self._downloader.trouble(u'\nERROR: Unable to download video')
3033
3034 class PostProcessor(object):
3035         """Post Processor class.
3036
3037         PostProcessor objects can be added to downloaders with their
3038         add_post_processor() method. When the downloader has finished a
3039         successful download, it will take its internal chain of PostProcessors
3040         and start calling the run() method on each one of them, first with
3041         an initial argument and then with the returned value of the previous
3042         PostProcessor.
3043
3044         The chain will be stopped if one of them ever returns None or the end
3045         of the chain is reached.
3046
3047         PostProcessor objects follow a "mutual registration" process similar
3048         to InfoExtractor objects.
3049         """
3050
3051         _downloader = None
3052
3053         def __init__(self, downloader=None):
3054                 self._downloader = downloader
3055
3056         def set_downloader(self, downloader):
3057                 """Sets the downloader for this PP."""
3058                 self._downloader = downloader
3059
3060         def run(self, information):
3061                 """Run the PostProcessor.
3062
3063                 The "information" argument is a dictionary like the ones
3064                 composed by InfoExtractors. The only difference is that this
3065                 one has an extra field called "filepath" that points to the
3066                 downloaded file.
3067
3068                 When this method returns None, the postprocessing chain is
3069                 stopped. However, this method may return an information
3070                 dictionary that will be passed to the next postprocessing
3071                 object in the chain. It can be the one it received after
3072                 changing some fields.
3073
3074                 In addition, this method may raise a PostProcessingError
3075                 exception that will be taken into account by the downloader
3076                 it was called from.
3077                 """
3078                 return information # by default, do nothing
3079
3080
3081 class FFmpegExtractAudioPP(PostProcessor):
3082
3083         def __init__(self, downloader=None, preferredcodec=None):
3084                 PostProcessor.__init__(self, downloader)
3085                 if preferredcodec is None:
3086                         preferredcodec = 'best'
3087                 self._preferredcodec = preferredcodec
3088
3089         @staticmethod
3090         def get_audio_codec(path):
3091                 try:
3092                         cmd = ['ffprobe', '-show_streams', '--', path]
3093                         handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
3094                         output = handle.communicate()[0]
3095                         if handle.wait() != 0:
3096                                 return None
3097                 except (IOError, OSError):
3098                         return None
3099                 audio_codec = None
3100                 for line in output.split('\n'):
3101                         if line.startswith('codec_name='):
3102                                 audio_codec = line.split('=')[1].strip()
3103                         elif line.strip() == 'codec_type=audio' and audio_codec is not None:
3104                                 return audio_codec
3105                 return None
3106
3107         @staticmethod
3108         def run_ffmpeg(path, out_path, codec, more_opts):
3109                 try:
3110                         cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
3111                         ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
3112                         return (ret == 0)
3113                 except (IOError, OSError):
3114                         return False
3115
3116         def run(self, information):
3117                 path = information['filepath']
3118
3119                 filecodec = self.get_audio_codec(path)
3120                 if filecodec is None:
3121                         self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
3122                         return None
3123
3124                 more_opts = []
3125                 if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
3126                         if filecodec == 'aac' or filecodec == 'mp3':
3127                                 # Lossless if possible
3128                                 acodec = 'copy'
3129                                 extension = filecodec
3130                                 if filecodec == 'aac':
3131                                         more_opts = ['-f', 'adts']
3132                         else:
3133                                 # MP3 otherwise.
3134                                 acodec = 'libmp3lame'
3135                                 extension = 'mp3'
3136                                 more_opts = ['-ab', '128k']
3137                 else:
3138                         # We convert the audio (lossy)
3139                         acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
3140                         extension = self._preferredcodec
3141                         more_opts = ['-ab', '128k']
3142                         if self._preferredcodec == 'aac':
3143                                 more_opts += ['-f', 'adts']
3144
3145                 (prefix, ext) = os.path.splitext(path)
3146                 new_path = prefix + '.' + extension
3147                 self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
3148                 status = self.run_ffmpeg(path, new_path, acodec, more_opts)
3149
3150                 if not status:
3151                         self._downloader.to_stderr(u'WARNING: error running ffmpeg')
3152                         return None
3153
3154                 try:
3155                         os.remove(path)
3156                 except (IOError, OSError):
3157                         self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
3158                         return None
3159
3160                 information['filepath'] = new_path
3161                 return information
3162
3163
3164 def updateSelf(downloader, filename):
3165         ''' Update the program file with the latest version from the repository '''
3166         # Note: downloader only used for options
3167         if not os.access(filename, os.W_OK):
3168                 sys.exit('ERROR: no write permissions on %s' % filename)
3169
3170         downloader.to_screen('Updating to latest version...')
3171
3172         try:
3173                 try:
3174                         urlh = urllib.urlopen(UPDATE_URL)
3175                         newcontent = urlh.read()
3176                 finally:
3177                         urlh.close()
3178         except (IOError, OSError), err:
3179                 sys.exit('ERROR: unable to download latest version')
3180
3181         try:
3182                 outf = open(filename, 'wb')
3183                 try:
3184                         outf.write(newcontent)
3185                 finally:
3186                         outf.close()
3187         except (IOError, OSError), err:
3188                 sys.exit('ERROR: unable to overwrite current version')
3189
3190         downloader.to_screen('Updated youtube-dl. Restart to use the new version.')
3191
3192 def parseOpts():
3193         # Deferred imports
3194         import getpass
3195         import optparse
3196
3197         def _format_option_string(option):
3198                 ''' ('-o', '--option') -> -o, --format METAVAR'''
3199
3200                 opts = []
3201
3202                 if option._short_opts: opts.append(option._short_opts[0])
3203                 if option._long_opts: opts.append(option._long_opts[0])
3204                 if len(opts) > 1: opts.insert(1, ', ')
3205
3206                 if option.takes_value(): opts.append(' %s' % option.metavar)
3207
3208                 return "".join(opts)
3209
3210         def _find_term_columns():
3211                 columns = os.environ.get('COLUMNS', None)
3212                 if columns:
3213                         return int(columns)
3214
3215                 try:
3216                         sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
3217                         out,err = sp.communicate()
3218                         return int(out.split()[1])
3219                 except:
3220                         pass
3221                 return None
3222
3223         max_width = 80
3224         max_help_position = 80
3225
3226         # No need to wrap help messages if we're on a wide console
3227         columns = _find_term_columns()
3228         if columns: max_width = columns
3229
3230         fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
3231         fmt.format_option_strings = _format_option_string
3232
3233         kw = {
3234                 'version'   : __version__,
3235                 'formatter' : fmt,
3236                 'usage' : '%prog [options] url...',
3237                 'conflict_handler' : 'resolve',
3238         }
3239
3240         parser = optparse.OptionParser(**kw)
3241
3242         # option groups
3243         general        = optparse.OptionGroup(parser, 'General Options')
3244         authentication = optparse.OptionGroup(parser, 'Authentication Options')
3245         video_format   = optparse.OptionGroup(parser, 'Video Format Options')
3246         postproc       = optparse.OptionGroup(parser, 'Post-processing Options')
3247         filesystem     = optparse.OptionGroup(parser, 'Filesystem Options')
3248         verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
3249
3250         general.add_option('-h', '--help',
3251                         action='help', help='print this help text and exit')
3252         general.add_option('-v', '--version',
3253                         action='version', help='print program version and exit')
3254         general.add_option('-U', '--update',
3255                         action='store_true', dest='update_self', help='update this program to latest version')
3256         general.add_option('-i', '--ignore-errors',
3257                         action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
3258         general.add_option('-r', '--rate-limit',
3259                         dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
3260         general.add_option('-R', '--retries',
3261                         dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
3262         general.add_option('--playlist-start',
3263                         dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
3264         general.add_option('--playlist-end',
3265                         dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
3266         general.add_option('--dump-user-agent',
3267                         action='store_true', dest='dump_user_agent',
3268                         help='display the current browser identification', default=False)
3269
3270         authentication.add_option('-u', '--username',
3271                         dest='username', metavar='USERNAME', help='account username')
3272         authentication.add_option('-p', '--password',
3273                         dest='password', metavar='PASSWORD', help='account password')
3274         authentication.add_option('-n', '--netrc',
3275                         action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
3276
3277
3278         video_format.add_option('-f', '--format',
3279                         action='store', dest='format', metavar='FORMAT', help='video format code')
3280         video_format.add_option('--all-formats',
3281                         action='store_const', dest='format', help='download all available video formats', const='-1')
3282         video_format.add_option('--max-quality',
3283                         action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
3284
3285
3286         verbosity.add_option('-q', '--quiet',
3287                         action='store_true', dest='quiet', help='activates quiet mode', default=False)
3288         verbosity.add_option('-s', '--simulate',
3289                         action='store_true', dest='simulate', help='do not download video', default=False)
3290         verbosity.add_option('-g', '--get-url',
3291                         action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
3292         verbosity.add_option('-e', '--get-title',
3293                         action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
3294         verbosity.add_option('--get-thumbnail',
3295                         action='store_true', dest='getthumbnail',
3296                         help='simulate, quiet but print thumbnail URL', default=False)
3297         verbosity.add_option('--get-description',
3298                         action='store_true', dest='getdescription',
3299                         help='simulate, quiet but print video description', default=False)
3300         verbosity.add_option('--get-filename',
3301                         action='store_true', dest='getfilename',
3302                         help='simulate, quiet but print output filename', default=False)
3303         verbosity.add_option('--no-progress',
3304                         action='store_true', dest='noprogress', help='do not print progress bar', default=False)
3305         verbosity.add_option('--console-title',
3306                         action='store_true', dest='consoletitle',
3307                         help='display progress in console titlebar', default=False)
3308
3309
3310         filesystem.add_option('-t', '--title',
3311                         action='store_true', dest='usetitle', help='use title in file name', default=False)
3312         filesystem.add_option('-l', '--literal',
3313                         action='store_true', dest='useliteral', help='use literal title in file name', default=False)
3314         filesystem.add_option('-A', '--auto-number',
3315                         action='store_true', dest='autonumber',
3316                         help='number downloaded files starting from 00000', default=False)
3317         filesystem.add_option('-o', '--output',
3318                         dest='outtmpl', metavar='TEMPLATE', help='output filename template')
3319         filesystem.add_option('-a', '--batch-file',
3320                         dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
3321         filesystem.add_option('-w', '--no-overwrites',
3322                         action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
3323         filesystem.add_option('-c', '--continue',
3324                         action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
3325         filesystem.add_option('--cookies',
3326                         dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
3327         filesystem.add_option('--no-part',
3328                         action='store_true', dest='nopart', help='do not use .part files', default=False)
3329         filesystem.add_option('--no-mtime',
3330                         action='store_false', dest='updatetime',
3331                         help='do not use the Last-modified header to set the file modification time', default=True)
3332         filesystem.add_option('--write-description',
3333                         action='store_true', dest='writedescription',
3334                         help='write video description to a .description file', default=False)
3335         filesystem.add_option('--write-info-json',
3336                         action='store_true', dest='writeinfojson',
3337                         help='write video metadata to a .info.json file', default=False)
3338
3339
3340         postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
3341                         help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
3342         postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
3343                         help='"best", "aac" or "mp3"; best by default')
3344
3345
3346         parser.add_option_group(general)
3347         parser.add_option_group(filesystem)
3348         parser.add_option_group(verbosity)
3349         parser.add_option_group(video_format)
3350         parser.add_option_group(authentication)
3351         parser.add_option_group(postproc)
3352
3353         opts, args = parser.parse_args()
3354
3355         return parser, opts, args
3356
3357 def main():
3358         parser, opts, args = parseOpts()
3359
3360         # Open appropriate CookieJar
3361         if opts.cookiefile is None:
3362                 jar = cookielib.CookieJar()
3363         else:
3364                 try:
3365                         jar = cookielib.MozillaCookieJar(opts.cookiefile)
3366                         if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
3367                                 jar.load()
3368                 except (IOError, OSError), err:
3369                         sys.exit(u'ERROR: unable to open cookie file')
3370
3371         # Dump user agent
3372         if opts.dump_user_agent:
3373                 print std_headers['User-Agent']
3374                 sys.exit(0)
3375
3376         # General configuration
3377         cookie_processor = urllib2.HTTPCookieProcessor(jar)
3378         urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()))
3379         socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
3380
3381         # Batch file verification
3382         batchurls = []
3383         if opts.batchfile is not None:
3384                 try:
3385                         if opts.batchfile == '-':
3386                                 batchfd = sys.stdin
3387                         else:
3388                                 batchfd = open(opts.batchfile, 'r')
3389                         batchurls = batchfd.readlines()
3390                         batchurls = [x.strip() for x in batchurls]
3391                         batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
3392                 except IOError:
3393                         sys.exit(u'ERROR: batch file could not be read')
3394         all_urls = batchurls + args
3395
3396         # Conflicting, missing and erroneous options
3397         if opts.usenetrc and (opts.username is not None or opts.password is not None):
3398                 parser.error(u'using .netrc conflicts with giving username/password')
3399         if opts.password is not None and opts.username is None:
3400                 parser.error(u'account username missing')
3401         if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
3402                 parser.error(u'using output template conflicts with using title, literal title or auto number')
3403         if opts.usetitle and opts.useliteral:
3404                 parser.error(u'using title conflicts with using literal title')
3405         if opts.username is not None and opts.password is None:
3406                 opts.password = getpass.getpass(u'Type account password and press return:')
3407         if opts.ratelimit is not None:
3408                 numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
3409                 if numeric_limit is None:
3410                         parser.error(u'invalid rate limit specified')
3411                 opts.ratelimit = numeric_limit
3412         if opts.retries is not None:
3413                 try:
3414                         opts.retries = long(opts.retries)
3415                 except (TypeError, ValueError), err:
3416                         parser.error(u'invalid retry count specified')
3417         try:
3418                 opts.playliststart = int(opts.playliststart)
3419                 if opts.playliststart <= 0:
3420                         raise ValueError(u'Playlist start must be positive')
3421         except (TypeError, ValueError), err:
3422                 parser.error(u'invalid playlist start number specified')
3423         try:
3424                 opts.playlistend = int(opts.playlistend)
3425                 if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
3426                         raise ValueError(u'Playlist end must be greater than playlist start')
3427         except (TypeError, ValueError), err:
3428                 parser.error(u'invalid playlist end number specified')
3429         if opts.extractaudio:
3430                 if opts.audioformat not in ['best', 'aac', 'mp3']:
3431                         parser.error(u'invalid audio format specified')
3432
3433         # Information extractors
3434         youtube_ie = YoutubeIE()
3435         metacafe_ie = MetacafeIE(youtube_ie)
3436         dailymotion_ie = DailymotionIE()
3437         youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
3438         youtube_user_ie = YoutubeUserIE(youtube_ie)
3439         youtube_search_ie = YoutubeSearchIE(youtube_ie)
3440         google_ie = GoogleIE()
3441         google_search_ie = GoogleSearchIE(google_ie)
3442         photobucket_ie = PhotobucketIE()
3443         yahoo_ie = YahooIE()
3444         yahoo_search_ie = YahooSearchIE(yahoo_ie)
3445         deposit_files_ie = DepositFilesIE()
3446         facebook_ie = FacebookIE()
3447         bliptv_ie = BlipTVIE()
3448         vimeo_ie = VimeoIE()
3449         myvideo_ie = MyVideoIE()
3450
3451         generic_ie = GenericIE()
3452
3453         # File downloader
3454         fd = FileDownloader({
3455                 'usenetrc': opts.usenetrc,
3456                 'username': opts.username,
3457                 'password': opts.password,
3458                 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
3459                 'forceurl': opts.geturl,
3460                 'forcetitle': opts.gettitle,
3461                 'forcethumbnail': opts.getthumbnail,
3462                 'forcedescription': opts.getdescription,
3463                 'forcefilename': opts.getfilename,
3464                 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
3465                 'format': opts.format,
3466                 'format_limit': opts.format_limit,
3467                 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
3468                         or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
3469                         or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
3470                         or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
3471                         or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
3472                         or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
3473                         or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
3474                         or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
3475                         or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
3476                         or u'%(id)s.%(ext)s'),
3477                 'ignoreerrors': opts.ignoreerrors,
3478                 'ratelimit': opts.ratelimit,
3479                 'nooverwrites': opts.nooverwrites,
3480                 'retries': opts.retries,
3481                 'continuedl': opts.continue_dl,
3482                 'noprogress': opts.noprogress,
3483                 'playliststart': opts.playliststart,
3484                 'playlistend': opts.playlistend,
3485                 'logtostderr': opts.outtmpl == '-',
3486                 'consoletitle': opts.consoletitle,
3487                 'nopart': opts.nopart,
3488                 'updatetime': opts.updatetime,
3489                 'writedescription': opts.writedescription,
3490                 'writeinfojson': opts.writeinfojson,
3491                 })
3492         fd.add_info_extractor(youtube_search_ie)
3493         fd.add_info_extractor(youtube_pl_ie)
3494         fd.add_info_extractor(youtube_user_ie)
3495         fd.add_info_extractor(metacafe_ie)
3496         fd.add_info_extractor(dailymotion_ie)
3497         fd.add_info_extractor(youtube_ie)
3498         fd.add_info_extractor(google_ie)
3499         fd.add_info_extractor(google_search_ie)
3500         fd.add_info_extractor(photobucket_ie)
3501         fd.add_info_extractor(yahoo_ie)
3502         fd.add_info_extractor(yahoo_search_ie)
3503         fd.add_info_extractor(deposit_files_ie)
3504         fd.add_info_extractor(facebook_ie)
3505         fd.add_info_extractor(bliptv_ie)
3506         fd.add_info_extractor(vimeo_ie)
3507         fd.add_info_extractor(myvideo_ie)
3508
3509         # This must come last since it's the
3510         # fallback if none of the others work
3511         fd.add_info_extractor(generic_ie)
3512
3513         # PostProcessors
3514         if opts.extractaudio:
3515                 fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat))
3516
3517         # Update version
3518         if opts.update_self:
3519                 updateSelf(fd, sys.argv[0])
3520
3521         # Maybe do nothing
3522         if len(all_urls) < 1:
3523                 if not opts.update_self:
3524                         parser.error(u'you must provide at least one URL')
3525                 else:
3526                         sys.exit()
3527         retcode = fd.download(all_urls)
3528
3529         # Dump cookie jar if requested
3530         if opts.cookiefile is not None:
3531                 try:
3532                         jar.save()
3533                 except (IOError, OSError), err:
3534                         sys.exit(u'ERROR: unable to save cookie jar')
3535
3536         sys.exit(retcode)
3537
3538
3539 if __name__ == '__main__':
3540         try:
3541                 main()
3542         except DownloadError:
3543                 sys.exit(1)
3544         except SameFileError:
3545                 sys.exit(u'ERROR: fixed output name but more than one file to download')
3546         except KeyboardInterrupt:
3547                 sys.exit(u'\nERROR: Interrupted by user')
3548
3549 # vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: