Use `stty size` to find terminal width if we're on linux and COLUMNS is not exported
[youtube-dl.git] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
# Module metadata. The file declares a UTF-8 source encoding above, so
# contributor names may contain non-ASCII characters.
__author__  = (
        "Ricardo Garcia Gonzalez",
        "Danny Colligan",
        "Benjamin Johnson",
        "Vasyl' Vavrychuk",
        "Witold Baryluk",
        # Fixed mojibake: this name had been stored as UTF-8 bytes
        # mis-decoded as Latin-1 ("PaweÅ‚").
        "Paweł Paprota",
        "Gergely Imreh",
        )

__license__ = "Public Domain"
__version__ = '2011.08.04'
16
17 import cookielib
18 import ctypes
19 import datetime
20 import email.utils
21 import gzip
22 import htmlentitydefs
23 import httplib
24 import locale
25 import math
26 import netrc
27 import os
28 import os.path
29 import re
30 import socket
31 import string
32 import StringIO
33 import subprocess
34 import sys
35 import time
36 import urllib
37 import urllib2
38 import zlib
39
40 # parse_qs was moved from the cgi module to the urlparse module recently.
41 try:
42         from urlparse import parse_qs
43 except ImportError:
44         from cgi import parse_qs
45
# Default HTTP headers attached to every request; YoutubeDLHandler copies
# these onto each urllib2 request (see YoutubeDLHandler.http_request).
std_headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-us,en;q=0.5',
}

# Characters regarded as safe for "simple" titles: ASCII letters and digits
# (built as a unicode string via .decode('ascii')).
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
55
def preferredencoding():
        """Get preferred encoding.

        Returns the best encoding scheme for the system, based on
        locale.getpreferredencoding() and some further tweaks.
        """
        try:
                pref = locale.getpreferredencoding()
                # Probe the reported codec; an unknown or broken locale
                # (e.g. a bogus LANG value) raises here.
                u'TEST'.encode(pref)
        except Exception:
                # Previously a bare "except:", which also swallowed
                # KeyboardInterrupt/SystemExit; narrowed to Exception.
                pref = 'UTF-8'
        return pref
71
72 def htmlentity_transform(matchobj):
73         """Transforms an HTML entity to a Unicode character.
74
75         This function receives a match object and is intended to be used with
76         the re.sub() function.
77         """
78         entity = matchobj.group(1)
79
80         # Known non-numeric HTML entity
81         if entity in htmlentitydefs.name2codepoint:
82                 return unichr(htmlentitydefs.name2codepoint[entity])
83
84         # Unicode character
85         mobj = re.match(ur'(?u)#(x?\d+)', entity)
86         if mobj is not None:
87                 numstr = mobj.group(1)
88                 if numstr.startswith(u'x'):
89                         base = 16
90                         numstr = u'0%s' % numstr
91                 else:
92                         base = 10
93                 return unichr(long(numstr, base))
94
95         # Unknown entity in name, return its literal representation
96         return (u'&%s;' % entity)
97
98 def sanitize_title(utitle):
99         """Sanitizes a video title so it could be used as part of a filename."""
100         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
101         return utitle.replace(unicode(os.sep), u'%')
102
103 def sanitize_open(filename, open_mode):
104         """Try to open the given filename, and slightly tweak it if this fails.
105
106         Attempts to open the given filename. If this fails, it tries to change
107         the filename slightly, step by step, until it's either able to open it
108         or it fails and raises a final exception, like the standard open()
109         function.
110
111         It returns the tuple (stream, definitive_file_name).
112         """
113         try:
114                 if filename == u'-':
115                         if sys.platform == 'win32':
116                                 import msvcrt
117                                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
118                         return (sys.stdout, filename)
119                 stream = open(filename, open_mode)
120                 return (stream, filename)
121         except (IOError, OSError), err:
122                 # In case of error, try to remove win32 forbidden chars
123                 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
124
125                 # An exception here should be caught in the caller
126                 stream = open(filename, open_mode)
127                 return (stream, filename)
128
def timeconvert(timestr):
    """Convert an RFC 2822 date string into a Unix timestamp (None on failure)."""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
136
class DownloadError(Exception):
        """Raised when a download fails and error-tolerance is off.

        FileDownloader objects throw this exception when they are not
        configured to continue on errors; it carries the relevant error
        message.
        """
145
class SameFileError(Exception):
        """Raised when multiple downloads would collide on one output file.

        FileDownloader objects throw this exception when they detect that
        several files would have to be written to the same path on disk.
        """
153
class PostProcessingError(Exception):
        """Raised by a PostProcessor's .run() method.

        Signals an error during the postprocessing task.
        """
161
class UnavailableVideoError(Exception):
        """Raised when a video is requested in a format it does not offer."""
169
class ContentTooShortError(Exception):
        """Raised when a download delivers fewer bytes than announced.

        FileDownloader objects raise this when a file they download is
        smaller than the length the server announced, indicating the
        connection was probably interrupted.
        """
        # Both counters are in bytes.
        downloaded = None
        expected = None

        def __init__(self, downloaded, expected):
                # Only the byte counters are stored; Exception.__init__ is
                # deliberately not invoked.
                self.downloaded = downloaded
                self.expected = expected
184
class YoutubeDLHandler(urllib2.HTTPHandler):
        """Handler for HTTP requests and responses.

        This class, when installed with an OpenerDirector, automatically adds
        the standard headers to every HTTP request and handles gzipped and
        deflated responses from web servers. If compression is to be avoided in
        a particular request, the original request in the program code only has
        to include the HTTP header "Youtubedl-No-Compression", which will be
        removed before making the real request.
        
        Part of this code was copied from:

          http://techknack.net/python-urllib2-handlers/
          
        Andrew Rowls, the author of that code, agreed to release it to the
        public domain.
        """

        @staticmethod
        def deflate(data):
                # Try a raw deflate stream first (no zlib header, hence
                # -MAX_WBITS); fall back to a plain zlib stream on error.
                try:
                        return zlib.decompress(data, -zlib.MAX_WBITS)
                except zlib.error:
                        return zlib.decompress(data)
        
        @staticmethod
        def addinfourl_wrapper(stream, headers, url, code):
                # Older urllib2.addinfourl versions lack getcode() and do not
                # accept a code argument; set the attribute manually there.
                if hasattr(urllib2.addinfourl, 'getcode'):
                        return urllib2.addinfourl(stream, headers, url, code)
                ret = urllib2.addinfourl(stream, headers, url)
                ret.code = code
                return ret
        
        def http_request(self, req):
                # Delete-then-add so each standard header replaces any value
                # already present on the request.
                for h in std_headers:
                        if h in req.headers:
                                del req.headers[h]
                        req.add_header(h, std_headers[h])
                # The marker header disables compression for this request and
                # must not reach the wire; drop Accept-encoding with it.
                if 'Youtubedl-no-compression' in req.headers:
                        if 'Accept-encoding' in req.headers:
                                del req.headers['Accept-encoding']
                        del req.headers['Youtubedl-no-compression']
                return req

        def http_response(self, req, resp):
                old_resp = resp
                # gzip: re-wrap the body in a decompressing file object while
                # preserving the original headers, URL, code and msg.
                if resp.headers.get('Content-encoding', '') == 'gzip':
                        gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
                        resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
                        resp.msg = old_resp.msg
                # deflate
                if resp.headers.get('Content-encoding', '') == 'deflate':
                        gz = StringIO.StringIO(self.deflate(resp.read()))
                        resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
                        resp.msg = old_resp.msg
                return resp
242
class FileDownloader(object):
        """File Downloader class.

        File downloader objects are the ones responsible of downloading the
        actual video file and writing it to disk if the user has requested
        it, among some other tasks. In most cases there should be one per
        program. As, given a video URL, the downloader doesn't know how to
        extract all the needed information, task that InfoExtractors do, it
        has to pass the URL to one of them.

        For this, file downloader objects have a method that allows
        InfoExtractors to be registered in a given order. When it is passed
        a URL, the file downloader handles it to the first InfoExtractor it
        finds that reports being able to handle it. The InfoExtractor extracts
        all the information about the video or videos the URL refers to, and
        asks the FileDownloader to process the video information, possibly
        downloading the video.

        File downloaders accept a lot of parameters. In order not to saturate
        the object constructor with arguments, it receives a dictionary of
        options instead. These options are available through the params
        attribute for the InfoExtractors to use. The FileDownloader also
        registers itself as the downloader in charge for the InfoExtractors
        that are added to it, so this is a "mutual registration".

        Available options:

        username:         Username for authentication purposes.
        password:         Password for authentication purposes.
        usenetrc:         Use netrc for authentication instead.
        quiet:            Do not print messages to stdout.
        forceurl:         Force printing final URL.
        forcetitle:       Force printing title.
        forcethumbnail:   Force printing thumbnail URL.
        forcedescription: Force printing description.
        forcefilename:    Force printing final filename.
        simulate:         Do not download the video files.
        format:           Video format code.
        format_limit:     Highest quality format to try.
        outtmpl:          Template for output names.
        ignoreerrors:     Do not stop on download errors.
        ratelimit:        Download speed limit, in bytes/sec.
        nooverwrites:     Prevent overwriting files.
        retries:          Number of times to retry for HTTP error 5xx
        continuedl:       Try to continue downloads if possible.
        noprogress:       Do not print the progress bar.
        playliststart:    Playlist item to start at.
        playlistend:      Playlist item to end at.
        logtostderr:      Log messages to stderr instead of stdout.
        consoletitle:     Display progress in console window's titlebar.
        nopart:           Do not use temporary .part files.
        updatetime:       Use the Last-modified header to set output file timestamps.
        """

        # Class-level placeholders; real values are assigned per instance
        # in __init__.
        params = None
        _ies = []
        _pps = []
        _download_retcode = None
        _num_downloads = None
        _screen_file = None
303
304         def __init__(self, params):
305                 """Create a FileDownloader object with the given options."""
306                 self._ies = []
307                 self._pps = []
308                 self._download_retcode = 0
309                 self._num_downloads = 0
310                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
311                 self.params = params
312
313         @staticmethod
314         def pmkdir(filename):
315                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
316                 components = filename.split(os.sep)
317                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
318                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
319                 for dir in aggregate:
320                         if not os.path.exists(dir):
321                                 os.mkdir(dir)
322
323         @staticmethod
324         def format_bytes(bytes):
325                 if bytes is None:
326                         return 'N/A'
327                 if type(bytes) is str:
328                         bytes = float(bytes)
329                 if bytes == 0.0:
330                         exponent = 0
331                 else:
332                         exponent = long(math.log(bytes, 1024.0))
333                 suffix = 'bkMGTPEZY'[exponent]
334                 converted = float(bytes) / float(1024**exponent)
335                 return '%.2f%s' % (converted, suffix)
336
337         @staticmethod
338         def calc_percent(byte_counter, data_len):
339                 if data_len is None:
340                         return '---.-%'
341                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
342
343         @staticmethod
344         def calc_eta(start, now, total, current):
345                 if total is None:
346                         return '--:--'
347                 dif = now - start
348                 if current == 0 or dif < 0.001: # One millisecond
349                         return '--:--'
350                 rate = float(current) / dif
351                 eta = long((float(total) - float(current)) / rate)
352                 (eta_mins, eta_secs) = divmod(eta, 60)
353                 if eta_mins > 99:
354                         return '--:--'
355                 return '%02d:%02d' % (eta_mins, eta_secs)
356
357         @staticmethod
358         def calc_speed(start, now, bytes):
359                 dif = now - start
360                 if bytes == 0 or dif < 0.001: # One millisecond
361                         return '%10s' % '---b/s'
362                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
363
364         @staticmethod
365         def best_block_size(elapsed_time, bytes):
366                 new_min = max(bytes / 2.0, 1.0)
367                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
368                 if elapsed_time < 0.001:
369                         return long(new_max)
370                 rate = bytes / elapsed_time
371                 if rate > new_max:
372                         return long(new_max)
373                 if rate < new_min:
374                         return long(new_min)
375                 return long(rate)
376
377         @staticmethod
378         def parse_bytes(bytestr):
379                 """Parse a string indicating a byte quantity into a long integer."""
380                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
381                 if matchobj is None:
382                         return None
383                 number = float(matchobj.group(1))
384                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
385                 return long(round(number * multiplier))
386
387         def add_info_extractor(self, ie):
388                 """Add an InfoExtractor object to the end of the list."""
389                 self._ies.append(ie)
390                 ie.set_downloader(self)
391
392         def add_post_processor(self, pp):
393                 """Add a PostProcessor object to the end of the chain."""
394                 self._pps.append(pp)
395                 pp.set_downloader(self)
396
        def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
                """Print message to stdout if not in quiet mode."""
                try:
                        if not self.params.get('quiet', False):
                                # The trailing comma suppresses print's own
                                # newline; the terminator supplies it unless
                                # skip_eol is set (used for progress lines).
                                terminator = [u'\n', u''][skip_eol]
                                print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
                        # Flush even in quiet mode so earlier output appears.
                        self._screen_file.flush()
                except (UnicodeEncodeError), err:
                        if not ignore_encoding_errors:
                                raise
407
        def to_stderr(self, message):
                """Print message to stderr."""
                # Encode explicitly: the stderr stream may not accept unicode.
                print >>sys.stderr, message.encode(preferredencoding())
411
        def to_cons_title(self, message):
                """Set console/terminal window title to message."""
                if not self.params.get('consoletitle', False):
                        return
                if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
                        # c_wchar_p() might not be necessary if `message` is
                        # already of type unicode()
                        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
                elif 'TERM' in os.environ:
                        # xterm title escape sequence: ESC ] 0 ; <title> BEL
                        sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
422
423         def fixed_template(self):
424                 """Checks if the output template is fixed."""
425                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
426
427         def trouble(self, message=None):
428                 """Determine action to take when a download problem appears.
429
430                 Depending on if the downloader has been configured to ignore
431                 download errors or not, this method may throw an exception or
432                 not when errors are found, after printing the message.
433                 """
434                 if message is not None:
435                         self.to_stderr(message)
436                 if not self.params.get('ignoreerrors', False):
437                         raise DownloadError(message)
438                 self._download_retcode = 1
439
440         def slow_down(self, start_time, byte_counter):
441                 """Sleep if the download speed is over the rate limit."""
442                 rate_limit = self.params.get('ratelimit', None)
443                 if rate_limit is None or byte_counter == 0:
444                         return
445                 now = time.time()
446                 elapsed = now - start_time
447                 if elapsed <= 0.0:
448                         return
449                 speed = float(byte_counter) / elapsed
450                 if speed > rate_limit:
451                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
452
453         def temp_name(self, filename):
454                 """Returns a temporary filename for the given filename."""
455                 if self.params.get('nopart', False) or filename == u'-' or \
456                                 (os.path.exists(filename) and not os.path.isfile(filename)):
457                         return filename
458                 return filename + u'.part'
459
460         def undo_temp_name(self, filename):
461                 if filename.endswith(u'.part'):
462                         return filename[:-len(u'.part')]
463                 return filename
464
        def try_rename(self, old_filename, new_filename):
                """Rename a file, routing failures through trouble() instead of raising."""
                try:
                        # Renaming onto itself happens when temp_name()
                        # returned the target unchanged; nothing to do.
                        if old_filename == new_filename:
                                return
                        os.rename(old_filename, new_filename)
                except (IOError, OSError), err:
                        self.trouble(u'ERROR: unable to rename file')
472         
473         def try_utime(self, filename, last_modified_hdr):
474                 """Try to set the last-modified time of the given file."""
475                 if last_modified_hdr is None:
476                         return
477                 if not os.path.isfile(filename):
478                         return
479                 timestr = last_modified_hdr
480                 if timestr is None:
481                         return
482                 filetime = timeconvert(timestr)
483                 if filetime is None:
484                         return
485                 try:
486                         os.utime(filename,(time.time(), filetime))
487                 except:
488                         pass
489
490         def report_destination(self, filename):
491                 """Report destination filename."""
492                 self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
493
        def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
                """Report download progress."""
                if self.params.get('noprogress', False):
                        return
                # \r rewrites the current line in place; skip_eol leaves the
                # cursor on it for the next update.
                self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
                                (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
                self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
                                (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
502
503         def report_resuming_byte(self, resume_len):
504                 """Report attempt to resume at given byte."""
505                 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
506
507         def report_retry(self, count, retries):
508                 """Report retry in case of HTTP error 5xx"""
509                 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
510
        def report_file_already_downloaded(self, file_name):
                """Report file has already been fully downloaded."""
                try:
                        self.to_screen(u'[download] %s has already been downloaded' % file_name)
                except (UnicodeEncodeError), err:
                        # The filename may not be representable in the console
                        # encoding; fall back to a generic message.
                        self.to_screen(u'[download] The file has already been downloaded')
517
518         def report_unable_to_resume(self):
519                 """Report it was impossible to resume download."""
520                 self.to_screen(u'[download] Unable to resume')
521
522         def report_finish(self):
523                 """Report download finished."""
524                 if self.params.get('noprogress', False):
525                         self.to_screen(u'[download] Download completed')
526                 else:
527                         self.to_screen(u'')
528
529         def increment_downloads(self):
530                 """Increment the ordinal that assigns a number to each file."""
531                 self._num_downloads += 1
532
        def prepare_filename(self, info_dict):
                """Generate the output filename, or None (after reporting) on error."""
                try:
                        template_dict = dict(info_dict)
                        # Extra fields made available to the output template.
                        template_dict['epoch'] = unicode(long(time.time()))
                        template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
                        filename = self.params['outtmpl'] % template_dict
                        return filename
                except (ValueError, KeyError), err:
                        # Raised when the template references a missing key or
                        # uses a malformed conversion.
                        self.trouble(u'ERROR: invalid system charset or erroneous output template')
                        return None
544
        def process_info(self, info_dict):
                """Process a single dictionary returned by an InfoExtractor."""
                filename = self.prepare_filename(info_dict)
                # Do nothing else if in simulate mode
                if self.params.get('simulate', False):
                        # Forced printings
                        if self.params.get('forcetitle', False):
                                print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
                        if self.params.get('forceurl', False):
                                print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
                        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
                                print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
                        if self.params.get('forcedescription', False) and 'description' in info_dict:
                                print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
                        if self.params.get('forcefilename', False) and filename is not None:
                                print filename.encode(preferredencoding(), 'xmlcharrefreplace')

                        return

                # prepare_filename() already reported its error before
                # returning None.
                if filename is None:
                        return
                if self.params.get('nooverwrites', False) and os.path.exists(filename):
                        self.to_stderr(u'WARNING: file exists and will be skipped')
                        return

                try:
                        self.pmkdir(filename)
                except (OSError, IOError), err:
                        self.trouble(u'ERROR: unable to create directories: %s' % str(err))
                        return

                try:
                        success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
                except (OSError, IOError), err:
                        # Local I/O failure: surface it as the video being
                        # unavailable in the requested form.
                        raise UnavailableVideoError
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self.trouble(u'ERROR: unable to download video data: %s' % str(err))
                        return
                except (ContentTooShortError, ), err:
                        self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                        return

                if success:
                        try:
                                self.post_process(filename, info_dict)
                        except (PostProcessingError), err:
                                self.trouble(u'ERROR: postprocessing: %s' % str(err))
                                return
593
594         def download(self, url_list):
595                 """Download a given list of URLs."""
596                 if len(url_list) > 1 and self.fixed_template():
597                         raise SameFileError(self.params['outtmpl'])
598
599                 for url in url_list:
600                         suitable_found = False
601                         for ie in self._ies:
602                                 # Go to next InfoExtractor if not suitable
603                                 if not ie.suitable(url):
604                                         continue
605
606                                 # Suitable InfoExtractor found
607                                 suitable_found = True
608
609                                 # Extract information from URL and process it
610                                 ie.extract(url)
611
612                                 # Suitable InfoExtractor had been found; go to next URL
613                                 break
614
615                         if not suitable_found:
616                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
617
618                 return self._download_retcode
619
620         def post_process(self, filename, ie_info):
621                 """Run the postprocessing chain on the given file."""
622                 info = dict(ie_info)
623                 info['filepath'] = filename
624                 for pp in self._pps:
625                         info = pp.run(info)
626                         if info is None:
627                                 break
628
        def _download_with_rtmpdump(self, filename, url, player_url):
                """Download an rtmp URL by driving the external rtmpdump tool."""
                self.report_destination(filename)
                tmpfilename = self.temp_name(filename)

                # Check for rtmpdump first
                try:
                        subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
                except (OSError, IOError):
                        self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
                        return False

                # Download using rtmpdump. rtmpdump returns exit code 2 when
                # the connection was interrumpted and resuming appears to be
                # possible. This is part of rtmpdump's normal usage, AFAIK.
                basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
                retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
                while retval == 2 or retval == 1:
                        prevsize = os.path.getsize(tmpfilename)
                        self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
                        time.sleep(5.0) # This seems to be needed
                        # Retry with resume (-e); add '-k 1' only when the
                        # previous exit code was 1.
                        retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
                        cursize = os.path.getsize(tmpfilename)
                        # Exit code 1 with no growth means we are stuck; give
                        # up rather than loop forever.
                        if prevsize == cursize and retval == 1:
                                break
                if retval == 0:
                        self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
                        self.try_rename(tmpfilename, filename)
                        return True
                else:
                        self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
                        return False
660
        def _do_download(self, filename, url, player_url):
                """Download url to filename over HTTP, resuming a partial
                .part file when possible.

                RTMP URLs are delegated to _download_with_rtmpdump. Returns
                True on success, False on a handled failure, and raises
                ContentTooShortError when fewer bytes than Content-Length
                arrive.
                """
                # Check file already present
                if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
                        self.report_file_already_downloaded(filename)
                        return True

                # Attempt to download using rtmpdump
                if url.startswith('rtmp'):
                        return self._download_with_rtmpdump(filename, url, player_url)

                tmpfilename = self.temp_name(filename)
                stream = None           # opened lazily, only once data actually arrives
                open_mode = 'wb'

                # Do not include the Accept-Encoding header
                # (Youtubedl-no-compression is stripped by a handler elsewhere
                # in this file; compressed bodies would break byte-range resume)
                headers = {'Youtubedl-no-compression': 'True'}
                # basic_request deliberately never gets a Range header; it is
                # used below to probe the full Content-Length after a 416.
                basic_request = urllib2.Request(url, None, headers)
                request = urllib2.Request(url, None, headers)

                # Establish possible resume length
                if os.path.isfile(tmpfilename):
                        resume_len = os.path.getsize(tmpfilename)
                else:
                        resume_len = 0

                # Request parameters in case of being able to resume
                if self.params.get('continuedl', False) and resume_len != 0:
                        self.report_resuming_byte(resume_len)
                        request.add_header('Range','bytes=%d-' % resume_len)
                        open_mode = 'ab'

                count = 0
                retries = self.params.get('retries', 0)
                while count <= retries:
                        # Establish connection
                        try:
                                data = urllib2.urlopen(request)
                                break
                        except (urllib2.HTTPError, ), err:
                                if (err.code < 500 or err.code >= 600) and err.code != 416:
                                        # Unexpected HTTP error
                                        raise
                                elif err.code == 416:
                                        # Unable to resume (requested range not satisfiable)
                                        try:
                                                # Open the connection again without the range header
                                                data = urllib2.urlopen(basic_request)
                                                content_length = data.info()['Content-Length']
                                        except (urllib2.HTTPError, ), err:
                                                if err.code < 500 or err.code >= 600:
                                                        raise
                                        # else runs only when the probe request succeeded
                                        else:
                                                # Examine the reported length
                                                if (content_length is not None and
                                                        (resume_len - 100 < long(content_length) < resume_len + 100)):
                                                        # The file had already been fully downloaded.
                                                        # Explanation to the above condition: in issue #175 it was revealed that
                                                        # YouTube sometimes adds or removes a few bytes from the end of the file,
                                                        # changing the file size slightly and causing problems for some users. So
                                                        # I decided to implement a suggested change and consider the file
                                                        # completely downloaded if the file size differs less than 100 bytes from
                                                        # the one in the hard drive.
                                                        self.report_file_already_downloaded(filename)
                                                        self.try_rename(tmpfilename, filename)
                                                        return True
                                                else:
                                                        # The length does not match, we start the download over
                                                        self.report_unable_to_resume()
                                                        open_mode = 'wb'
                                                        break
                        # Retry
                        count += 1
                        if count <= retries:
                                self.report_retry(count, retries)

                if count > retries:
                        self.trouble(u'ERROR: giving up after %s retries' % retries)
                        return False

                data_len = data.info().get('Content-length', None)
                if data_len is not None:
                        # Server reports the remaining bytes only; add what we
                        # already have on disk to get the full size.
                        data_len = long(data_len) + resume_len
                data_len_str = self.format_bytes(data_len)
                byte_counter = 0 + resume_len
                block_size = 1024
                start = time.time()
                while True:
                        # Download and write
                        before = time.time()
                        data_block = data.read(block_size)
                        after = time.time()
                        if len(data_block) == 0:
                                break
                        byte_counter += len(data_block)

                        # Open file just in time
                        if stream is None:
                                try:
                                        (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                                        filename = self.undo_temp_name(tmpfilename)
                                        self.report_destination(filename)
                                except (OSError, IOError), err:
                                        self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
                                        return False
                        try:
                                stream.write(data_block)
                        except (IOError, OSError), err:
                                self.trouble(u'\nERROR: unable to write data: %s' % str(err))
                                return False
                        # Adapt block size to the measured throughput.
                        block_size = self.best_block_size(after - before, len(data_block))

                        # Progress message
                        percent_str = self.calc_percent(byte_counter, data_len)
                        eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
                        speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
                        self.report_progress(percent_str, data_len_str, speed_str, eta_str)

                        # Apply rate limit
                        self.slow_down(start, byte_counter - resume_len)

                # NOTE(review): if the server returned a 200 with an empty
                # body, stream is still None here and this close() would
                # raise AttributeError — verify whether that can happen.
                stream.close()
                self.report_finish()
                if data_len is not None and byte_counter != data_len:
                        raise ContentTooShortError(byte_counter, long(data_len))
                self.try_rename(tmpfilename, filename)

                # Update file modification time
                if self.params.get('updatetime', True):
                        self.try_utime(filename, data.info().get('last-modified', None))

                return True
792
class InfoExtractor(object):
        """Base class for every site-specific information extractor.

        Given a URL, an extractor produces one dictionary per video found
        there and hands each to the FileDownloader via process_info. Each
        dictionary must carry these fields:

        id:             Video identifier.
        url:            Final video URL.
        uploader:       Nickname of the video uploader.
        title:          Literal title.
        stitle:         Simplified title.
        ext:            Video filename extension.
        format:         Video format.
        player_url:     SWF Player URL (may be None).

        Optional fields, used only by the forced-printing helpers (e.g. for
        search front-ends such as youtube2mp3):

        thumbnail:      Full URL to a video thumbnail image.
        description:    One-line video description.

        Subclasses override _real_initialize(), _real_extract() and the
        static suitable() method, and are registered with the downloader.
        """

        # Class-level defaults; instances shadow these in __init__.
        _ready = False
        _downloader = None

        def __init__(self, downloader=None):
                """Create an extractor, optionally attached to a downloader."""
                self._ready = False
                self.set_downloader(downloader)

        @staticmethod
        def suitable(url):
                """Return True if this extractor can handle the given URL.

                The base class accepts nothing; subclasses override this.
                """
                return False

        def initialize(self):
                """Perform one-time setup (authentication, etc.) exactly once."""
                if self._ready:
                        return
                self._real_initialize()
                self._ready = True

        def extract(self, url):
                """Initialize if needed, then extract info dicts for the URL."""
                self.initialize()
                return self._real_extract(url)

        def set_downloader(self, downloader):
                """Attach the FileDownloader this extractor reports to."""
                self._downloader = downloader

        def _real_initialize(self):
                """Subclass hook for real initialization; default is a no-op."""
                pass

        def _real_extract(self, url):
                """Subclass hook for real extraction; default is a no-op."""
                pass
863
class YoutubeIE(InfoExtractor):
        """Information extractor for youtube.com."""

        # Group 1 (optional) matches the scheme+host prefix; group 2 is the
        # video id. The trailing (?(1).+)? is a conditional: extra text after
        # the id is only allowed when a full URL (group 1) was present.
        _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
        _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
        _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
        _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
        _NETRC_MACHINE = 'youtube'
        # Listed in order of quality
        _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
        # Maps itag format codes to file extensions; anything missing falls
        # back to 'flv' in _real_extract.
        _video_extensions = {
                '13': '3gp',
                '17': 'mp4',
                '18': 'mp4',
                '22': 'mp4',
                '37': 'mp4',
                '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
                '43': 'webm',
                '45': 'webm',
        }

        @staticmethod
        def suitable(url):
                """Return True if the URL matches _VALID_URL."""
                return (re.match(YoutubeIE._VALID_URL, url) is not None)

        def report_lang(self):
                """Report attempt to set language."""
                self._downloader.to_screen(u'[youtube] Setting language')

        def report_login(self):
                """Report attempt to log in."""
                self._downloader.to_screen(u'[youtube] Logging in')

        def report_age_confirmation(self):
                """Report attempt to confirm age."""
                self._downloader.to_screen(u'[youtube] Confirming age')

        def report_video_webpage_download(self, video_id):
                """Report attempt to download video webpage."""
                self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)

        def report_video_info_webpage_download(self, video_id):
                """Report attempt to download video info webpage."""
                self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)

        def report_information_extraction(self, video_id):
                """Report attempt to extract video information."""
                self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)

        def report_unavailable_format(self, video_id, format):
                """Report extracted video URL."""
                self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))

        def report_rtmp_download(self):
                """Indicate the download will use the RTMP protocol."""
                self._downloader.to_screen(u'[youtube] RTMP download detected')

        def _real_initialize(self):
                """Set the site language and, when credentials are available
                (CLI options or ~/.netrc), log in and confirm age.

                All failures here are soft: they warn (or trouble) and return,
                leaving extraction to proceed unauthenticated.
                """
                if self._downloader is None:
                        return

                username = None
                password = None
                downloader_params = self._downloader.params

                # Attempt to use provided username and password or .netrc data
                if downloader_params.get('username', None) is not None:
                        username = downloader_params['username']
                        password = downloader_params['password']
                elif downloader_params.get('usenetrc', False):
                        try:
                                info = netrc.netrc().authenticators(self._NETRC_MACHINE)
                                if info is not None:
                                        username = info[0]
                                        password = info[2]
                                else:
                                        raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
                        except (IOError, netrc.NetrcParseError), err:
                                self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
                                return

                # Set language (the response body is ignored; presumably the
                # request is made for its cookie side effects — TODO confirm)
                request = urllib2.Request(self._LANG_URL)
                try:
                        self.report_lang()
                        urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
                        return

                # No authentication to be performed
                if username is None:
                        return

                # Log in
                login_form = {
                                'current_form': 'loginForm',
                                'next':         '/',
                                'action_login': 'Log In',
                                'username':     username,
                                'password':     password,
                                }
                request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
                try:
                        self.report_login()
                        login_results = urllib2.urlopen(request).read()
                        # If the login form is still present, login was rejected.
                        if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
                                self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
                                return
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
                        return

                # Confirm age
                age_form = {
                                'next_url':             '/',
                                'action_confirm':       'Confirm',
                                }
                request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
                try:
                        self.report_age_confirmation()
                        age_results = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
                        return

        def _real_extract(self, url):
                """Extract video information for a YouTube URL and feed each
                selected format to the downloader via process_info."""
                # Extract video id from URL
                mobj = re.match(self._VALID_URL, url)
                if mobj is None:
                        self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
                        return
                video_id = mobj.group(2)

                # Get video webpage
                self.report_video_webpage_download(video_id)
                # NOTE(review): '&amp;has_verified=1' looks like an
                # HTML-escaping slip for '&has_verified=1' — verify.
                request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id)
                try:
                        video_webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
                        return

                # Attempt to extract SWF player URL (the page embeds it with
                # backslash-escaped slashes; re.sub unescapes it)
                mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
                if mobj is not None:
                        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
                else:
                        player_url = None

                # Get video info: try several 'el' variants until one of them
                # yields a 'token' parameter
                self.report_video_info_webpage_download(video_id)
                for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
                        video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                                           % (video_id, el_type))
                        request = urllib2.Request(video_info_url)
                        try:
                                video_info_webpage = urllib2.urlopen(request).read()
                                video_info = parse_qs(video_info_webpage)
                                if 'token' in video_info:
                                        break
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                                self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
                                return
                if 'token' not in video_info:
                        if 'reason' in video_info:
                                self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
                        else:
                                self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
                        return

                # Start extracting information
                self.report_information_extraction(video_id)

                # uploader
                if 'author' not in video_info:
                        self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
                        return
                video_uploader = urllib.unquote_plus(video_info['author'][0])

                # title
                if 'title' not in video_info:
                        self._downloader.trouble(u'ERROR: unable to extract video title')
                        return
                video_title = urllib.unquote_plus(video_info['title'][0])
                video_title = video_title.decode('utf-8')
                video_title = sanitize_title(video_title)

                # simplified title: collapse any run of non-simple characters
                # into a single underscore, then trim underscores at the ends
                simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
                simple_title = simple_title.strip(ur'_')

                # thumbnail image
                if 'thumbnail_url' not in video_info:
                        self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
                        video_thumbnail = ''
                else:   # don't panic if we can't find it
                        video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

                # upload date
                upload_date = u'NA'
                mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
                if mobj is not None:
                        # Normalize separators to single spaces before parsing
                        upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
                        format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
                        # No break after a successful parse: later formats
                        # simply fail on the already-normalized YYYYMMDD value
                        # and are swallowed by the bare except.
                        for expression in format_expressions:
                                try:
                                        upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
                                except:
                                        pass

                # description (only extracted when it will actually be printed)
                video_description = 'No description available.'
                if self._downloader.params.get('forcedescription', False):
                        mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
                        if mobj is not None:
                                video_description = mobj.group(1)

                # token
                video_token = urllib.unquote_plus(video_info['token'][0])

                # Decide which formats to download
                req_format = self._downloader.params.get('format', None)

                if 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
                        # Comma-separated list of &-joined key=value maps,
                        # one per available format
                        url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
                        url_data = [dict(pairStr.split('=') for pairStr in uds.split('&')) for uds in url_data_strs]
                        url_map = dict((ud['itag'], urllib.unquote(ud['url'])) for ud in url_data)
                        # format_limit caps quality at the given format code
                        format_limit = self._downloader.params.get('format_limit', None)
                        if format_limit is not None and format_limit in self._available_formats:
                                format_list = self._available_formats[self._available_formats.index(format_limit):]
                        else:
                                format_list = self._available_formats
                        existing_formats = [x for x in format_list if x in url_map]
                        if len(existing_formats) == 0:
                                self._downloader.trouble(u'ERROR: no known formats available for video')
                                return
                        if req_format is None:
                                video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
                        elif req_format == '-1':
                                video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
                        else:
                                # Specific format
                                if req_format not in url_map:
                                        self._downloader.trouble(u'ERROR: requested format not available')
                                        return
                                video_url_list = [(req_format, url_map[req_format])] # Specific format

                elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
                        self.report_rtmp_download()
                        video_url_list = [(None, video_info['conn'][0])]

                else:
                        self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
                        return

                for format_param, video_real_url in video_url_list:
                        # At this point we have a new video
                        self._downloader.increment_downloads()

                        # Extension
                        video_extension = self._video_extensions.get(format_param, 'flv')

                        # Find the video URL in fmt_url_map or conn paramters
                        try:
                                # Process video information
                                self._downloader.process_info({
                                        'id':           video_id.decode('utf-8'),
                                        'url':          video_real_url.decode('utf-8'),
                                        'uploader':     video_uploader.decode('utf-8'),
                                        'upload_date':  upload_date,
                                        'title':        video_title,
                                        'stitle':       simple_title,
                                        'ext':          video_extension.decode('utf-8'),
                                        'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
                                        'thumbnail':    video_thumbnail.decode('utf-8'),
                                        'description':  video_description.decode('utf-8'),
                                        'player_url':   player_url,
                                })
                        except UnavailableVideoError, err:
                                self._downloader.trouble(u'\nERROR: unable to download video')
1145
1146
1147 class MetacafeIE(InfoExtractor):
1148         """Information Extractor for metacafe.com."""
1149
1150         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
1151         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
1152         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
1153         _youtube_ie = None
1154
1155         def __init__(self, youtube_ie, downloader=None):
1156                 InfoExtractor.__init__(self, downloader)
1157                 self._youtube_ie = youtube_ie
1158
1159         @staticmethod
1160         def suitable(url):
1161                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
1162
1163         def report_disclaimer(self):
1164                 """Report disclaimer retrieval."""
1165                 self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
1166
1167         def report_age_confirmation(self):
1168                 """Report attempt to confirm age."""
1169                 self._downloader.to_screen(u'[metacafe] Confirming age')
1170
1171         def report_download_webpage(self, video_id):
1172                 """Report webpage download."""
1173                 self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
1174
1175         def report_extraction(self, video_id):
1176                 """Report information extraction."""
1177                 self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
1178
        def _real_initialize(self):
                """One-time setup: visit the family-filter disclaimer page and
                POST the age-confirmation form so subsequent watch pages are
                not blocked by the filter.

                Failures are reported via trouble() and abort silently.
                """
                # Retrieve disclaimer (response body is discarded; the request
                # is presumably made for its cookie side effects — TODO confirm)
                request = urllib2.Request(self._DISCLAIMER)
                try:
                        self.report_disclaimer()
                        disclaimer = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
                        return

                # Confirm age
                disclaimer_form = {
                        'filters': '0',
                        'submit': "Continue - I'm over 18",
                        }
                request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
                try:
                        self.report_age_confirmation()
                        disclaimer = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
                        return
1201
	def _real_extract(self, url):
		"""Extract a Metacafe video and hand it to the downloader.

		Pulls the video id and simplified title out of the URL,
		delegates "yt-" prefixed ids to the YouTube extractor, then
		scrapes the watch page for the media URL, title and uploader
		before calling process_info().
		"""
		# Extract id and simplified title from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		video_id = mobj.group(1)

		# Check if video comes from YouTube
		mobj2 = re.match(r'^yt-(.*)$', video_id)
		if mobj2 is not None:
			# Metacafe mirrors YouTube videos under a "yt-" id prefix;
			# hand those straight to the YouTube extractor.
			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
			return

		# At this point we have a new video
		self._downloader.increment_downloads()

		simple_title = mobj.group(2).decode('utf-8')

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
		if mobj is not None:
			# Old-style page: the media URL is a plain query parameter.
			mediaURL = urllib.unquote(mobj.group(1))
			# The trailing three characters give the file extension.
			video_extension = mediaURL[-3:]

			# Extract gdaKey if available
			mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
			if mobj is None:
				video_url = mediaURL
			else:
				gdaKey = mobj.group(1)
				video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
		else:
			# New-style page: media info lives in the "flashvars"
			# attribute as a query string with a JSON-ish mediaData blob.
			mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			vardict = parse_qs(mobj.group(1))
			if 'mediaData' not in vardict:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			# Un-escape the JSON-style forward slashes.
			mediaURL = mobj.group(1).replace('\\/', '/')
			video_extension = mediaURL[-3:]
			video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))

		mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)

		mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = mobj.group(1)

		try:
			# Process video information
			self._downloader.process_info({
				'id':           video_id.decode('utf-8'),
				'url':          video_url.decode('utf-8'),
				'uploader':     video_uploader.decode('utf-8'),
				'upload_date':  u'NA',
				'title':        video_title,
				'stitle':       simple_title,
				'ext':          video_extension.decode('utf-8'),
				'format':       u'NA',
				'player_url':   None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'\nERROR: unable to download video')
1290
1291
1292 class DailymotionIE(InfoExtractor):
1293         """Information Extractor for Dailymotion"""
1294
1295         _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1296
1297         def __init__(self, downloader=None):
1298                 InfoExtractor.__init__(self, downloader)
1299
1300         @staticmethod
1301         def suitable(url):
1302                 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1303
1304         def report_download_webpage(self, video_id):
1305                 """Report webpage download."""
1306                 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
1307
1308         def report_extraction(self, video_id):
1309                 """Report information extraction."""
1310                 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
1311
1312         def _real_initialize(self):
1313                 return
1314
1315         def _real_extract(self, url):
1316                 # Extract id and simplified title from URL
1317                 mobj = re.match(self._VALID_URL, url)
1318                 if mobj is None:
1319                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1320                         return
1321
1322                 # At this point we have a new video
1323                 self._downloader.increment_downloads()
1324                 video_id = mobj.group(1)
1325
1326                 simple_title = mobj.group(2).decode('utf-8')
1327                 video_extension = 'flv'
1328
1329                 # Retrieve video webpage to extract further information
1330                 request = urllib2.Request(url)
1331                 try:
1332                         self.report_download_webpage(video_id)
1333                         webpage = urllib2.urlopen(request).read()
1334                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1335                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1336                         return
1337
1338                 # Extract URL, uploader and title from webpage
1339                 self.report_extraction(video_id)
1340                 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1341                 if mobj is None:
1342                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1343                         return
1344                 mediaURL = urllib.unquote(mobj.group(1))
1345
1346                 # if needed add http://www.dailymotion.com/ if relative URL
1347
1348                 video_url = mediaURL
1349
1350                 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1351                 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1352                 if mobj is None:
1353                         self._downloader.trouble(u'ERROR: unable to extract title')
1354                         return
1355                 video_title = mobj.group(1).decode('utf-8')
1356                 video_title = sanitize_title(video_title)
1357
1358                 mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
1359                 if mobj is None:
1360                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1361                         return
1362                 video_uploader = mobj.group(1)
1363
1364                 try:
1365                         # Process video information
1366                         self._downloader.process_info({
1367                                 'id':           video_id.decode('utf-8'),
1368                                 'url':          video_url.decode('utf-8'),
1369                                 'uploader':     video_uploader.decode('utf-8'),
1370                                 'upload_date':  u'NA',
1371                                 'title':        video_title,
1372                                 'stitle':       simple_title,
1373                                 'ext':          video_extension.decode('utf-8'),
1374                                 'format':       u'NA',
1375                                 'player_url':   None,
1376                         })
1377                 except UnavailableVideoError:
1378                         self._downloader.trouble(u'\nERROR: unable to download video')
1379
class GoogleIE(InfoExtractor):
	"""Information extractor for video.google.com."""

	_VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'

	def __init__(self, downloader=None):
		InfoExtractor.__init__(self, downloader)

	@staticmethod
	def suitable(url):
		"""Return True if this extractor can handle the given URL."""
		return (re.match(GoogleIE._VALID_URL, url) is not None)

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)

	def _real_initialize(self):
		# No login or age confirmation needed for Google Video.
		return

	def _real_extract(self, url):
		"""Scrape a Google Video page and hand the video to the downloader.

		Prefers the mp4 download URL embedded in the page; falls back to
		the hex-escaped flash stream URL when no download link exists.
		"""
		# Extract id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
			return

		# At this point we have a new video
		self._downloader.increment_downloads()
		video_id = mobj.group(1)

		video_extension = 'mp4'

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader, and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r"download_url:'([^']+)'", webpage)
		if mobj is None:
			# No mp4 download link; fall back to the flash stream URL.
			video_extension = 'flv'
			mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract media URL')
			return
		mediaURL = urllib.unquote(mobj.group(1))
		# Undo the JavaScript hex escaping of '=' and '&'.
		mediaURL = mediaURL.replace('\\x3d', '\x3d')
		mediaURL = mediaURL.replace('\\x26', '\x26')

		video_url = mediaURL

		mobj = re.search(r'<title>(.*)</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)
		# Collapse runs of non-alphanumeric characters into underscores.
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

		# Extract video description
		mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract video description')
			return
		video_description = mobj.group(1).decode('utf-8')
		if not video_description:
			video_description = 'No description available.'

		# Extract video thumbnail
		# NOTE(review): video_thumbnail and video_description are computed
		# but never passed to process_info below — confirm this is intended.
		if self._downloader.params.get('forcethumbnail', False):
			request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
			try:
				webpage = urllib2.urlopen(request).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
				return
			mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
				return
			video_thumbnail = mobj.group(1)
		else:	# we need something to pass to process_info
			video_thumbnail = ''


		try:
			# Process video information
			self._downloader.process_info({
				'id':           video_id.decode('utf-8'),
				'url':          video_url.decode('utf-8'),
				'uploader':     u'NA',
				'upload_date':  u'NA',
				'title':        video_title,
				'stitle':       simple_title,
				'ext':          video_extension.decode('utf-8'),
				'format':       u'NA',
				'player_url':   None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'\nERROR: unable to download video')
1489
1490
1491 class PhotobucketIE(InfoExtractor):
1492         """Information extractor for photobucket.com."""
1493
1494         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1495
1496         def __init__(self, downloader=None):
1497                 InfoExtractor.__init__(self, downloader)
1498
1499         @staticmethod
1500         def suitable(url):
1501                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1502
1503         def report_download_webpage(self, video_id):
1504                 """Report webpage download."""
1505                 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1506
1507         def report_extraction(self, video_id):
1508                 """Report information extraction."""
1509                 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1510
1511         def _real_initialize(self):
1512                 return
1513
1514         def _real_extract(self, url):
1515                 # Extract id from URL
1516                 mobj = re.match(self._VALID_URL, url)
1517                 if mobj is None:
1518                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1519                         return
1520
1521                 # At this point we have a new video
1522                 self._downloader.increment_downloads()
1523                 video_id = mobj.group(1)
1524
1525                 video_extension = 'flv'
1526
1527                 # Retrieve video webpage to extract further information
1528                 request = urllib2.Request(url)
1529                 try:
1530                         self.report_download_webpage(video_id)
1531                         webpage = urllib2.urlopen(request).read()
1532                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1533                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1534                         return
1535
1536                 # Extract URL, uploader, and title from webpage
1537                 self.report_extraction(video_id)
1538                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1539                 if mobj is None:
1540                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1541                         return
1542                 mediaURL = urllib.unquote(mobj.group(1))
1543
1544                 video_url = mediaURL
1545
1546                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1547                 if mobj is None:
1548                         self._downloader.trouble(u'ERROR: unable to extract title')
1549                         return
1550                 video_title = mobj.group(1).decode('utf-8')
1551                 video_title = sanitize_title(video_title)
1552                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1553
1554                 video_uploader = mobj.group(2).decode('utf-8')
1555
1556                 try:
1557                         # Process video information
1558                         self._downloader.process_info({
1559                                 'id':           video_id.decode('utf-8'),
1560                                 'url':          video_url.decode('utf-8'),
1561                                 'uploader':     video_uploader,
1562                                 'upload_date':  u'NA',
1563                                 'title':        video_title,
1564                                 'stitle':       simple_title,
1565                                 'ext':          video_extension.decode('utf-8'),
1566                                 'format':       u'NA',
1567                                 'player_url':   None,
1568                         })
1569                 except UnavailableVideoError:
1570                         self._downloader.trouble(u'\nERROR: unable to download video')
1571
1572
1573 class YahooIE(InfoExtractor):
1574         """Information extractor for video.yahoo.com."""
1575
1576         # _VALID_URL matches all Yahoo! Video URLs
1577         # _VPAGE_URL matches only the extractable '/watch/' URLs
1578         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1579         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1580
1581         def __init__(self, downloader=None):
1582                 InfoExtractor.__init__(self, downloader)
1583
1584         @staticmethod
1585         def suitable(url):
1586                 return (re.match(YahooIE._VALID_URL, url) is not None)
1587
1588         def report_download_webpage(self, video_id):
1589                 """Report webpage download."""
1590                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1591
1592         def report_extraction(self, video_id):
1593                 """Report information extraction."""
1594                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1595
1596         def _real_initialize(self):
1597                 return
1598
1599         def _real_extract(self, url, new_video=True):
1600                 # Extract ID from URL
1601                 mobj = re.match(self._VALID_URL, url)
1602                 if mobj is None:
1603                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1604                         return
1605
1606                 # At this point we have a new video
1607                 self._downloader.increment_downloads()
1608                 video_id = mobj.group(2)
1609                 video_extension = 'flv'
1610
1611                 # Rewrite valid but non-extractable URLs as
1612                 # extractable English language /watch/ URLs
1613                 if re.match(self._VPAGE_URL, url) is None:
1614                         request = urllib2.Request(url)
1615                         try:
1616                                 webpage = urllib2.urlopen(request).read()
1617                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1618                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1619                                 return
1620
1621                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1622                         if mobj is None:
1623                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1624                                 return
1625                         yahoo_id = mobj.group(1)
1626
1627                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1628                         if mobj is None:
1629                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1630                                 return
1631                         yahoo_vid = mobj.group(1)
1632
1633                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1634                         return self._real_extract(url, new_video=False)
1635
1636                 # Retrieve video webpage to extract further information
1637                 request = urllib2.Request(url)
1638                 try:
1639                         self.report_download_webpage(video_id)
1640                         webpage = urllib2.urlopen(request).read()
1641                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1642                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1643                         return
1644
1645                 # Extract uploader and title from webpage
1646                 self.report_extraction(video_id)
1647                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1648                 if mobj is None:
1649                         self._downloader.trouble(u'ERROR: unable to extract video title')
1650                         return
1651                 video_title = mobj.group(1).decode('utf-8')
1652                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1653
1654                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1655                 if mobj is None:
1656                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1657                         return
1658                 video_uploader = mobj.group(1).decode('utf-8')
1659
1660                 # Extract video thumbnail
1661                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1662                 if mobj is None:
1663                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1664                         return
1665                 video_thumbnail = mobj.group(1).decode('utf-8')
1666
1667                 # Extract video description
1668                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1669                 if mobj is None:
1670                         self._downloader.trouble(u'ERROR: unable to extract video description')
1671                         return
1672                 video_description = mobj.group(1).decode('utf-8')
1673                 if not video_description: video_description = 'No description available.'
1674
1675                 # Extract video height and width
1676                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1677                 if mobj is None:
1678                         self._downloader.trouble(u'ERROR: unable to extract video height')
1679                         return
1680                 yv_video_height = mobj.group(1)
1681
1682                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1683                 if mobj is None:
1684                         self._downloader.trouble(u'ERROR: unable to extract video width')
1685                         return
1686                 yv_video_width = mobj.group(1)
1687
1688                 # Retrieve video playlist to extract media URL
1689                 # I'm not completely sure what all these options are, but we
1690                 # seem to need most of them, otherwise the server sends a 401.
1691                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1692                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1693                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1694                                                                   '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1695                                                                   '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1696                 try:
1697                         self.report_download_webpage(video_id)
1698                         webpage = urllib2.urlopen(request).read()
1699                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1700                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1701                         return
1702
1703                 # Extract media URL from playlist XML
1704                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1705                 if mobj is None:
1706                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1707                         return
1708                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1709                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1710
1711                 try:
1712                         # Process video information
1713                         self._downloader.process_info({
1714                                 'id':           video_id.decode('utf-8'),
1715                                 'url':          video_url,
1716                                 'uploader':     video_uploader,
1717                                 'upload_date':  u'NA',
1718                                 'title':        video_title,
1719                                 'stitle':       simple_title,
1720                                 'ext':          video_extension.decode('utf-8'),
1721                                 'thumbnail':    video_thumbnail.decode('utf-8'),
1722                                 'description':  video_description,
1723                                 'thumbnail':    video_thumbnail,
1724                                 'description':  video_description,
1725                                 'player_url':   None,
1726                         })
1727                 except UnavailableVideoError:
1728                         self._downloader.trouble(u'\nERROR: unable to download video')
1729
1730
1731 class GenericIE(InfoExtractor):
1732         """Generic last-resort information extractor."""
1733
1734         def __init__(self, downloader=None):
1735                 InfoExtractor.__init__(self, downloader)
1736
1737         @staticmethod
1738         def suitable(url):
1739                 return True
1740
1741         def report_download_webpage(self, video_id):
1742                 """Report webpage download."""
1743                 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1744                 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1745
1746         def report_extraction(self, video_id):
1747                 """Report information extraction."""
1748                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1749
1750         def _real_initialize(self):
1751                 return
1752
1753         def _real_extract(self, url):
1754                 # At this point we have a new video
1755                 self._downloader.increment_downloads()
1756
1757                 video_id = url.split('/')[-1]
1758                 request = urllib2.Request(url)
1759                 try:
1760                         self.report_download_webpage(video_id)
1761                         webpage = urllib2.urlopen(request).read()
1762                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1763                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1764                         return
1765                 except ValueError, err:
1766                         # since this is the last-resort InfoExtractor, if
1767                         # this error is thrown, it'll be thrown here
1768                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1769                         return
1770
1771                 self.report_extraction(video_id)
1772                 # Start with something easy: JW Player in SWFObject
1773                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1774                 if mobj is None:
1775                         # Broaden the search a little bit
1776                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1777                 if mobj is None:
1778                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1779                         return
1780
1781                 # It's possible that one of the regexes
1782                 # matched, but returned an empty group:
1783                 if mobj.group(1) is None:
1784                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1785                         return
1786
1787                 video_url = urllib.unquote(mobj.group(1))
1788                 video_id  = os.path.basename(video_url)
1789
1790                 # here's a fun little line of code for you:
1791                 video_extension = os.path.splitext(video_id)[1][1:]
1792                 video_id        = os.path.splitext(video_id)[0]
1793
1794                 # it's tempting to parse this further, but you would
1795                 # have to take into account all the variations like
1796                 #   Video Title - Site Name
1797                 #   Site Name | Video Title
1798                 #   Video Title - Tagline | Site Name
1799                 # and so on and so forth; it's just not practical
1800                 mobj = re.search(r'<title>(.*)</title>', webpage)
1801                 if mobj is None:
1802                         self._downloader.trouble(u'ERROR: unable to extract title')
1803                         return
1804                 video_title = mobj.group(1).decode('utf-8')
1805                 video_title = sanitize_title(video_title)
1806                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1807
1808                 # video uploader is domain name
1809                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1810                 if mobj is None:
1811                         self._downloader.trouble(u'ERROR: unable to extract title')
1812                         return
1813                 video_uploader = mobj.group(1).decode('utf-8')
1814
1815                 try:
1816                         # Process video information
1817                         self._downloader.process_info({
1818                                 'id':           video_id.decode('utf-8'),
1819                                 'url':          video_url.decode('utf-8'),
1820                                 'uploader':     video_uploader,
1821                                 'upload_date':  u'NA',
1822                                 'title':        video_title,
1823                                 'stitle':       simple_title,
1824                                 'ext':          video_extension.decode('utf-8'),
1825                                 'format':       u'NA',
1826                                 'player_url':   None,
1827                         })
1828                 except UnavailableVideoError, err:
1829                         self._downloader.trouble(u'\nERROR: unable to download video')
1830
1831
1832 class YoutubeSearchIE(InfoExtractor):
1833         """Information Extractor for YouTube search queries."""
1834         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1835         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1836         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1837         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1838         _youtube_ie = None
1839         _max_youtube_results = 1000
1840
1841         def __init__(self, youtube_ie, downloader=None):
1842                 InfoExtractor.__init__(self, downloader)
1843                 self._youtube_ie = youtube_ie
1844
1845         @staticmethod
1846         def suitable(url):
1847                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1848
1849         def report_download_page(self, query, pagenum):
1850                 """Report attempt to download playlist page with given number."""
1851                 query = query.decode(preferredencoding())
1852                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1853
1854         def _real_initialize(self):
1855                 self._youtube_ie.initialize()
1856
1857         def _real_extract(self, query):
1858                 mobj = re.match(self._VALID_QUERY, query)
1859                 if mobj is None:
1860                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1861                         return
1862
1863                 prefix, query = query.split(':')
1864                 prefix = prefix[8:]
1865                 query  = query.encode('utf-8')
1866                 if prefix == '':
1867                         self._download_n_results(query, 1)
1868                         return
1869                 elif prefix == 'all':
1870                         self._download_n_results(query, self._max_youtube_results)
1871                         return
1872                 else:
1873                         try:
1874                                 n = long(prefix)
1875                                 if n <= 0:
1876                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1877                                         return
1878                                 elif n > self._max_youtube_results:
1879                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
1880                                         n = self._max_youtube_results
1881                                 self._download_n_results(query, n)
1882                                 return
1883                         except ValueError: # parsing prefix as integer fails
1884                                 self._download_n_results(query, 1)
1885                                 return
1886
1887         def _download_n_results(self, query, n):
1888                 """Downloads a specified number of results for a query"""
1889
1890                 video_ids = []
1891                 already_seen = set()
1892                 pagenum = 1
1893
1894                 while True:
1895                         self.report_download_page(query, pagenum)
1896                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1897                         request = urllib2.Request(result_url)
1898                         try:
1899                                 page = urllib2.urlopen(request).read()
1900                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1901                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1902                                 return
1903
1904                         # Extract video identifiers
1905                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1906                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1907                                 if video_id not in already_seen:
1908                                         video_ids.append(video_id)
1909                                         already_seen.add(video_id)
1910                                         if len(video_ids) == n:
1911                                                 # Specified n videos reached
1912                                                 for id in video_ids:
1913                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1914                                                 return
1915
1916                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1917                                 for id in video_ids:
1918                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1919                                 return
1920
1921                         pagenum = pagenum + 1
1922
1923 class GoogleSearchIE(InfoExtractor):
1924         """Information Extractor for Google Video search queries."""
1925         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1926         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1927         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1928         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1929         _google_ie = None
1930         _max_google_results = 1000
1931
1932         def __init__(self, google_ie, downloader=None):
1933                 InfoExtractor.__init__(self, downloader)
1934                 self._google_ie = google_ie
1935
1936         @staticmethod
1937         def suitable(url):
1938                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1939
1940         def report_download_page(self, query, pagenum):
1941                 """Report attempt to download playlist page with given number."""
1942                 query = query.decode(preferredencoding())
1943                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1944
1945         def _real_initialize(self):
1946                 self._google_ie.initialize()
1947
1948         def _real_extract(self, query):
1949                 mobj = re.match(self._VALID_QUERY, query)
1950                 if mobj is None:
1951                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1952                         return
1953
1954                 prefix, query = query.split(':')
1955                 prefix = prefix[8:]
1956                 query  = query.encode('utf-8')
1957                 if prefix == '':
1958                         self._download_n_results(query, 1)
1959                         return
1960                 elif prefix == 'all':
1961                         self._download_n_results(query, self._max_google_results)
1962                         return
1963                 else:
1964                         try:
1965                                 n = long(prefix)
1966                                 if n <= 0:
1967                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1968                                         return
1969                                 elif n > self._max_google_results:
1970                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n))
1971                                         n = self._max_google_results
1972                                 self._download_n_results(query, n)
1973                                 return
1974                         except ValueError: # parsing prefix as integer fails
1975                                 self._download_n_results(query, 1)
1976                                 return
1977
1978         def _download_n_results(self, query, n):
1979                 """Downloads a specified number of results for a query"""
1980
1981                 video_ids = []
1982                 already_seen = set()
1983                 pagenum = 1
1984
1985                 while True:
1986                         self.report_download_page(query, pagenum)
1987                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1988                         request = urllib2.Request(result_url)
1989                         try:
1990                                 page = urllib2.urlopen(request).read()
1991                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1992                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1993                                 return
1994
1995                         # Extract video identifiers
1996                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1997                                 video_id = mobj.group(1)
1998                                 if video_id not in already_seen:
1999                                         video_ids.append(video_id)
2000                                         already_seen.add(video_id)
2001                                         if len(video_ids) == n:
2002                                                 # Specified n videos reached
2003                                                 for id in video_ids:
2004                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2005                                                 return
2006
2007                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2008                                 for id in video_ids:
2009                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2010                                 return
2011
2012                         pagenum = pagenum + 1
2013
2014 class YahooSearchIE(InfoExtractor):
2015         """Information Extractor for Yahoo! Video search queries."""
2016         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
2017         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
2018         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
2019         _MORE_PAGES_INDICATOR = r'\s*Next'
2020         _yahoo_ie = None
2021         _max_yahoo_results = 1000
2022
2023         def __init__(self, yahoo_ie, downloader=None):
2024                 InfoExtractor.__init__(self, downloader)
2025                 self._yahoo_ie = yahoo_ie
2026
2027         @staticmethod
2028         def suitable(url):
2029                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
2030
2031         def report_download_page(self, query, pagenum):
2032                 """Report attempt to download playlist page with given number."""
2033                 query = query.decode(preferredencoding())
2034                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
2035
2036         def _real_initialize(self):
2037                 self._yahoo_ie.initialize()
2038
2039         def _real_extract(self, query):
2040                 mobj = re.match(self._VALID_QUERY, query)
2041                 if mobj is None:
2042                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2043                         return
2044
2045                 prefix, query = query.split(':')
2046                 prefix = prefix[8:]
2047                 query  = query.encode('utf-8')
2048                 if prefix == '':
2049                         self._download_n_results(query, 1)
2050                         return
2051                 elif prefix == 'all':
2052                         self._download_n_results(query, self._max_yahoo_results)
2053                         return
2054                 else:
2055                         try:
2056                                 n = long(prefix)
2057                                 if n <= 0:
2058                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2059                                         return
2060                                 elif n > self._max_yahoo_results:
2061                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n))
2062                                         n = self._max_yahoo_results
2063                                 self._download_n_results(query, n)
2064                                 return
2065                         except ValueError: # parsing prefix as integer fails
2066                                 self._download_n_results(query, 1)
2067                                 return
2068
2069         def _download_n_results(self, query, n):
2070                 """Downloads a specified number of results for a query"""
2071
2072                 video_ids = []
2073                 already_seen = set()
2074                 pagenum = 1
2075
2076                 while True:
2077                         self.report_download_page(query, pagenum)
2078                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2079                         request = urllib2.Request(result_url)
2080                         try:
2081                                 page = urllib2.urlopen(request).read()
2082                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2083                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2084                                 return
2085
2086                         # Extract video identifiers
2087                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2088                                 video_id = mobj.group(1)
2089                                 if video_id not in already_seen:
2090                                         video_ids.append(video_id)
2091                                         already_seen.add(video_id)
2092                                         if len(video_ids) == n:
2093                                                 # Specified n videos reached
2094                                                 for id in video_ids:
2095                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2096                                                 return
2097
2098                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2099                                 for id in video_ids:
2100                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2101                                 return
2102
2103                         pagenum = pagenum + 1
2104
2105 class YoutubePlaylistIE(InfoExtractor):
2106         """Information Extractor for YouTube playlists."""
2107
2108         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
2109         _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
2110         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2111         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2112         _youtube_ie = None
2113
2114         def __init__(self, youtube_ie, downloader=None):
2115                 InfoExtractor.__init__(self, downloader)
2116                 self._youtube_ie = youtube_ie
2117
2118         @staticmethod
2119         def suitable(url):
2120                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
2121
2122         def report_download_page(self, playlist_id, pagenum):
2123                 """Report attempt to download playlist page with given number."""
2124                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2125
2126         def _real_initialize(self):
2127                 self._youtube_ie.initialize()
2128
2129         def _real_extract(self, url):
2130                 # Extract playlist id
2131                 mobj = re.match(self._VALID_URL, url)
2132                 if mobj is None:
2133                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2134                         return
2135
2136                 # Single video case
2137                 if mobj.group(3) is not None:
2138                         self._youtube_ie.extract(mobj.group(3))
2139                         return
2140
2141                 # Download playlist pages
2142                 # prefix is 'p' as default for playlists but there are other types that need extra care
2143                 playlist_prefix = mobj.group(1)
2144                 if playlist_prefix == 'a':
2145                         playlist_access = 'artist'
2146                 else:
2147                         playlist_prefix = 'p'
2148                         playlist_access = 'view_play_list'
2149                 playlist_id = mobj.group(2)
2150                 video_ids = []
2151                 pagenum = 1
2152
2153                 while True:
2154                         self.report_download_page(playlist_id, pagenum)
2155                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
2156                         try:
2157                                 page = urllib2.urlopen(request).read()
2158                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2159                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2160                                 return
2161
2162                         # Extract video identifiers
2163                         ids_in_page = []
2164                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2165                                 if mobj.group(1) not in ids_in_page:
2166                                         ids_in_page.append(mobj.group(1))
2167                         video_ids.extend(ids_in_page)
2168
2169                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2170                                 break
2171                         pagenum = pagenum + 1
2172
2173                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2174                 playlistend = self._downloader.params.get('playlistend', -1)
2175                 video_ids = video_ids[playliststart:playlistend]
2176
2177                 for id in video_ids:
2178                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2179                 return
2180
2181 class YoutubeUserIE(InfoExtractor):
2182         """Information Extractor for YouTube users."""
2183
2184         _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
2185         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2186         _GDATA_PAGE_SIZE = 50
2187         _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2188         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2189         _youtube_ie = None
2190
2191         def __init__(self, youtube_ie, downloader=None):
2192                 InfoExtractor.__init__(self, downloader)
2193                 self._youtube_ie = youtube_ie
2194
2195         @staticmethod
2196         def suitable(url):
2197                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2198
2199         def report_download_page(self, username, start_index):
2200                 """Report attempt to download user page."""
2201                 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
2202                                            (username, start_index, start_index + self._GDATA_PAGE_SIZE))
2203
2204         def _real_initialize(self):
2205                 self._youtube_ie.initialize()
2206
2207         def _real_extract(self, url):
2208                 # Extract username
2209                 mobj = re.match(self._VALID_URL, url)
2210                 if mobj is None:
2211                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2212                         return
2213
2214                 username = mobj.group(1)
2215
2216                 # Download video ids using YouTube Data API. Result size per
2217                 # query is limited (currently to 50 videos) so we need to query
2218                 # page by page until there are no video ids - it means we got
2219                 # all of them.
2220
2221                 video_ids = []
2222                 pagenum = 0
2223
2224                 while True:
2225                         start_index = pagenum * self._GDATA_PAGE_SIZE + 1
2226                         self.report_download_page(username, start_index)
2227
2228                         request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
2229
2230                         try:
2231                                 page = urllib2.urlopen(request).read()
2232                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2233                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2234                                 return
2235
2236                         # Extract video identifiers
2237                         ids_in_page = []
2238
2239                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2240                                 if mobj.group(1) not in ids_in_page:
2241                                         ids_in_page.append(mobj.group(1))
2242
2243                         video_ids.extend(ids_in_page)
2244
2245                         # A little optimization - if current page is not
2246                         # "full", ie. does not contain PAGE_SIZE video ids then
2247                         # we can assume that this page is the last one - there
2248                         # are no more ids on further pages - no need to query
2249                         # again.
2250
2251                         if len(ids_in_page) < self._GDATA_PAGE_SIZE:
2252                                 break
2253
2254                         pagenum += 1
2255
2256                 all_ids_count = len(video_ids)
2257                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2258                 playlistend = self._downloader.params.get('playlistend', -1)
2259
2260                 if playlistend == -1:
2261                         video_ids = video_ids[playliststart:]
2262                 else:
2263                         video_ids = video_ids[playliststart:playlistend]
2264
2265                 self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
2266                                                                   (username, all_ids_count, len(video_ids)))
2267
2268                 for video_id in video_ids:
2269                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2270
2271
2272 class DepositFilesIE(InfoExtractor):
2273         """Information extractor for depositfiles.com"""
2274
2275         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
2276
2277         def __init__(self, downloader=None):
2278                 InfoExtractor.__init__(self, downloader)
2279
2280         @staticmethod
2281         def suitable(url):
2282                 return (re.match(DepositFilesIE._VALID_URL, url) is not None)
2283
2284         def report_download_webpage(self, file_id):
2285                 """Report webpage download."""
2286                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2287
2288         def report_extraction(self, file_id):
2289                 """Report information extraction."""
2290                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2291
2292         def _real_initialize(self):
2293                 return
2294
2295         def _real_extract(self, url):
2296                 # At this point we have a new file
2297                 self._downloader.increment_downloads()
2298
2299                 file_id = url.split('/')[-1]
2300                 # Rebuild url in english locale
2301                 url = 'http://depositfiles.com/en/files/' + file_id
2302
2303                 # Retrieve file webpage with 'Free download' button pressed
2304                 free_download_indication = { 'gateway_result' : '1' }
2305                 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
2306                 try:
2307                         self.report_download_webpage(file_id)
2308                         webpage = urllib2.urlopen(request).read()
2309                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2310                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2311                         return
2312
2313                 # Search for the real file URL
2314                 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2315                 if (mobj is None) or (mobj.group(1) is None):
2316                         # Try to figure out reason of the error.
2317                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2318                         if (mobj is not None) and (mobj.group(1) is not None):
2319                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2320                                 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2321                         else:
2322                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2323                         return
2324
2325                 file_url = mobj.group(1)
2326                 file_extension = os.path.splitext(file_url)[1][1:]
2327
2328                 # Search for file title
2329                 mobj = re.search(r'<b title="(.*?)">', webpage)
2330                 if mobj is None:
2331                         self._downloader.trouble(u'ERROR: unable to extract title')
2332                         return
2333                 file_title = mobj.group(1).decode('utf-8')
2334
2335                 try:
2336                         # Process file information
2337                         self._downloader.process_info({
2338                                 'id':           file_id.decode('utf-8'),
2339                                 'url':          file_url.decode('utf-8'),
2340                                 'uploader':     u'NA',
2341                                 'upload_date':  u'NA',
2342                                 'title':        file_title,
2343                                 'stitle':       file_title,
2344                                 'ext':          file_extension.decode('utf-8'),
2345                                 'format':       u'NA',
2346                                 'player_url':   None,
2347                         })
2348                 except UnavailableVideoError, err:
2349                         self._downloader.trouble(u'ERROR: unable to download file')
2350
2351 class FacebookIE(InfoExtractor):
2352         """Information Extractor for Facebook"""
2353
2354         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2355         _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2356         _NETRC_MACHINE = 'facebook'
2357         _available_formats = ['highqual', 'lowqual']
2358         _video_extensions = {
2359                 'highqual': 'mp4',
2360                 'lowqual': 'mp4',
2361         }
2362
	def __init__(self, downloader=None):
		# Delegate common extractor setup to the InfoExtractor base class.
		InfoExtractor.__init__(self, downloader)
2365
2366         @staticmethod
2367         def suitable(url):
2368                 return (re.match(FacebookIE._VALID_URL, url) is not None)
2369
	def _reporter(self, message):
		"""Add header and report message."""
		# All facebook status output shares the same '[facebook]' prefix.
		self._downloader.to_screen(u'[facebook] %s' % message)
2373
	def report_login(self):
		"""Report attempt to log in to Facebook."""
		self._reporter(u'Logging in')
2377
	def report_video_webpage_download(self, video_id):
		"""Report attempt to download the webpage for the given video id."""
		self._reporter(u'%s: Downloading video webpage' % video_id)
2381
	def report_information_extraction(self, video_id):
		"""Report attempt to extract information for the given video id."""
		self._reporter(u'%s: Extracting video information' % video_id)
2385
2386         def _parse_page(self, video_webpage):
2387                 """Extract video information from page"""
2388                 # General data
2389                 data = {'title': r'class="video_title datawrap">(.*?)</',
2390                         'description': r'<div class="datawrap">(.*?)</div>',
2391                         'owner': r'\("video_owner_name", "(.*?)"\)',
2392                         'upload_date': r'data-date="(.*?)"',
2393                         'thumbnail':  r'\("thumb_url", "(?P<THUMB>.*?)"\)',
2394                         }
2395                 video_info = {}
2396                 for piece in data.keys():
2397                         mobj = re.search(data[piece], video_webpage)
2398                         if mobj is not None:
2399                                 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2400
2401                 # Video urls
2402                 video_urls = {}
2403                 for fmt in self._available_formats:
2404                         mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
2405                         if mobj is not None:
2406                                 # URL is in a Javascript segment inside an escaped Unicode format within
2407                                 # the generally utf-8 page
2408                                 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2409                 video_info['video_urls'] = video_urls
2410
2411                 return video_info
2412
2413         def _real_initialize(self):
2414                 if self._downloader is None:
2415                         return
2416
2417                 useremail = None
2418                 password = None
2419                 downloader_params = self._downloader.params
2420
2421                 # Attempt to use provided username and password or .netrc data
2422                 if downloader_params.get('username', None) is not None:
2423                         useremail = downloader_params['username']
2424                         password = downloader_params['password']
2425                 elif downloader_params.get('usenetrc', False):
2426                         try:
2427                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
2428                                 if info is not None:
2429                                         useremail = info[0]
2430                                         password = info[2]
2431                                 else:
2432                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
2433                         except (IOError, netrc.NetrcParseError), err:
2434                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
2435                                 return
2436
2437                 if useremail is None:
2438                         return
2439
2440                 # Log in
2441                 login_form = {
2442                         'email': useremail,
2443                         'pass': password,
2444                         'login': 'Log+In'
2445                         }
2446                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
2447                 try:
2448                         self.report_login()
2449                         login_results = urllib2.urlopen(request).read()
2450                         if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
2451                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
2452                                 return
2453                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2454                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
2455                         return
2456
2457         def _real_extract(self, url):
2458                 mobj = re.match(self._VALID_URL, url)
2459                 if mobj is None:
2460                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2461                         return
2462                 video_id = mobj.group('ID')
2463
2464                 # Get video webpage
2465                 self.report_video_webpage_download(video_id)
2466                 request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
2467                 try:
2468                         page = urllib2.urlopen(request)
2469                         video_webpage = page.read()
2470                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2471                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
2472                         return
2473
2474                 # Start extracting information
2475                 self.report_information_extraction(video_id)
2476
2477                 # Extract information
2478                 video_info = self._parse_page(video_webpage)
2479
2480                 # uploader
2481                 if 'owner' not in video_info:
2482                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
2483                         return
2484                 video_uploader = video_info['owner']
2485
2486                 # title
2487                 if 'title' not in video_info:
2488                         self._downloader.trouble(u'ERROR: unable to extract video title')
2489                         return
2490                 video_title = video_info['title']
2491                 video_title = video_title.decode('utf-8')
2492                 video_title = sanitize_title(video_title)
2493
2494                 # simplified title
2495                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2496                 simple_title = simple_title.strip(ur'_')
2497
2498                 # thumbnail image
2499                 if 'thumbnail' not in video_info:
2500                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
2501                         video_thumbnail = ''
2502                 else:
2503                         video_thumbnail = video_info['thumbnail']
2504
2505                 # upload date
2506                 upload_date = u'NA'
2507                 if 'upload_date' in video_info:
2508                         upload_time = video_info['upload_date']
2509                         timetuple = email.utils.parsedate_tz(upload_time)
2510                         if timetuple is not None:
2511                                 try:
2512                                         upload_date = time.strftime('%Y%m%d', timetuple[0:9])
2513                                 except:
2514                                         pass
2515
2516                 # description
2517                 video_description = 'No description available.'
2518                 if (self._downloader.params.get('forcedescription', False) and
2519                         'description' in video_info):
2520                         video_description = video_info['description']
2521
2522                 url_map = video_info['video_urls']
2523                 if len(url_map.keys()) > 0:
2524                         # Decide which formats to download
2525                         req_format = self._downloader.params.get('format', None)
2526                         format_limit = self._downloader.params.get('format_limit', None)
2527
2528                         if format_limit is not None and format_limit in self._available_formats:
2529                                 format_list = self._available_formats[self._available_formats.index(format_limit):]
2530                         else:
2531                                 format_list = self._available_formats
2532                         existing_formats = [x for x in format_list if x in url_map]
2533                         if len(existing_formats) == 0:
2534                                 self._downloader.trouble(u'ERROR: no known formats available for video')
2535                                 return
2536                         if req_format is None:
2537                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
2538                         elif req_format == '-1':
2539                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
2540                         else:
2541                                 # Specific format
2542                                 if req_format not in url_map:
2543                                         self._downloader.trouble(u'ERROR: requested format not available')
2544                                         return
2545                                 video_url_list = [(req_format, url_map[req_format])] # Specific format
2546
2547                 for format_param, video_real_url in video_url_list:
2548
2549                         # At this point we have a new video
2550                         self._downloader.increment_downloads()
2551
2552                         # Extension
2553                         video_extension = self._video_extensions.get(format_param, 'mp4')
2554
2555                         # Find the video URL in fmt_url_map or conn paramters
2556                         try:
2557                                 # Process video information
2558                                 self._downloader.process_info({
2559                                         'id':           video_id.decode('utf-8'),
2560                                         'url':          video_real_url.decode('utf-8'),
2561                                         'uploader':     video_uploader.decode('utf-8'),
2562                                         'upload_date':  upload_date,
2563                                         'title':        video_title,
2564                                         'stitle':       simple_title,
2565                                         'ext':          video_extension.decode('utf-8'),
2566                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
2567                                         'thumbnail':    video_thumbnail.decode('utf-8'),
2568                                         'description':  video_description.decode('utf-8'),
2569                                         'player_url':   None,
2570                                 })
2571                         except UnavailableVideoError, err:
2572                                 self._downloader.trouble(u'\nERROR: unable to download video')
2573
class PostProcessor(object):
	"""Base class for post-processing steps.

	A PostProcessor is attached to a FileDownloader through the
	downloader's add_post_processor() method. After each successful
	download, the downloader walks its chain of processors, calling
	run() on each one and feeding it the dictionary returned by the
	previous processor.

	The chain stops as soon as a processor returns None, or when the
	last processor has run.

	Like InfoExtractor objects, PostProcessors follow a "mutual
	registration" scheme with their downloader.
	"""

	# The FileDownloader this processor is registered with (may be None).
	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Attach this post processor to the given downloader."""
		self._downloader = downloader

	def run(self, information):
		"""Process one downloaded video.

		The information argument is an InfoExtractor-style dictionary
		augmented with a 'filepath' key naming the downloaded file.

		Subclasses may return an (optionally modified) dictionary to
		hand to the next processor in the chain, return None to stop
		the chain, or raise PostProcessingError to signal failure to
		the calling downloader.
		"""
		# The base implementation is the identity transformation.
		return information
2619
class FFmpegExtractAudioPP(PostProcessor):
	"""Post processor that extracts the audio track of a downloaded
	video into a standalone audio file using ffprobe and ffmpeg.

	preferredcodec may be 'best' (default), 'aac' or 'mp3'.
	"""

	def __init__(self, downloader=None, preferredcodec=None):
		PostProcessor.__init__(self, downloader)
		if preferredcodec is None:
			preferredcodec = 'best'
		self._preferredcodec = preferredcodec

	@staticmethod
	def get_audio_codec(path):
		"""Return the codec name of the first audio stream in the file
		at path, or None if ffprobe fails or finds no audio stream."""
		try:
			cmd = ['ffprobe', '-show_streams', '--', path]
			handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
			output = handle.communicate()[0]
			if handle.wait() != 0:
				return None
		except (IOError, OSError):
			return None
		audio_codec = None
		# ffprobe prints codec_name= before codec_type= within a stream
		# block, so remember the last codec seen and report it once an
		# audio stream is confirmed.
		for line in output.split('\n'):
			if line.startswith('codec_name='):
				audio_codec = line.split('=')[1].strip()
			elif line.strip() == 'codec_type=audio' and audio_codec is not None:
				return audio_codec
		return None

	@staticmethod
	def run_ffmpeg(path, out_path, codec, more_opts):
		"""Transcode path into out_path with the given audio codec and
		extra ffmpeg options; return True on success."""
		try:
			cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
			ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
			return (ret == 0)
		except (IOError, OSError):
			return False

	def run(self, information):
		path = information['filepath']

		filecodec = self.get_audio_codec(path)
		if filecodec is None:
			self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
			return None

		more_opts = []
		if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
			if filecodec == 'aac' or filecodec == 'mp3':
				# Lossless if possible
				acodec = 'copy'
				extension = filecodec
				if filecodec == 'aac':
					more_opts = ['-f', 'adts']
			else:
				# MP3 otherwise.
				acodec = 'libmp3lame'
				extension = 'mp3'
				more_opts = ['-ab', '128k']
		else:
			# We convert the audio (lossy)
			acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
			extension = self._preferredcodec
			more_opts = ['-ab', '128k']
			if self._preferredcodec == 'aac':
				more_opts += ['-f', 'adts']

		(prefix, ext) = os.path.splitext(path)
		new_path = prefix + '.' + extension
		# BUG FIX: if the downloaded file already carries the target
		# extension, new_path equals path; ffmpeg -y would then read and
		# write the same file, and the os.remove() below would delete
		# the freshly produced audio file. Bail out instead.
		if new_path == path:
			self._downloader.to_stderr(u'WARNING: file is already in the target audio format, not post-processing')
			return None
		self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
		status = self.run_ffmpeg(path, new_path, acodec, more_opts)

		if not status:
			self._downloader.to_stderr(u'WARNING: error running ffmpeg')
			return None

		try:
			os.remove(path)
		except (IOError, OSError):
			self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
			return None

		information['filepath'] = new_path
		return information
2701
2702
def updateSelf(downloader, filename):
	"""Overwrite filename with the latest stable youtube-dl from GitHub."""
	# The downloader argument is only used for screen output.
	if not os.access(filename, os.W_OK):
		sys.exit('ERROR: no write permissions on %s' % filename)

	downloader.to_screen('Updating to latest stable version...')

	latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
	try:
		# The LATEST_VERSION file names the tag to fetch the program from.
		latest_version = urllib.urlopen(latest_url).read().strip()
		prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
		newcontent = urllib.urlopen(prog_url).read()
	except (IOError, OSError):
		sys.exit('ERROR: unable to download latest version')

	try:
		stream = open(filename, 'w')
		stream.write(newcontent)
		stream.close()
	except (IOError, OSError):
		sys.exit('ERROR: unable to overwrite current version')

	downloader.to_screen('Updated to version %s' % latest_version)
2727
2728
def parseOpts():
	"""Build the optparse parser, parse sys.argv and return (parser, opts, args)."""
	# Deferred imports
	import getpass
	import optparse

	def _format_option_string(option):
		''' ('-o', '--option') -> '-o, --option METAVAR' '''

		opts = []

		if option._short_opts: opts.append(option._short_opts[0])
		if option._long_opts: opts.append(option._long_opts[0])
		# Separate short and long form with a comma.
		if len(opts) > 1: opts.insert(1, ', ')

		if option.takes_value(): opts.append(' %s' % option.metavar)

		return "".join(opts)

	def _find_term_columns():
		# COLUMNS is usually not exported by the shell, so also fall
		# back to asking the terminal itself via stty on linux.
		# Returns None when the width cannot be determined.
		columns = os.environ.get('COLUMNS', None)
		if columns: return int(columns)

		if sys.platform.startswith('linux'):
			try: columns = os.popen('stty size', 'r').read().split()[1]
			except: pass

		if columns: return int(columns)

	max_width = 80
	max_help_position = 80

	# No need to wrap help messages if we're on a wide console
	columns = _find_term_columns()
	if columns: max_width = columns

	fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
	fmt.format_option_strings = _format_option_string

	kw = {
		'version'   : __version__,
		'formatter' : fmt,
		'usage' : 'Usage : %prog [options] url...',
		'conflict_handler' : 'resolve',
	}

	parser = optparse.OptionParser(**kw)

	# option groups
	general        = optparse.OptionGroup(parser, 'General Options')
	authentication = optparse.OptionGroup(parser, 'Authentication Options')
	video_format   = optparse.OptionGroup(parser, 'Video Format Options')
	postproc       = optparse.OptionGroup(parser, 'Post-processing Options')
	filesystem     = optparse.OptionGroup(parser, 'Filesystem Options')
	verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')

	general.add_option('-h', '--help',
			action='help', help='print this help text and exit')
	general.add_option('-v', '--version',
			action='version', help='print program version and exit')
	general.add_option('-U', '--update',
			action='store_true', dest='update_self', help='update this program to latest stable version')
	general.add_option('-i', '--ignore-errors',
			action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
	general.add_option('-r', '--rate-limit',
			dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
	general.add_option('-R', '--retries',
			dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
	general.add_option('--playlist-start',
			dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
	general.add_option('--playlist-end',
			dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
	general.add_option('--dump-user-agent',
			action='store_true', dest='dump_user_agent',
			help='display the current browser identification', default=False)

	authentication.add_option('-u', '--username',
			dest='username', metavar='USERNAME', help='account username')
	authentication.add_option('-p', '--password',
			dest='password', metavar='PASSWORD', help='account password')
	authentication.add_option('-n', '--netrc',
			action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)


	video_format.add_option('-f', '--format',
			action='store', dest='format', metavar='FORMAT', help='video format code')
	video_format.add_option('--all-formats',
			action='store_const', dest='format', help='download all available video formats', const='-1')
	video_format.add_option('--max-quality',
			action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')


	verbosity.add_option('-q', '--quiet',
			action='store_true', dest='quiet', help='activates quiet mode', default=False)
	verbosity.add_option('-s', '--simulate',
			action='store_true', dest='simulate', help='do not download video', default=False)
	verbosity.add_option('-g', '--get-url',
			action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
	verbosity.add_option('-e', '--get-title',
			action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
	verbosity.add_option('--get-thumbnail',
			action='store_true', dest='getthumbnail',
			help='simulate, quiet but print thumbnail URL', default=False)
	verbosity.add_option('--get-description',
			action='store_true', dest='getdescription',
			help='simulate, quiet but print video description', default=False)
	verbosity.add_option('--get-filename',
			action='store_true', dest='getfilename',
			help='simulate, quiet but print output filename', default=False)
	verbosity.add_option('--no-progress',
			action='store_true', dest='noprogress', help='do not print progress bar', default=False)
	verbosity.add_option('--console-title',
			action='store_true', dest='consoletitle',
			help='display progress in console titlebar', default=False)


	filesystem.add_option('-t', '--title',
			action='store_true', dest='usetitle', help='use title in file name', default=False)
	filesystem.add_option('-l', '--literal',
			action='store_true', dest='useliteral', help='use literal title in file name', default=False)
	filesystem.add_option('-A', '--auto-number',
			action='store_true', dest='autonumber',
			help='number downloaded files starting from 00000', default=False)
	filesystem.add_option('-o', '--output',
			dest='outtmpl', metavar='TEMPLATE', help='output filename template')
	filesystem.add_option('-a', '--batch-file',
			dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
	filesystem.add_option('-w', '--no-overwrites',
			action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
	filesystem.add_option('-c', '--continue',
			action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
	filesystem.add_option('--cookies',
			dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
	filesystem.add_option('--no-part',
			action='store_true', dest='nopart', help='do not use .part files', default=False)
	filesystem.add_option('--no-mtime',
			action='store_false', dest='updatetime',
			help='do not use the Last-modified header to set the file modification time', default=True)


	postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
			help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
	postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
			help='"best", "aac" or "mp3"; best by default')


	# Group order here controls the order sections appear in --help output.
	parser.add_option_group(general)
	parser.add_option_group(filesystem)
	parser.add_option_group(verbosity)
	parser.add_option_group(video_format)
	parser.add_option_group(authentication)
	parser.add_option_group(postproc)

	# Parses sys.argv[1:]; leftover positional arguments are the URLs.
	opts, args = parser.parse_args()

	return parser, opts, args
2884
2885 def main():
2886         parser, opts, args = parseOpts()
2887
2888         # Open appropriate CookieJar
2889         if opts.cookiefile is None:
2890                 jar = cookielib.CookieJar()
2891         else:
2892                 try:
2893                         jar = cookielib.MozillaCookieJar(opts.cookiefile)
2894                         if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
2895                                 jar.load()
2896                 except (IOError, OSError), err:
2897                         sys.exit(u'ERROR: unable to open cookie file')
2898
2899         # Dump user agent
2900         if opts.dump_user_agent:
2901                 print std_headers['User-Agent']
2902                 sys.exit(0)
2903
2904         # General configuration
2905         cookie_processor = urllib2.HTTPCookieProcessor(jar)
2906         urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()))
2907         socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
2908
2909         # Batch file verification
2910         batchurls = []
2911         if opts.batchfile is not None:
2912                 try:
2913                         if opts.batchfile == '-':
2914                                 batchfd = sys.stdin
2915                         else:
2916                                 batchfd = open(opts.batchfile, 'r')
2917                         batchurls = batchfd.readlines()
2918                         batchurls = [x.strip() for x in batchurls]
2919                         batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
2920                 except IOError:
2921                         sys.exit(u'ERROR: batch file could not be read')
2922         all_urls = batchurls + args
2923
2924         # Conflicting, missing and erroneous options
2925         if opts.usenetrc and (opts.username is not None or opts.password is not None):
2926                 parser.error(u'using .netrc conflicts with giving username/password')
2927         if opts.password is not None and opts.username is None:
2928                 parser.error(u'account username missing')
2929         if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
2930                 parser.error(u'using output template conflicts with using title, literal title or auto number')
2931         if opts.usetitle and opts.useliteral:
2932                 parser.error(u'using title conflicts with using literal title')
2933         if opts.username is not None and opts.password is None:
2934                 opts.password = getpass.getpass(u'Type account password and press return:')
2935         if opts.ratelimit is not None:
2936                 numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
2937                 if numeric_limit is None:
2938                         parser.error(u'invalid rate limit specified')
2939                 opts.ratelimit = numeric_limit
2940         if opts.retries is not None:
2941                 try:
2942                         opts.retries = long(opts.retries)
2943                 except (TypeError, ValueError), err:
2944                         parser.error(u'invalid retry count specified')
2945         try:
2946                 opts.playliststart = long(opts.playliststart)
2947                 if opts.playliststart <= 0:
2948                         raise ValueError
2949         except (TypeError, ValueError), err:
2950                 parser.error(u'invalid playlist start number specified')
2951         try:
2952                 opts.playlistend = long(opts.playlistend)
2953                 if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
2954                         raise ValueError
2955         except (TypeError, ValueError), err:
2956                 parser.error(u'invalid playlist end number specified')
2957         if opts.extractaudio:
2958                 if opts.audioformat not in ['best', 'aac', 'mp3']:
2959                         parser.error(u'invalid audio format specified')
2960
2961         # Information extractors
2962         youtube_ie = YoutubeIE()
2963         metacafe_ie = MetacafeIE(youtube_ie)
2964         dailymotion_ie = DailymotionIE()
2965         youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
2966         youtube_user_ie = YoutubeUserIE(youtube_ie)
	# Instantiate the remaining info extractors.  Each search extractor
	# wraps the corresponding site extractor so matched results can be
	# handed off to it.  (youtube_ie and the other *_ie names referenced
	# below are created earlier in this function.)
	youtube_search_ie = YoutubeSearchIE(youtube_ie)
	google_ie = GoogleIE()
	google_search_ie = GoogleSearchIE(google_ie)
	photobucket_ie = PhotobucketIE()
	yahoo_ie = YahooIE()
	yahoo_search_ie = YahooSearchIE(yahoo_ie)
	deposit_files_ie = DepositFilesIE()
	facebook_ie = FacebookIE()
	generic_ie = GenericIE()

	# File downloader
	# Build the FileDownloader configuration from the parsed command-line
	# options.  Note that any of the "get*" options forces both quiet and
	# simulate mode: the requested metadata is printed instead of
	# performing a download.
	fd = FileDownloader({
		'usenetrc': opts.usenetrc,
		'username': opts.username,
		'password': opts.password,
		'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
		'forceurl': opts.geturl,
		'forcetitle': opts.gettitle,
		'forcethumbnail': opts.getthumbnail,
		'forcedescription': opts.getdescription,
		'forcefilename': opts.getfilename,
		'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
		'format': opts.format,
		'format_limit': opts.format_limit,
		# Output filename template: an explicit template from the user
		# (decoded from the locale's preferred encoding) wins; otherwise
		# the first matching default is chosen based on which of the
		# format/usetitle/useliteral/autonumber options are set.  The
		# chained "and/or" idiom relies on every template string being
		# truthy (non-empty).
		'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
			or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
			or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
			or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
			or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
			or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
			or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
			or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
			or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
			or u'%(id)s.%(ext)s'),
		'ignoreerrors': opts.ignoreerrors,
		'ratelimit': opts.ratelimit,
		'nooverwrites': opts.nooverwrites,
		'retries': opts.retries,
		'continuedl': opts.continue_dl,
		'noprogress': opts.noprogress,
		'playliststart': opts.playliststart,
		'playlistend': opts.playlistend,
		# An output template of '-' means the media goes to stdout, so
		# log messages must go to stderr instead.
		'logtostderr': opts.outtmpl == '-',
		'consoletitle': opts.consoletitle,
		'nopart': opts.nopart,
		'updatetime': opts.updatetime,
		})
	# Register the extractors.  Registration order is significant: the
	# generic extractor must be added last (see note below); the relative
	# order of the others presumably determines matching priority as
	# well -- confirm against FileDownloader before reordering.
	fd.add_info_extractor(youtube_search_ie)
	fd.add_info_extractor(youtube_pl_ie)
	fd.add_info_extractor(youtube_user_ie)
	fd.add_info_extractor(metacafe_ie)
	fd.add_info_extractor(dailymotion_ie)
	fd.add_info_extractor(youtube_ie)
	fd.add_info_extractor(google_ie)
	fd.add_info_extractor(google_search_ie)
	fd.add_info_extractor(photobucket_ie)
	fd.add_info_extractor(yahoo_ie)
	fd.add_info_extractor(yahoo_search_ie)
	fd.add_info_extractor(deposit_files_ie)
	fd.add_info_extractor(facebook_ie)

	# This must come last since it's the
	# fallback if none of the others work
	fd.add_info_extractor(generic_ie)

	# PostProcessors
	if opts.extractaudio:
		fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat))

	# Update version
	if opts.update_self:
		updateSelf(fd, sys.argv[0])

	# Maybe do nothing
	# Running with no URLs is an error unless --update-self was the sole
	# purpose of the invocation.
	if len(all_urls) < 1:
		if not opts.update_self:
			parser.error(u'you must provide at least one URL')
		else:
			sys.exit()
	retcode = fd.download(all_urls)

	# Dump cookie jar if requested
	if opts.cookiefile is not None:
		try:
			jar.save()
		except (IOError, OSError), err:
			sys.exit(u'ERROR: unable to save cookie jar')

	# Propagate the downloader's return code as the process exit status.
	sys.exit(retcode)
3056
3057
# Script entry point: run main() and translate the known top-level
# failure modes into process exit statuses/messages.
if __name__ == '__main__':
	try:
		main()
	except DownloadError:
		# Exit non-zero without an extra message for download failures.
		sys.exit(1)
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		# Ctrl-C: report the interruption and exit with an error status.
		sys.exit(u'\nERROR: Interrupted by user')
3067
3068 # vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: