Use a temporary filename to download files
[youtube-dl.git] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # License: Public domain code
7 import cookielib
8 import datetime
9 import htmlentitydefs
10 import httplib
11 import locale
12 import math
13 import netrc
14 import os
15 import os.path
16 import re
17 import socket
18 import string
19 import subprocess
20 import sys
21 import time
22 import urllib
23 import urllib2
24
25 # parse_qs was moved from the cgi module to the urlparse module recently.
26 try:
27         from urlparse import parse_qs
28 except ImportError:
29         from cgi import parse_qs
30
# HTTP headers attached to every request so that servers respond as they
# would to an ordinary desktop Firefox browser.
std_headers = {
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept-Language': 'en-us,en;q=0.5',
	'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101028 Firefox/3.6.12',
}
37
38 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
39
def preferredencoding():
	"""Get preferred encoding.

	Returns the best encoding scheme for the system, based on
	locale.getpreferredencoding() and some further tweaks, falling back
	to UTF-8 when the reported encoding cannot actually encode text.
	"""
	# The original routed the value through a one-shot generator and a bare
	# except; a plain try/return is equivalent and does not swallow
	# SystemExit/KeyboardInterrupt.
	try:
		pref = locale.getpreferredencoding()
		# Verify the reported codec really works before trusting it.
		u'TEST'.encode(pref)
	except Exception:
		pref = 'UTF-8'
	return pref
55
56 def htmlentity_transform(matchobj):
57         """Transforms an HTML entity to a Unicode character.
58         
59         This function receives a match object and is intended to be used with
60         the re.sub() function.
61         """
62         entity = matchobj.group(1)
63
64         # Known non-numeric HTML entity
65         if entity in htmlentitydefs.name2codepoint:
66                 return unichr(htmlentitydefs.name2codepoint[entity])
67
68         # Unicode character
69         mobj = re.match(ur'(?u)#(x?\d+)', entity)
70         if mobj is not None:
71                 numstr = mobj.group(1)
72                 if numstr.startswith(u'x'):
73                         base = 16
74                         numstr = u'0%s' % numstr
75                 else:
76                         base = 10
77                 return unichr(long(numstr, base))
78
79         # Unknown entity in name, return its literal representation
80         return (u'&%s;' % entity)
81
82 def sanitize_title(utitle):
83         """Sanitizes a video title so it could be used as part of a filename."""
84         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
85         return utitle.replace(unicode(os.sep), u'%')
86
87 def sanitize_open(filename, open_mode):
88         """Try to open the given filename, and slightly tweak it if this fails.
89
90         Attempts to open the given filename. If this fails, it tries to change
91         the filename slightly, step by step, until it's either able to open it
92         or it fails and raises a final exception, like the standard open()
93         function.
94
95         It returns the tuple (stream, definitive_file_name).
96         """
97         try:
98                 if filename == u'-':
99                         if sys.platform == 'win32':
100                                 import msvcrt
101                                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
102                         return (sys.stdout, filename)
103                 stream = open(filename, open_mode)
104                 return (stream, filename)
105         except (IOError, OSError), err:
106                 # In case of error, try to remove win32 forbidden chars
107                 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
108
109                 # An exception here should be caught in the caller
110                 stream = open(filename, open_mode)
111                 return (stream, filename)
112
113
class DownloadError(Exception):
	"""Raised when fetching a video fails.

	FileDownloader objects throw this exception when downloading breaks
	and they have not been configured to ignore errors. It carries the
	relevant error message.
	"""
	pass
122
class SameFileError(Exception):
	"""Raised when several downloads would collide on one output file.

	FileDownloader objects throw this exception when they detect that
	more than one of the requested videos would be written to the same
	file on disk.
	"""
	pass
130
class PostProcessingError(Exception):
	"""Raised by a PostProcessor to signal a failed task.

	A PostProcessor's .run() method may throw this exception when the
	postprocessing step cannot be completed.
	"""
	pass
138
class UnavailableVideoError(Exception):
	"""Raised when a video is requested in a format it does not offer.

	Thrown when the selected format is not available for the given video.
	"""
	pass
146
class ContentTooShortError(Exception):
	"""Raised when fewer bytes arrive than the server announced.

	FileDownloader objects throw this exception when a finished download
	is smaller than the size reported by the server, which usually means
	the connection was interrupted.
	"""
	# Byte counts: what actually arrived vs. what the server promised.
	downloaded = None
	expected = None

	def __init__(self, downloaded, expected):
		self.downloaded = downloaded
		self.expected = expected
161
class FileDownloader(object):
	"""File Downloader class.

	File downloader objects are the ones responsible of downloading the
	actual video file and writing it to disk if the user has requested
	it, among some other tasks. In most cases there should be one per
	program. As, given a video URL, the downloader doesn't know how to
	extract all the needed information, task that InfoExtractors do, it
	has to pass the URL to one of them.

	For this, file downloader objects have a method that allows
	InfoExtractors to be registered in a given order. When it is passed
	a URL, the file downloader handles it to the first InfoExtractor it
	finds that reports being able to handle it. The InfoExtractor extracts
	all the information about the video or videos the URL refers to, and
	asks the FileDownloader to process the video information, possibly
	downloading the video.

	File downloaders accept a lot of parameters. In order not to saturate
	the object constructor with arguments, it receives a dictionary of
	options instead. These options are available through the params
	attribute for the InfoExtractors to use. The FileDownloader also
	registers itself as the downloader in charge for the InfoExtractors
	that are added to it, so this is a "mutual registration".

	Available options:

	username:         Username for authentication purposes.
	password:         Password for authentication purposes.
	usenetrc:         Use netrc for authentication instead.
	quiet:            Do not print messages to stdout.
	forceurl:         Force printing final URL.
	forcetitle:       Force printing title.
	forcethumbnail:   Force printing thumbnail URL.
	forcedescription: Force printing description.
	simulate:         Do not download the video files.
	format:           Video format code.
	format_limit:     Highest quality format to try.
	outtmpl:          Template for output names.
	ignoreerrors:     Do not stop on download errors.
	ratelimit:        Download speed limit, in bytes/sec.
	nooverwrites:     Prevent overwriting files.
	retries:          Number of times to retry for HTTP error 5xx
	continuedl:       Try to continue downloads if possible.
	noprogress:       Do not print the progress bar.
	playliststart:    Playlist item to start at.
	playlistend:      Playlist item to end at.
	logtostderr:      Log messages to stderr instead of stdout.
	"""

	# Class-level defaults; every mutable one is replaced with a fresh
	# object in __init__, so instances do not share state.
	params = None
	_ies = []
	_pps = []
	_download_retcode = None
	_num_downloads = None
	_screen_file = None

	def __init__(self, params):
		"""Create a FileDownloader object with the given options."""
		self._ies = []
		self._pps = []
		self._download_retcode = 0
		self._num_downloads = 0
		# Index a two-element list with a boolean: False -> stdout, True -> stderr.
		self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
		self.params = params

	@staticmethod
	def pmkdir(filename):
		"""Create directory components in filename. Similar to Unix "mkdir -p"."""
		components = filename.split(os.sep)
		# Build every ancestor path, shortest first; the last component
		# (the file itself) is excluded by the range ending at len-1.
		aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
		aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
		for dir in aggregate:
			if not os.path.exists(dir):
				os.mkdir(dir)

	@staticmethod
	def temp_name(filename):
		"""Returns a temporary filename for the given filename."""
		# Data is downloaded into this '.part' file and renamed to the
		# final name only when the download completes successfully.
		return filename + '.part'

	@staticmethod
	def format_bytes(bytes):
		"""Format a byte count as a human-readable string, e.g. '1.00M'."""
		if bytes is None:
			return 'N/A'
		if type(bytes) is str:
			bytes = float(bytes)
		if bytes == 0.0:
			exponent = 0
		else:
			# Largest power of 1024 not exceeding the value.
			exponent = long(math.log(bytes, 1024.0))
		suffix = 'bkMGTPEZY'[exponent]
		converted = float(bytes) / float(1024**exponent)
		return '%.2f%s' % (converted, suffix)

	@staticmethod
	def calc_percent(byte_counter, data_len):
		"""Return download progress as a fixed-width percentage string."""
		if data_len is None:
			return '---.-%'
		return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))

	@staticmethod
	def calc_eta(start, now, total, current):
		"""Estimate the remaining download time as 'MM:SS' from the average rate."""
		if total is None:
			return '--:--'
		dif = now - start
		if current == 0 or dif < 0.001: # One millisecond
			return '--:--'
		rate = float(current) / dif
		eta = long((float(total) - float(current)) / rate)
		(eta_mins, eta_secs) = divmod(eta, 60)
		if eta_mins > 99:
			return '--:--'
		return '%02d:%02d' % (eta_mins, eta_secs)

	@staticmethod
	def calc_speed(start, now, bytes):
		"""Return the average download speed as a fixed-width string."""
		dif = now - start
		if bytes == 0 or dif < 0.001: # One millisecond
			return '%10s' % '---b/s'
		return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

	@staticmethod
	def best_block_size(elapsed_time, bytes):
		"""Choose the next read size: track the measured rate, but change by
		at most a factor of two per step, clamped to [1 byte, 4 MB]."""
		new_min = max(bytes / 2.0, 1.0)
		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
		if elapsed_time < 0.001:
			return long(new_max)
		rate = bytes / elapsed_time
		if rate > new_max:
			return long(new_max)
		if rate < new_min:
			return long(new_min)
		return long(rate)

	@staticmethod
	def parse_bytes(bytestr):
		"""Parse a string indicating a byte quantity into a long integer."""
		matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
		if matchobj is None:
			return None
		number = float(matchobj.group(1))
		# An empty suffix yields index 0 ('b'), i.e. a multiplier of 1.
		multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
		return long(round(number * multiplier))

	def add_info_extractor(self, ie):
		"""Add an InfoExtractor object to the end of the list."""
		self._ies.append(ie)
		ie.set_downloader(self)

	def add_post_processor(self, pp):
		"""Add a PostProcessor object to the end of the chain."""
		self._pps.append(pp)
		pp.set_downloader(self)

	def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
		"""Print message to stdout if not in quiet mode."""
		try:
			if not self.params.get('quiet', False):
				terminator = [u'\n', u''][skip_eol]
				# The trailing comma suppresses print's own newline; the
				# terminator chosen above controls it instead.
				print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
			self._screen_file.flush()
		except (UnicodeEncodeError), err:
			if not ignore_encoding_errors:
				raise

	def to_stderr(self, message):
		"""Print message to stderr."""
		print >>sys.stderr, message.encode(preferredencoding())

	def fixed_template(self):
		"""Checks if the output template is fixed."""
		# "Fixed" means it contains no %(field)s substitutions, so every
		# download would be written to the very same file.
		return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)

	def trouble(self, message=None):
		"""Determine action to take when a download problem appears.

		Depending on if the downloader has been configured to ignore
		download errors or not, this method may throw an exception or
		not when errors are found, after printing the message.
		"""
		if message is not None:
			self.to_stderr(message)
		if not self.params.get('ignoreerrors', False):
			raise DownloadError(message)
		self._download_retcode = 1

	def slow_down(self, start_time, byte_counter):
		"""Sleep if the download speed is over the rate limit."""
		rate_limit = self.params.get('ratelimit', None)
		if rate_limit is None or byte_counter == 0:
			return
		now = time.time()
		elapsed = now - start_time
		if elapsed <= 0.0:
			return
		speed = float(byte_counter) / elapsed
		if speed > rate_limit:
			# Sleep just long enough that the average speed over the whole
			# download drops back to the configured limit.
			time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)

	def try_rename(self, old_filename, new_filename):
		"""Rename the temporary file to its final name, reporting failure
		through trouble() instead of raising directly."""
		try:
			os.rename(old_filename, new_filename)
		except (IOError, OSError), err:
			self.trouble(u'ERROR: unable to rename file')

	def report_destination(self, filename):
		"""Report destination filename."""
		self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)

	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
		"""Report download progress."""
		if self.params.get('noprogress', False):
			return
		# The leading '\r' rewrites the current line, updating the progress
		# bar in place.
		self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
				(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)

	def report_resuming_byte(self, resume_len):
		"""Report attempt to resume at given byte."""
		self.to_screen(u'[download] Resuming download at byte %s' % resume_len)

	def report_retry(self, count, retries):
		"""Report retry in case of HTTP error 5xx"""
		self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))

	def report_file_already_downloaded(self, file_name):
		"""Report file has already been fully downloaded."""
		try:
			self.to_screen(u'[download] %s has already been downloaded' % file_name)
		except (UnicodeEncodeError), err:
			# Fall back to a message without the filename if it cannot be
			# encoded for the console.
			self.to_screen(u'[download] The file has already been downloaded')

	def report_unable_to_resume(self):
		"""Report it was impossible to resume download."""
		self.to_screen(u'[download] Unable to resume')

	def report_finish(self):
		"""Report download finished."""
		if self.params.get('noprogress', False):
			self.to_screen(u'[download] Download completed')
		else:
			# Just terminate the in-place progress line with a newline.
			self.to_screen(u'')

	def increment_downloads(self):
		"""Increment the ordinal that assigns a number to each file."""
		self._num_downloads += 1

	def process_info(self, info_dict):
		"""Process a single dictionary returned by an InfoExtractor."""
		# Do nothing else if in simulate mode
		if self.params.get('simulate', False):
			# Forced printings
			if self.params.get('forcetitle', False):
				print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forceurl', False):
				print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
				print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcedescription', False) and 'description' in info_dict:
				print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')

			return

		try:
			# Expand the output template with the video info plus two
			# synthesized fields: epoch (current time) and autonumber.
			template_dict = dict(info_dict)
			template_dict['epoch'] = unicode(long(time.time()))
			template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
			filename = self.params['outtmpl'] % template_dict
		except (ValueError, KeyError), err:
			self.trouble(u'ERROR: invalid system charset or erroneous output template')
			return
		if self.params.get('nooverwrites', False) and os.path.exists(filename):
			self.to_stderr(u'WARNING: file exists and will be skipped')
			return

		try:
			self.pmkdir(filename)
		except (OSError, IOError), err:
			self.trouble(u'ERROR: unable to create directories: %s' % str(err))
			return

		try:
			success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
		except (OSError, IOError), err:
			raise UnavailableVideoError
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self.trouble(u'ERROR: unable to download video data: %s' % str(err))
			return
		except (ContentTooShortError, ), err:
			self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
			return

		if success:
			try:
				self.post_process(filename, info_dict)
			except (PostProcessingError), err:
				self.trouble(u'ERROR: postprocessing: %s' % str(err))
				return

	def download(self, url_list):
		"""Download a given list of URLs."""
		if len(url_list) > 1 and self.fixed_template():
			# A fixed template would make every URL overwrite the same file.
			raise SameFileError(self.params['outtmpl'])

		for url in url_list:
			suitable_found = False
			for ie in self._ies:
				# Go to next InfoExtractor if not suitable
				if not ie.suitable(url):
					continue

				# Suitable InfoExtractor found
				suitable_found = True

				# Extract information from URL and process it
				ie.extract(url)

				# Suitable InfoExtractor had been found; go to next URL
				break

			if not suitable_found:
				self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)

		return self._download_retcode

	def post_process(self, filename, ie_info):
		"""Run the postprocessing chain on the given file."""
		info = dict(ie_info)
		info['filepath'] = filename
		for pp in self._pps:
			info = pp.run(info)
			# A postprocessor returning None stops the chain.
			if info is None:
				break

	def _download_with_rtmpdump(self, filename, url, player_url):
		"""Download an RTMP stream by shelling out to the rtmpdump tool.

		Writes into a temporary '.part' file, renamed to the final name on
		success. Returns True on success, False otherwise.
		"""
		self.report_destination(filename)
		tmpfilename = self.temp_name(filename)

		# Check for rtmpdump first
		try:
			subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
		except (OSError, IOError):
			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
			return False

		# Download using rtmpdump. rtmpdump returns exit code 2 when
		# the connection was interrumpted and resuming appears to be
		# possible. This is part of rtmpdump's normal usage, AFAIK.
		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
		retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
		while retval == 2 or retval == 1:
			prevsize = os.path.getsize(tmpfilename)
			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
			time.sleep(5.0) # This seems to be needed
			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
			cursize = os.path.getsize(tmpfilename)
			# No progress between attempts with exit code 1: give up.
			if prevsize == cursize and retval == 1:
				break
		if retval == 0:
			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
			self.try_rename(tmpfilename, filename)
			return True
		else:
			self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
			return False

	def _do_download(self, filename, url, player_url):
		"""Download url into filename, resuming and retrying as configured.

		Data is written to a temporary '.part' file which is renamed to the
		final filename only once the download completes, so an interrupted
		run never leaves a truncated file under the final name.
		Returns True on success, False otherwise.
		"""
		# Check file already present
		if self.params.get('continuedl', False) and os.path.isfile(filename):
			self.report_file_already_downloaded(filename)
			return True

		# Attempt to download using rtmpdump
		if url.startswith('rtmp'):
			return self._download_with_rtmpdump(filename, url, player_url)

		tmpfilename = self.temp_name(filename)
		stream = None
		open_mode = 'wb'
		# basic_request never carries a Range header; it is used to probe
		# the full length when a resume attempt is rejected with 416.
		basic_request = urllib2.Request(url, None, std_headers)
		request = urllib2.Request(url, None, std_headers)

		# Establish possible resume length
		if os.path.isfile(tmpfilename):
			resume_len = os.path.getsize(tmpfilename)
		else:
			resume_len = 0

		# Request parameters in case of being able to resume
		if self.params.get('continuedl', False) and resume_len != 0:
			self.report_resuming_byte(resume_len)
			request.add_header('Range','bytes=%d-' % resume_len)
			open_mode = 'ab'

		count = 0
		retries = self.params.get('retries', 0)
		while count <= retries:
			# Establish connection
			try:
				data = urllib2.urlopen(request)
				break
			except (urllib2.HTTPError, ), err:
				if (err.code < 500 or err.code >= 600) and err.code != 416:
					# Unexpected HTTP error
					raise
				elif err.code == 416:
					# Unable to resume (requested range not satisfiable)
					try:
						# Open the connection again without the range header
						data = urllib2.urlopen(basic_request)
						content_length = data.info()['Content-Length']
					except (urllib2.HTTPError, ), err:
						if err.code < 500 or err.code >= 600:
							raise
					else:
						# Examine the reported length
						if (content_length is not None and
						    (resume_len - 100 < long(content_length) < resume_len + 100)):
							# The file had already been fully downloaded.
							# Explanation to the above condition: in issue #175 it was revealed that
							# YouTube sometimes adds or removes a few bytes from the end of the file,
							# changing the file size slightly and causing problems for some users. So
							# I decided to implement a suggested change and consider the file
							# completely downloaded if the file size differs less than 100 bytes from
							# the one in the hard drive.
							self.report_file_already_downloaded(filename)
							self.try_rename(tmpfilename, filename)
							return True
						else:
							# The length does not match, we start the download over
							self.report_unable_to_resume()
							open_mode = 'wb'
							break
			# Retry
			count += 1
			if count <= retries:
				self.report_retry(count, retries)

		if count > retries:
			self.trouble(u'ERROR: giving up after %s retries' % retries)
			return False

		data_len = data.info().get('Content-length', None)
		data_len_str = self.format_bytes(data_len)
		byte_counter = 0
		block_size = 1024
		start = time.time()
		while True:
			# Download and write
			before = time.time()
			data_block = data.read(block_size)
			after = time.time()
			data_block_len = len(data_block)
			if data_block_len == 0:
				break
			byte_counter += data_block_len

			# Open file just in time
			if stream is None:
				try:
					(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
					self.report_destination(filename)
				except (OSError, IOError), err:
					self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
					return False
			try:
				stream.write(data_block)
			except (IOError, OSError), err:
				self.trouble(u'\nERROR: unable to write data: %s' % str(err))
				return False
			block_size = self.best_block_size(after - before, data_block_len)

			# Progress message
			percent_str = self.calc_percent(byte_counter, data_len)
			eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
			speed_str = self.calc_speed(start, time.time(), byte_counter)
			self.report_progress(percent_str, data_len_str, speed_str, eta_str)

			# Apply rate limit
			self.slow_down(start, byte_counter)

		self.report_finish()
		# NOTE(review): data_len is the raw Content-length header value (a
		# string), so this is a string comparison against str(byte_counter);
		# it relies on the header being a plain decimal number.
		if data_len is not None and str(byte_counter) != data_len:
			raise ContentTooShortError(byte_counter, long(data_len))
		self.try_rename(tmpfilename, filename)
		return True
648
class InfoExtractor(object):
	"""Base class for all information extractors.

	An information extractor receives a URL and produces, for every video
	that URL refers to, a dictionary of metadata which is handed to the
	FileDownloader for processing (typically, downloading the video).
	Each dictionary must carry the following keys:

	id:             Video identifier.
	url:            Final video URL.
	uploader:       Nickname of the video uploader.
	title:          Literal title.
	stitle:         Simplified title.
	ext:            Video filename extension.
	format:         Video format.
	player_url:     SWF Player URL (may be None).

	Optional keys, used only by the forced-printing helpers (e.g. when
	youtube-dl acts as a backend for a video search front end):

	thumbnail:      Full URL to a video thumbnail image.
	description:    One-line video description.

	Concrete extractors override _real_initialize(), _real_extract() and
	the suitable() static method, and are registered with the downloader.
	"""

	# Class-level defaults; __init__ re-sets _ready per instance.
	_ready = False
	_downloader = None

	def __init__(self, downloader=None):
		"""Create the extractor; optionally attach a downloader now."""
		self._ready = False
		self.set_downloader(downloader)

	@staticmethod
	def suitable(url):
		"""Return True when this extractor can handle the given URL."""
		# The base class handles nothing; subclasses match their URLs.
		return False

	def initialize(self):
		"""Run one-time setup (authentication, etc.) at most once."""
		if self._ready:
			return
		self._real_initialize()
		self._ready = True

	def extract(self, url):
		"""Initialize lazily, then extract information for *url*."""
		self.initialize()
		return self._real_extract(url)

	def set_downloader(self, downloader):
		"""Attach the FileDownloader this extractor reports through."""
		self._downloader = downloader

	def _real_initialize(self):
		"""Subclass hook: the actual initialization work."""
		pass

	def _real_extract(self, url):
		"""Subclass hook: the actual extraction work."""
		pass
719
class YoutubeIE(InfoExtractor):
	"""Information extractor for youtube.com.

	Handles language selection, optional login (explicit credentials or
	.netrc), age confirmation, and format selection via fmt_url_map.
	"""

	# Group 1 matches the scheme/host prefix; group 2 is the video id.
	# The conditional pattern (?(1).+)? only allows trailing characters
	# when a URL prefix (group 1) was actually present.
	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$'
	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	_NETRC_MACHINE = 'youtube'
	# Listed in order of quality (best first)
	_available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
	# Maps a YouTube fmt code to a filename extension; anything not
	# listed here falls back to 'flv' in _real_extract().
	_video_extensions = {
		'13': '3gp',
		'17': 'mp4',
		'18': 'mp4',
		'22': 'mp4',
		'37': 'mp4',
		'38': 'video', # You actually don't know if this will be MOV, AVI or whatever
		'43': 'webm',
		'45': 'webm',
	}

	@staticmethod
	def suitable(url):
		"""Return True when *url* matches a known YouTube URL shape."""
		return (re.match(YoutubeIE._VALID_URL, url) is not None)

	def report_lang(self):
		"""Report attempt to set language."""
		self._downloader.to_screen(u'[youtube] Setting language')

	def report_login(self):
		"""Report attempt to log in."""
		self._downloader.to_screen(u'[youtube] Logging in')

	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[youtube] Confirming age')

	def report_video_webpage_download(self, video_id):
		"""Report attempt to download video webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)

	def report_video_info_webpage_download(self, video_id):
		"""Report attempt to download video info webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)

	def report_information_extraction(self, video_id):
		"""Report attempt to extract video information."""
		self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)

	def report_unavailable_format(self, video_id, format):
		"""Report that a requested format is not available."""
		self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))

	def report_rtmp_download(self):
		"""Indicate the download will use the RTMP protocol."""
		self._downloader.to_screen(u'[youtube] RTMP download detected')

	def _real_initialize(self):
		"""Set language, optionally log in, and confirm age.

		Credentials come from downloader params ('username'/'password')
		or, with 'usenetrc', from the user's .netrc file. All failures
		short of age confirmation are reported as warnings and abort
		initialization without raising.
		"""
		if self._downloader is None:
			return

		username = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					username = info[0]
					password = info[2]
				else:
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		# Set language (forces English pages so later regexes match)
		request = urllib2.Request(self._LANG_URL, None, std_headers)
		try:
			self.report_lang()
			urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
			return

		# No authentication to be performed
		if username is None:
			return

		# Log in
		login_form = {
				'current_form': 'loginForm',
				'next':         '/',
				'action_login': 'Log In',
				'username':     username,
				'password':     password,
				}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			# If the login form is still present, authentication failed.
			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return

		# Confirm age
		age_form = {
				'next_url':             '/',
				'action_confirm':       'Confirm',
				}
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
		try:
			self.report_age_confirmation()
			age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return

	def _real_extract(self, url):
		"""Extract metadata and hand each chosen format to the downloader."""
		# Extract video id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return
		video_id = mobj.group(2)

		# Get video webpage
		self.report_video_webpage_download(video_id)
		# NOTE(review): '&amp;has_verified=1' contains an HTML-escaped '&'
		# leaked into the URL; kept as-is since it matches shipped behavior.
		request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id, None, std_headers)
		try:
			video_webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
			return

		# Attempt to extract SWF player URL (the '\\/' sequences are
		# JavaScript-escaped slashes; re.sub below unescapes them).
		mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
		if mobj is not None:
			player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
		else:
			player_url = None

		# Get video info: try several 'el' variants until one response
		# carries a 'token' parameter; the last attempt uses no 'el'.
		self.report_video_info_webpage_download(video_id)
		for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
			video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
					   % (video_id, el_type))
			request = urllib2.Request(video_info_url, None, std_headers)
			try:
				video_info_webpage = urllib2.urlopen(request).read()
				video_info = parse_qs(video_info_webpage)
				if 'token' in video_info:
					break
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
				return
		if 'token' not in video_info:
			if 'reason' in video_info:
				self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
			else:
				self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
			return

		# Start extracting information
		self.report_information_extraction(video_id)

		# uploader
		if 'author' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = urllib.unquote_plus(video_info['author'][0])

		# title
		if 'title' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract video title')
			return
		video_title = urllib.unquote_plus(video_info['title'][0])
		video_title = video_title.decode('utf-8')
		video_title = sanitize_title(video_title)

		# simplified title: collapse every run of non-alphanumerics to '_'
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
		simple_title = simple_title.strip(ur'_')

		# thumbnail image
		if 'thumbnail_url' not in video_info:
			self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
			video_thumbnail = ''
		else:	# don't panic if we can't find it
			video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

		# upload date: scraped from the watch page and normalized to
		# YYYYMMDD; stays u'NA' when not found or unparseable.
		upload_date = u'NA'
		mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
		if mobj is not None:
			upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
			format_expressions = ['%d %B %Y', '%B %d %Y']
			for expression in format_expressions:
				try:
					upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
				except:
					# Once one expression succeeds, the rest fail
					# harmlessly on the already-converted value.
					pass

		# description (only scraped when forced printing requests it)
		video_description = 'No description available.'
		if self._downloader.params.get('forcedescription', False):
			mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
			if mobj is not None:
				video_description = mobj.group(1)

		# token
		video_token = urllib.unquote_plus(video_info['token'][0])

		# Decide which formats to download
		requested_format = self._downloader.params.get('format', None)
		get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token)

		if 'fmt_url_map' in video_info:
			# fmt_url_map is 'fmt|url,fmt|url,...'
			url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
			format_limit = self._downloader.params.get('format_limit', None)
			if format_limit is not None and format_limit in self._available_formats:
				# Restrict to formats at or below the quality cap.
				format_list = self._available_formats[self._available_formats.index(format_limit):]
			else:
				format_list = self._available_formats
			existing_formats = [x for x in format_list if x in url_map]
			if len(existing_formats) == 0:
				self._downloader.trouble(u'ERROR: no known formats available for video')
				return
			if requested_format is None:
				video_url_list = [(existing_formats[0], get_video_template % existing_formats[0])] # Best quality
			elif requested_format == '-1':
				video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats
			else:
				video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format

		elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
			self.report_rtmp_download()
			video_url_list = [(None, video_info['conn'][0])]

		else:
			self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
			return

		for format_param, video_real_url in video_url_list:
			# At this point we have a new video
			self._downloader.increment_downloads()

			# Extension
			video_extension = self._video_extensions.get(format_param, 'flv')

			# Find the video URL in fmt_url_map or conn paramters
			try:
				# Process video information
				self._downloader.process_info({
					'id':           video_id.decode('utf-8'),
					'url':          video_real_url.decode('utf-8'),
					'uploader':     video_uploader.decode('utf-8'),
					'upload_date':  upload_date,
					'title':        video_title,
					'stitle':       simple_title,
					'ext':          video_extension.decode('utf-8'),
					'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
					'thumbnail':    video_thumbnail.decode('utf-8'),
					'description':  video_description.decode('utf-8'),
					'player_url':   player_url,
				})
			except UnavailableVideoError, err:
				self._downloader.trouble(u'ERROR: unable to download video (format may not be available)')
996
997
class MetacafeIE(InfoExtractor):
	"""Information Extractor for metacafe.com.

	Delegates 'yt-' prefixed videos to a YoutubeIE instance; otherwise
	scrapes the watch page directly.
	"""

	# Group 1 is the video id, group 2 the URL's title slug.
	_VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
	_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
	_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
	_youtube_ie = None

	def __init__(self, youtube_ie, downloader=None):
		"""Store the YoutubeIE used for embedded YouTube videos."""
		InfoExtractor.__init__(self, downloader)
		self._youtube_ie = youtube_ie

	@staticmethod
	def suitable(url):
		"""Return True when *url* matches a Metacafe watch URL."""
		return (re.match(MetacafeIE._VALID_URL, url) is not None)

	def report_disclaimer(self):
		"""Report disclaimer retrieval."""
		self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')

	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[metacafe] Confirming age')

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)

	def _real_initialize(self):
		"""Fetch the disclaimer page, then POST the family-filter
		confirmation so age-restricted videos are accessible."""
		# Retrieve disclaimer
		request = urllib2.Request(self._DISCLAIMER, None, std_headers)
		try:
			self.report_disclaimer()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
			return

		# Confirm age
		disclaimer_form = {
			'filters': '0',
			'submit': "Continue - I'm over 18",
			}
		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
		try:
			self.report_age_confirmation()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return

	def _real_extract(self, url):
		"""Scrape the watch page for the media URL, title and uploader."""
		# Extract id and simplified title from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		video_id = mobj.group(1)

		# Check if video comes from YouTube; if so, delegate entirely.
		mobj2 = re.match(r'^yt-(.*)$', video_id)
		if mobj2 is not None:
			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
			return

		# At this point we have a new video
		self._downloader.increment_downloads()

		simple_title = mobj.group(2).decode('utf-8')

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader and title from webpage.
		# First try the plain mediaURL parameter, falling back to the
		# flashvars blob when it is absent.
		self.report_extraction(video_id)
		mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
		if mobj is not None:
			mediaURL = urllib.unquote(mobj.group(1))
			video_extension = mediaURL[-3:]

			# Extract gdaKey if available (access key appended to the URL)
			mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
			if mobj is None:
				video_url = mediaURL
			else:
				gdaKey = mobj.group(1)
				video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
		else:
			mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			vardict = parse_qs(mobj.group(1))
			if 'mediaData' not in vardict:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			# Unescape the JSON-style '\/' sequences in the URL.
			mediaURL = mobj.group(1).replace('\\/', '/')
			video_extension = mediaURL[-3:]
			video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))

		mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)

		mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = mobj.group(1)

		try:
			# Process video information
			self._downloader.process_info({
				'id':           video_id.decode('utf-8'),
				'url':          video_url.decode('utf-8'),
				'uploader':     video_uploader.decode('utf-8'),
				'upload_date':  u'NA',
				'title':        video_title,
				'stitle':       simple_title,
				'ext':          video_extension.decode('utf-8'),
				'format':       u'NA',
				'player_url':   None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'ERROR: unable to download video')
1141
1142
class DailymotionIE(InfoExtractor):
	"""Information Extractor for Dailymotion.

	Scrapes the video page for the flash 'video' variable, the title
	and the uploader nickname. Always reports the format as FLV.
	"""

	# Group 1 is the video id, group 2 the URL's title slug.
	_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'

	def __init__(self, downloader=None):
		"""Create the extractor, optionally attaching a downloader."""
		InfoExtractor.__init__(self, downloader)

	@staticmethod
	def suitable(url):
		"""Return True when *url* matches a Dailymotion video URL."""
		return (re.match(DailymotionIE._VALID_URL, url) is not None)

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)

	def _real_initialize(self):
		# No setup (login, cookies, ...) is needed for Dailymotion.
		return

	def _real_extract(self, url):
		"""Scrape the page at *url* and hand the metadata to the downloader."""
		# Extract id and simplified title from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		# At this point we have a new video
		self._downloader.increment_downloads()
		video_id = mobj.group(1)

		simple_title = mobj.group(2).decode('utf-8')
		video_extension = 'flv'

		# Retrieve video webpage to extract further information
		request = urllib2.Request(url)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract media URL')
			return
		mediaURL = urllib.unquote(mobj.group(1))

		# if needed add http://www.dailymotion.com/ if relative URL

		video_url = mediaURL

		# '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
		mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)

		mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = mobj.group(1)

		try:
			# Process video information
			self._downloader.process_info({
				'id':           video_id.decode('utf-8'),
				'url':          video_url.decode('utf-8'),
				'uploader':     video_uploader.decode('utf-8'),
				'upload_date':  u'NA',
				'title':        video_title,
				'stitle':       simple_title,
				'ext':          video_extension.decode('utf-8'),
				'format':       u'NA',
				'player_url':   None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'ERROR: unable to download video')
1230
1231 class GoogleIE(InfoExtractor):
1232         """Information extractor for video.google.com."""
1233
1234         _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1235
1236         def __init__(self, downloader=None):
1237                 InfoExtractor.__init__(self, downloader)
1238
1239         @staticmethod
1240         def suitable(url):
1241                 return (re.match(GoogleIE._VALID_URL, url) is not None)
1242
1243         def report_download_webpage(self, video_id):
1244                 """Report webpage download."""
1245                 self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
1246
1247         def report_extraction(self, video_id):
1248                 """Report information extraction."""
1249                 self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
1250
1251         def _real_initialize(self):
1252                 return
1253
1254         def _real_extract(self, url):
1255                 # Extract id from URL
1256                 mobj = re.match(self._VALID_URL, url)
1257                 if mobj is None:
1258                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1259                         return
1260
1261                 # At this point we have a new video
1262                 self._downloader.increment_downloads()
1263                 video_id = mobj.group(1)
1264
1265                 video_extension = 'mp4'
1266
1267                 # Retrieve video webpage to extract further information
1268                 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1269                 try:
1270                         self.report_download_webpage(video_id)
1271                         webpage = urllib2.urlopen(request).read()
1272                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1273                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1274                         return
1275
1276                 # Extract URL, uploader, and title from webpage
1277                 self.report_extraction(video_id)
1278                 mobj = re.search(r"download_url:'([^']+)'", webpage)
1279                 if mobj is None:
1280                         video_extension = 'flv'
1281                         mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1282                 if mobj is None:
1283                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1284                         return
1285                 mediaURL = urllib.unquote(mobj.group(1))
1286                 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1287                 mediaURL = mediaURL.replace('\\x26', '\x26')
1288
1289                 video_url = mediaURL
1290
1291                 mobj = re.search(r'<title>(.*)</title>', webpage)
1292                 if mobj is None:
1293                         self._downloader.trouble(u'ERROR: unable to extract title')
1294                         return
1295                 video_title = mobj.group(1).decode('utf-8')
1296                 video_title = sanitize_title(video_title)
1297                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1298
1299                 # Extract video description
1300                 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1301                 if mobj is None:
1302                         self._downloader.trouble(u'ERROR: unable to extract video description')
1303                         return
1304                 video_description = mobj.group(1).decode('utf-8')
1305                 if not video_description:
1306                         video_description = 'No description available.'
1307
1308                 # Extract video thumbnail
1309                 if self._downloader.params.get('forcethumbnail', False):
1310                         request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1311                         try:
1312                                 webpage = urllib2.urlopen(request).read()
1313                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1314                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1315                                 return
1316                         mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1317                         if mobj is None:
1318                                 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1319                                 return
1320                         video_thumbnail = mobj.group(1)
1321                 else:   # we need something to pass to process_info
1322                         video_thumbnail = ''
1323
1324
1325                 try:
1326                         # Process video information
1327                         self._downloader.process_info({
1328                                 'id':           video_id.decode('utf-8'),
1329                                 'url':          video_url.decode('utf-8'),
1330                                 'uploader':     u'NA',
1331                                 'upload_date':  u'NA',
1332                                 'title':        video_title,
1333                                 'stitle':       simple_title,
1334                                 'ext':          video_extension.decode('utf-8'),
1335                                 'format':       u'NA',
1336                                 'player_url':   None,
1337                         })
1338                 except UnavailableVideoError:
1339                         self._downloader.trouble(u'ERROR: unable to download video')
1340
1341
1342 class PhotobucketIE(InfoExtractor):
1343         """Information extractor for photobucket.com."""
1344
1345         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1346
1347         def __init__(self, downloader=None):
1348                 InfoExtractor.__init__(self, downloader)
1349
1350         @staticmethod
1351         def suitable(url):
1352                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1353
1354         def report_download_webpage(self, video_id):
1355                 """Report webpage download."""
1356                 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1357
1358         def report_extraction(self, video_id):
1359                 """Report information extraction."""
1360                 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1361
1362         def _real_initialize(self):
1363                 return
1364
1365         def _real_extract(self, url):
1366                 # Extract id from URL
1367                 mobj = re.match(self._VALID_URL, url)
1368                 if mobj is None:
1369                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1370                         return
1371
1372                 # At this point we have a new video
1373                 self._downloader.increment_downloads()
1374                 video_id = mobj.group(1)
1375
1376                 video_extension = 'flv'
1377
1378                 # Retrieve video webpage to extract further information
1379                 request = urllib2.Request(url)
1380                 try:
1381                         self.report_download_webpage(video_id)
1382                         webpage = urllib2.urlopen(request).read()
1383                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1384                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1385                         return
1386
1387                 # Extract URL, uploader, and title from webpage
1388                 self.report_extraction(video_id)
1389                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1390                 if mobj is None:
1391                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1392                         return
1393                 mediaURL = urllib.unquote(mobj.group(1))
1394
1395                 video_url = mediaURL
1396
1397                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1398                 if mobj is None:
1399                         self._downloader.trouble(u'ERROR: unable to extract title')
1400                         return
1401                 video_title = mobj.group(1).decode('utf-8')
1402                 video_title = sanitize_title(video_title)
1403                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1404
1405                 video_uploader = mobj.group(2).decode('utf-8')
1406
1407                 try:
1408                         # Process video information
1409                         self._downloader.process_info({
1410                                 'id':           video_id.decode('utf-8'),
1411                                 'url':          video_url.decode('utf-8'),
1412                                 'uploader':     video_uploader,
1413                                 'upload_date':  u'NA',
1414                                 'title':        video_title,
1415                                 'stitle':       simple_title,
1416                                 'ext':          video_extension.decode('utf-8'),
1417                                 'format':       u'NA',
1418                                 'player_url':   None,
1419                         })
1420                 except UnavailableVideoError:
1421                         self._downloader.trouble(u'ERROR: unable to download video')
1422
1423
1424 class YahooIE(InfoExtractor):
1425         """Information extractor for video.yahoo.com."""
1426
1427         # _VALID_URL matches all Yahoo! Video URLs
1428         # _VPAGE_URL matches only the extractable '/watch/' URLs
1429         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1430         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1431
1432         def __init__(self, downloader=None):
1433                 InfoExtractor.__init__(self, downloader)
1434
1435         @staticmethod
1436         def suitable(url):
1437                 return (re.match(YahooIE._VALID_URL, url) is not None)
1438
1439         def report_download_webpage(self, video_id):
1440                 """Report webpage download."""
1441                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1442
1443         def report_extraction(self, video_id):
1444                 """Report information extraction."""
1445                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1446
1447         def _real_initialize(self):
1448                 return
1449
1450         def _real_extract(self, url, new_video=True):
1451                 # Extract ID from URL
1452                 mobj = re.match(self._VALID_URL, url)
1453                 if mobj is None:
1454                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1455                         return
1456
1457                 # At this point we have a new video
1458                 self._downloader.increment_downloads()
1459                 video_id = mobj.group(2)
1460                 video_extension = 'flv'
1461
1462                 # Rewrite valid but non-extractable URLs as
1463                 # extractable English language /watch/ URLs
1464                 if re.match(self._VPAGE_URL, url) is None:
1465                         request = urllib2.Request(url)
1466                         try:
1467                                 webpage = urllib2.urlopen(request).read()
1468                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1469                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1470                                 return
1471
1472                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1473                         if mobj is None:
1474                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1475                                 return
1476                         yahoo_id = mobj.group(1)
1477
1478                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1479                         if mobj is None:
1480                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1481                                 return
1482                         yahoo_vid = mobj.group(1)
1483
1484                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1485                         return self._real_extract(url, new_video=False)
1486
1487                 # Retrieve video webpage to extract further information
1488                 request = urllib2.Request(url)
1489                 try:
1490                         self.report_download_webpage(video_id)
1491                         webpage = urllib2.urlopen(request).read()
1492                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1493                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1494                         return
1495
1496                 # Extract uploader and title from webpage
1497                 self.report_extraction(video_id)
1498                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1499                 if mobj is None:
1500                         self._downloader.trouble(u'ERROR: unable to extract video title')
1501                         return
1502                 video_title = mobj.group(1).decode('utf-8')
1503                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1504
1505                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1506                 if mobj is None:
1507                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1508                         return
1509                 video_uploader = mobj.group(1).decode('utf-8')
1510
1511                 # Extract video thumbnail
1512                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1513                 if mobj is None:
1514                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1515                         return
1516                 video_thumbnail = mobj.group(1).decode('utf-8')
1517
1518                 # Extract video description
1519                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1520                 if mobj is None:
1521                         self._downloader.trouble(u'ERROR: unable to extract video description')
1522                         return
1523                 video_description = mobj.group(1).decode('utf-8')
1524                 if not video_description: video_description = 'No description available.'
1525
1526                 # Extract video height and width
1527                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1528                 if mobj is None:
1529                         self._downloader.trouble(u'ERROR: unable to extract video height')
1530                         return
1531                 yv_video_height = mobj.group(1)
1532
1533                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1534                 if mobj is None:
1535                         self._downloader.trouble(u'ERROR: unable to extract video width')
1536                         return
1537                 yv_video_width = mobj.group(1)
1538
1539                 # Retrieve video playlist to extract media URL
1540                 # I'm not completely sure what all these options are, but we
1541                 # seem to need most of them, otherwise the server sends a 401.
1542                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1543                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1544                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1545                                           '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1546                                           '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1547                 try:
1548                         self.report_download_webpage(video_id)
1549                         webpage = urllib2.urlopen(request).read()
1550                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1551                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1552                         return
1553
1554                 # Extract media URL from playlist XML
1555                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1556                 if mobj is None:
1557                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1558                         return
1559                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1560                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1561
1562                 try:
1563                         # Process video information
1564                         self._downloader.process_info({
1565                                 'id':           video_id.decode('utf-8'),
1566                                 'url':          video_url,
1567                                 'uploader':     video_uploader,
1568                                 'upload_date':  u'NA',
1569                                 'title':        video_title,
1570                                 'stitle':       simple_title,
1571                                 'ext':          video_extension.decode('utf-8'),
1572                                 'thumbnail':    video_thumbnail.decode('utf-8'),
1573                                 'description':  video_description,
1574                                 'thumbnail':    video_thumbnail,
1575                                 'description':  video_description,
1576                                 'player_url':   None,
1577                         })
1578                 except UnavailableVideoError:
1579                         self._downloader.trouble(u'ERROR: unable to download video')
1580
1581
1582 class GenericIE(InfoExtractor):
1583         """Generic last-resort information extractor."""
1584
1585         def __init__(self, downloader=None):
1586                 InfoExtractor.__init__(self, downloader)
1587
1588         @staticmethod
1589         def suitable(url):
1590                 return True
1591
1592         def report_download_webpage(self, video_id):
1593                 """Report webpage download."""
1594                 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1595                 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1596
1597         def report_extraction(self, video_id):
1598                 """Report information extraction."""
1599                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1600
1601         def _real_initialize(self):
1602                 return
1603
1604         def _real_extract(self, url):
1605                 # At this point we have a new video
1606                 self._downloader.increment_downloads()
1607
1608                 video_id = url.split('/')[-1]
1609                 request = urllib2.Request(url)
1610                 try:
1611                         self.report_download_webpage(video_id)
1612                         webpage = urllib2.urlopen(request).read()
1613                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1614                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1615                         return
1616                 except ValueError, err:
1617                         # since this is the last-resort InfoExtractor, if
1618                         # this error is thrown, it'll be thrown here
1619                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1620                         return
1621
1622                 # Start with something easy: JW Player in SWFObject
1623                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1624                 if mobj is None:
1625                         # Broaden the search a little bit
1626                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1627                 if mobj is None:
1628                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1629                         return
1630
1631                 # It's possible that one of the regexes
1632                 # matched, but returned an empty group:
1633                 if mobj.group(1) is None:
1634                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1635                         return
1636
1637                 video_url = urllib.unquote(mobj.group(1))
1638                 video_id  = os.path.basename(video_url)
1639
1640                 # here's a fun little line of code for you:
1641                 video_extension = os.path.splitext(video_id)[1][1:]
1642                 video_id        = os.path.splitext(video_id)[0]
1643
1644                 # it's tempting to parse this further, but you would
1645                 # have to take into account all the variations like
1646                 #   Video Title - Site Name
1647                 #   Site Name | Video Title
1648                 #   Video Title - Tagline | Site Name
1649                 # and so on and so forth; it's just not practical
1650                 mobj = re.search(r'<title>(.*)</title>', webpage)
1651                 if mobj is None:
1652                         self._downloader.trouble(u'ERROR: unable to extract title')
1653                         return
1654                 video_title = mobj.group(1).decode('utf-8')
1655                 video_title = sanitize_title(video_title)
1656                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1657
1658                 # video uploader is domain name
1659                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1660                 if mobj is None:
1661                         self._downloader.trouble(u'ERROR: unable to extract title')
1662                         return
1663                 video_uploader = mobj.group(1).decode('utf-8')
1664
1665                 try:
1666                         # Process video information
1667                         self._downloader.process_info({
1668                                 'id':           video_id.decode('utf-8'),
1669                                 'url':          video_url.decode('utf-8'),
1670                                 'uploader':     video_uploader,
1671                                 'upload_date':  u'NA',
1672                                 'title':        video_title,
1673                                 'stitle':       simple_title,
1674                                 'ext':          video_extension.decode('utf-8'),
1675                                 'format':       u'NA',
1676                                 'player_url':   None,
1677                         })
1678                 except UnavailableVideoError, err:
1679                         self._downloader.trouble(u'ERROR: unable to download video')
1680
1681
1682 class YoutubeSearchIE(InfoExtractor):
1683         """Information Extractor for YouTube search queries."""
1684         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1685         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1686         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1687         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1688         _youtube_ie = None
1689         _max_youtube_results = 1000
1690
1691         def __init__(self, youtube_ie, downloader=None):
1692                 InfoExtractor.__init__(self, downloader)
1693                 self._youtube_ie = youtube_ie
1694         
1695         @staticmethod
1696         def suitable(url):
1697                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1698
1699         def report_download_page(self, query, pagenum):
1700                 """Report attempt to download playlist page with given number."""
1701                 query = query.decode(preferredencoding())
1702                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1703
1704         def _real_initialize(self):
1705                 self._youtube_ie.initialize()
1706         
1707         def _real_extract(self, query):
1708                 mobj = re.match(self._VALID_QUERY, query)
1709                 if mobj is None:
1710                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1711                         return
1712
1713                 prefix, query = query.split(':')
1714                 prefix = prefix[8:]
1715                 query  = query.encode('utf-8')
1716                 if prefix == '':
1717                         self._download_n_results(query, 1)
1718                         return
1719                 elif prefix == 'all':
1720                         self._download_n_results(query, self._max_youtube_results)
1721                         return
1722                 else:
1723                         try:
1724                                 n = long(prefix)
1725                                 if n <= 0:
1726                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1727                                         return
1728                                 elif n > self._max_youtube_results:
1729                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
1730                                         n = self._max_youtube_results
1731                                 self._download_n_results(query, n)
1732                                 return
1733                         except ValueError: # parsing prefix as integer fails
1734                                 self._download_n_results(query, 1)
1735                                 return
1736
1737         def _download_n_results(self, query, n):
1738                 """Downloads a specified number of results for a query"""
1739
1740                 video_ids = []
1741                 already_seen = set()
1742                 pagenum = 1
1743
1744                 while True:
1745                         self.report_download_page(query, pagenum)
1746                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1747                         request = urllib2.Request(result_url, None, std_headers)
1748                         try:
1749                                 page = urllib2.urlopen(request).read()
1750                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1751                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1752                                 return
1753
1754                         # Extract video identifiers
1755                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1756                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1757                                 if video_id not in already_seen:
1758                                         video_ids.append(video_id)
1759                                         already_seen.add(video_id)
1760                                         if len(video_ids) == n:
1761                                                 # Specified n videos reached
1762                                                 for id in video_ids:
1763                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1764                                                 return
1765
1766                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1767                                 for id in video_ids:
1768                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1769                                 return
1770
1771                         pagenum = pagenum + 1
1772
1773 class GoogleSearchIE(InfoExtractor):
1774         """Information Extractor for Google Video search queries."""
1775         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1776         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1777         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1778         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1779         _google_ie = None
1780         _max_google_results = 1000
1781
	def __init__(self, google_ie, downloader=None):
		# Keep a reference to the GoogleIE that will perform the actual
		# per-video extraction for each search result.
		InfoExtractor.__init__(self, downloader)
		self._google_ie = google_ie
1785         
1786         @staticmethod
1787         def suitable(url):
1788                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1789
1790         def report_download_page(self, query, pagenum):
1791                 """Report attempt to download playlist page with given number."""
1792                 query = query.decode(preferredencoding())
1793                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1794
	def _real_initialize(self):
		# Delegate initialization to the wrapped GoogleIE instance.
		self._google_ie.initialize()
1797         
1798         def _real_extract(self, query):
1799                 mobj = re.match(self._VALID_QUERY, query)
1800                 if mobj is None:
1801                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1802                         return
1803
1804                 prefix, query = query.split(':')
1805                 prefix = prefix[8:]
1806                 query  = query.encode('utf-8')
1807                 if prefix == '':
1808                         self._download_n_results(query, 1)
1809                         return
1810                 elif prefix == 'all':
1811                         self._download_n_results(query, self._max_google_results)
1812                         return
1813                 else:
1814                         try:
1815                                 n = long(prefix)
1816                                 if n <= 0:
1817                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1818                                         return
1819                                 elif n > self._max_google_results:
1820                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n))
1821                                         n = self._max_google_results
1822                                 self._download_n_results(query, n)
1823                                 return
1824                         except ValueError: # parsing prefix as integer fails
1825                                 self._download_n_results(query, 1)
1826                                 return
1827
1828         def _download_n_results(self, query, n):
1829                 """Downloads a specified number of results for a query"""
1830
1831                 video_ids = []
1832                 already_seen = set()
1833                 pagenum = 1
1834
1835                 while True:
1836                         self.report_download_page(query, pagenum)
1837                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1838                         request = urllib2.Request(result_url, None, std_headers)
1839                         try:
1840                                 page = urllib2.urlopen(request).read()
1841                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1842                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1843                                 return
1844
1845                         # Extract video identifiers
1846                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1847                                 video_id = mobj.group(1)
1848                                 if video_id not in already_seen:
1849                                         video_ids.append(video_id)
1850                                         already_seen.add(video_id)
1851                                         if len(video_ids) == n:
1852                                                 # Specified n videos reached
1853                                                 for id in video_ids:
1854                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1855                                                 return
1856
1857                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1858                                 for id in video_ids:
1859                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1860                                 return
1861
1862                         pagenum = pagenum + 1
1863
1864 class YahooSearchIE(InfoExtractor):
1865         """Information Extractor for Yahoo! Video search queries."""
1866         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
1867         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
1868         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
1869         _MORE_PAGES_INDICATOR = r'\s*Next'
1870         _yahoo_ie = None
1871         _max_yahoo_results = 1000
1872
1873         def __init__(self, yahoo_ie, downloader=None):
1874                 InfoExtractor.__init__(self, downloader)
1875                 self._yahoo_ie = yahoo_ie
1876         
1877         @staticmethod
1878         def suitable(url):
1879                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
1880
1881         def report_download_page(self, query, pagenum):
1882                 """Report attempt to download playlist page with given number."""
1883                 query = query.decode(preferredencoding())
1884                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
1885
1886         def _real_initialize(self):
1887                 self._yahoo_ie.initialize()
1888         
1889         def _real_extract(self, query):
1890                 mobj = re.match(self._VALID_QUERY, query)
1891                 if mobj is None:
1892                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1893                         return
1894
1895                 prefix, query = query.split(':')
1896                 prefix = prefix[8:]
1897                 query  = query.encode('utf-8')
1898                 if prefix == '':
1899                         self._download_n_results(query, 1)
1900                         return
1901                 elif prefix == 'all':
1902                         self._download_n_results(query, self._max_yahoo_results)
1903                         return
1904                 else:
1905                         try:
1906                                 n = long(prefix)
1907                                 if n <= 0:
1908                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1909                                         return
1910                                 elif n > self._max_yahoo_results:
1911                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n))
1912                                         n = self._max_yahoo_results
1913                                 self._download_n_results(query, n)
1914                                 return
1915                         except ValueError: # parsing prefix as integer fails
1916                                 self._download_n_results(query, 1)
1917                                 return
1918
1919         def _download_n_results(self, query, n):
1920                 """Downloads a specified number of results for a query"""
1921
1922                 video_ids = []
1923                 already_seen = set()
1924                 pagenum = 1
1925
1926                 while True:
1927                         self.report_download_page(query, pagenum)
1928                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1929                         request = urllib2.Request(result_url, None, std_headers)
1930                         try:
1931                                 page = urllib2.urlopen(request).read()
1932                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1933                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1934                                 return
1935
1936                         # Extract video identifiers
1937                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1938                                 video_id = mobj.group(1)
1939                                 if video_id not in already_seen:
1940                                         video_ids.append(video_id)
1941                                         already_seen.add(video_id)
1942                                         if len(video_ids) == n:
1943                                                 # Specified n videos reached
1944                                                 for id in video_ids:
1945                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1946                                                 return
1947
1948                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1949                                 for id in video_ids:
1950                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1951                                 return
1952
1953                         pagenum = pagenum + 1
1954
1955 class YoutubePlaylistIE(InfoExtractor):
1956         """Information Extractor for YouTube playlists."""
1957
1958         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
1959         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1960         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1961         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1962         _youtube_ie = None
1963
1964         def __init__(self, youtube_ie, downloader=None):
1965                 InfoExtractor.__init__(self, downloader)
1966                 self._youtube_ie = youtube_ie
1967         
1968         @staticmethod
1969         def suitable(url):
1970                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1971
1972         def report_download_page(self, playlist_id, pagenum):
1973                 """Report attempt to download playlist page with given number."""
1974                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1975
1976         def _real_initialize(self):
1977                 self._youtube_ie.initialize()
1978         
1979         def _real_extract(self, url):
1980                 # Extract playlist id
1981                 mobj = re.match(self._VALID_URL, url)
1982                 if mobj is None:
1983                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1984                         return
1985
1986                 # Download playlist pages
1987                 playlist_id = mobj.group(1)
1988                 video_ids = []
1989                 pagenum = 1
1990
1991                 while True:
1992                         self.report_download_page(playlist_id, pagenum)
1993                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1994                         try:
1995                                 page = urllib2.urlopen(request).read()
1996                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1997                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1998                                 return
1999
2000                         # Extract video identifiers
2001                         ids_in_page = []
2002                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2003                                 if mobj.group(1) not in ids_in_page:
2004                                         ids_in_page.append(mobj.group(1))
2005                         video_ids.extend(ids_in_page)
2006
2007                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2008                                 break
2009                         pagenum = pagenum + 1
2010
2011                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2012                 playlistend = self._downloader.params.get('playlistend', -1)
2013                 video_ids = video_ids[playliststart:playlistend]
2014
2015                 for id in video_ids:
2016                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2017                 return
2018
2019 class YoutubeUserIE(InfoExtractor):
2020         """Information Extractor for YouTube users."""
2021
2022         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
2023         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2024         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
2025         _youtube_ie = None
2026
2027         def __init__(self, youtube_ie, downloader=None):
2028                 InfoExtractor.__init__(self, downloader)
2029                 self._youtube_ie = youtube_ie
2030         
2031         @staticmethod
2032         def suitable(url):
2033                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2034
2035         def report_download_page(self, username):
2036                 """Report attempt to download user page."""
2037                 self._downloader.to_screen(u'[youtube] user %s: Downloading page ' % (username))
2038
2039         def _real_initialize(self):
2040                 self._youtube_ie.initialize()
2041         
2042         def _real_extract(self, url):
2043                 # Extract username
2044                 mobj = re.match(self._VALID_URL, url)
2045                 if mobj is None:
2046                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2047                         return
2048
2049                 # Download user page
2050                 username = mobj.group(1)
2051                 video_ids = []
2052                 pagenum = 1
2053
2054                 self.report_download_page(username)
2055                 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
2056                 try:
2057                         page = urllib2.urlopen(request).read()
2058                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2059                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2060                         return
2061
2062                 # Extract video identifiers
2063                 ids_in_page = []
2064
2065                 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2066                         if mobj.group(1) not in ids_in_page:
2067                                 ids_in_page.append(mobj.group(1))
2068                 video_ids.extend(ids_in_page)
2069
2070                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2071                 playlistend = self._downloader.params.get('playlistend', -1)
2072                 video_ids = video_ids[playliststart:playlistend]
2073
2074                 for id in video_ids:
2075                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2076                 return
2077
class PostProcessor(object):
	"""Base class for post-download processing steps.

	Instances are registered on a downloader via its
	add_post_processor() method.  After each successful download the
	downloader invokes run() on every registered PostProcessor in
	order, feeding the dictionary returned by one processor into the
	next.  The chain stops as soon as a processor returns None (or
	when no processors remain).

	Like InfoExtractor, a PostProcessor and its downloader register
	with each other mutually.
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Sets the downloader for this PP."""
		self._downloader = downloader

	def run(self, information):
		"""Process one downloaded video.

		The "information" dictionary has the same layout as the ones
		produced by InfoExtractors, plus an extra "filepath" key naming
		the file that was just downloaded.

		Returning None aborts the postprocessing chain; returning a
		dictionary (possibly the received one with some fields changed)
		passes it on to the next processor in the chain.  A processor
		may also raise PostProcessingError, which the calling
		downloader handles explicitly.
		"""
		# The base class performs no processing at all: it simply hands
		# the information on unchanged.
		return information
2123         
2124 ### MAIN PROGRAM ###
2125 if __name__ == '__main__':
2126         try:
2127                 # Modules needed only when running the main program
2128                 import getpass
2129                 import optparse
2130
2131                 # Function to update the program file with the latest version from bitbucket.org
		def update_self(downloader, filename):
			"""Overwrite *filename* with the latest stable youtube-dl.

			Fetches the latest version tag from GitHub, downloads the
			program file for that tag and writes it over *filename*
			(normally sys.argv[0]).  Exits the process if *filename*
			is not writable.
			"""
			# Note: downloader only used for options
			if not os.access (filename, os.W_OK):
				sys.exit('ERROR: no write permissions on %s' % filename)

			downloader.to_screen('Updating to latest stable version...')
			# LATEST_VERSION holds the tag name of the newest release.
			latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
			latest_version = urllib.urlopen(latest_url).read().strip()
			prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
			newcontent = urllib.urlopen(prog_url).read()
			# NOTE(review): opened in text mode ('w'); 'wb' would avoid
			# newline translation on Windows — confirm intended.
			stream = open(filename, 'w')
			stream.write(newcontent)
			stream.close()
			downloader.to_screen('Updated to version %s' % latest_version)
2146
2147                 # Parse command line
2148                 parser = optparse.OptionParser(
2149                         usage='Usage: %prog [options] url...',
2150                         version='2010.11.19',
2151                         conflict_handler='resolve',
2152                 )
2153
2154                 parser.add_option('-h', '--help',
2155                                 action='help', help='print this help text and exit')
2156                 parser.add_option('-v', '--version',
2157                                 action='version', help='print program version and exit')
2158                 parser.add_option('-U', '--update',
2159                                 action='store_true', dest='update_self', help='update this program to latest stable version')
2160                 parser.add_option('-i', '--ignore-errors',
2161                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
2162                 parser.add_option('-r', '--rate-limit',
2163                                 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
2164                 parser.add_option('-R', '--retries',
2165                                 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
2166                 parser.add_option('--playlist-start',
2167                                 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
2168                 parser.add_option('--playlist-end',
2169                                 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
2170
2171                 authentication = optparse.OptionGroup(parser, 'Authentication Options')
2172                 authentication.add_option('-u', '--username',
2173                                 dest='username', metavar='USERNAME', help='account username')
2174                 authentication.add_option('-p', '--password',
2175                                 dest='password', metavar='PASSWORD', help='account password')
2176                 authentication.add_option('-n', '--netrc',
2177                                 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
2178                 parser.add_option_group(authentication)
2179
2180                 video_format = optparse.OptionGroup(parser, 'Video Format Options')
2181                 video_format.add_option('-f', '--format',
2182                                 action='store', dest='format', metavar='FORMAT', help='video format code')
2183                 video_format.add_option('-m', '--mobile-version',
2184                                 action='store_const', dest='format', help='alias for -f 17', const='17')
2185                 video_format.add_option('--all-formats',
2186                                 action='store_const', dest='format', help='download all available video formats', const='-1')
2187                 video_format.add_option('--max-quality',
2188                                 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
2189                 video_format.add_option('-b', '--best-quality',
2190                                 action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)')
2191                 parser.add_option_group(video_format)
2192
2193                 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
2194                 verbosity.add_option('-q', '--quiet',
2195                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
2196                 verbosity.add_option('-s', '--simulate',
2197                                 action='store_true', dest='simulate', help='do not download video', default=False)
2198                 verbosity.add_option('-g', '--get-url',
2199                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
2200                 verbosity.add_option('-e', '--get-title',
2201                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
2202                 verbosity.add_option('--get-thumbnail',
2203                                 action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
2204                 verbosity.add_option('--get-description',
2205                                 action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
2206                 verbosity.add_option('--no-progress',
2207                                 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
2208                 parser.add_option_group(verbosity)
2209
2210                 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
2211                 filesystem.add_option('-t', '--title',
2212                                 action='store_true', dest='usetitle', help='use title in file name', default=False)
2213                 filesystem.add_option('-l', '--literal',
2214                                 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
2215                 filesystem.add_option('-A', '--auto-number',
2216                                 action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False)
2217                 filesystem.add_option('-o', '--output',
2218                                 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
2219                 filesystem.add_option('-a', '--batch-file',
2220                                 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
2221                 filesystem.add_option('-w', '--no-overwrites',
2222                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
2223                 filesystem.add_option('-c', '--continue',
2224                                 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
2225                 filesystem.add_option('--cookies',
2226                                 dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
2227                 parser.add_option_group(filesystem)
2228
2229                 (opts, args) = parser.parse_args()
2230
2231                 # Open appropriate CookieJar
2232                 if opts.cookiefile is None:
2233                         jar = cookielib.CookieJar()
2234                 else:
2235                         try:
2236                                 jar = cookielib.MozillaCookieJar(opts.cookiefile)
2237                                 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
2238                                         jar.load()
2239                         except (IOError, OSError), err:
2240                                 sys.exit(u'ERROR: unable to open cookie file')
2241
2242                 # General configuration
2243                 cookie_processor = urllib2.HTTPCookieProcessor(jar)
2244                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
2245                 urllib2.install_opener(urllib2.build_opener(cookie_processor))
2246                 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
2247
2248                 # Batch file verification
2249                 batchurls = []
2250                 if opts.batchfile is not None:
2251                         try:
2252                                 if opts.batchfile == '-':
2253                                         batchfd = sys.stdin
2254                                 else:
2255                                         batchfd = open(opts.batchfile, 'r')
2256                                 batchurls = batchfd.readlines()
2257                                 batchurls = [x.strip() for x in batchurls]
2258                                 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
2259                         except IOError:
2260                                 sys.exit(u'ERROR: batch file could not be read')
2261                 all_urls = batchurls + args
2262
2263                 # Conflicting, missing and erroneous options
2264                 if opts.bestquality:
2265                         print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n'
2266                 if opts.usenetrc and (opts.username is not None or opts.password is not None):
2267                         parser.error(u'using .netrc conflicts with giving username/password')
2268                 if opts.password is not None and opts.username is None:
2269                         parser.error(u'account username missing')
2270                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
2271                         parser.error(u'using output template conflicts with using title, literal title or auto number')
2272                 if opts.usetitle and opts.useliteral:
2273                         parser.error(u'using title conflicts with using literal title')
2274                 if opts.username is not None and opts.password is None:
2275                         opts.password = getpass.getpass(u'Type account password and press return:')
2276                 if opts.ratelimit is not None:
2277                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
2278                         if numeric_limit is None:
2279                                 parser.error(u'invalid rate limit specified')
2280                         opts.ratelimit = numeric_limit
2281                 if opts.retries is not None:
2282                         try:
2283                                 opts.retries = long(opts.retries)
2284                         except (TypeError, ValueError), err:
2285                                 parser.error(u'invalid retry count specified')
2286                 try:
2287                         opts.playliststart = long(opts.playliststart)
2288                         if opts.playliststart <= 0:
2289                                 raise ValueError
2290                 except (TypeError, ValueError), err:
2291                         parser.error(u'invalid playlist start number specified')
2292                 try:
2293                         opts.playlistend = long(opts.playlistend)
2294                         if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
2295                                 raise ValueError
2296                 except (TypeError, ValueError), err:
2297                         parser.error(u'invalid playlist end number specified')
2298
2299                 # Information extractors
2300                 youtube_ie = YoutubeIE()
2301                 metacafe_ie = MetacafeIE(youtube_ie)
2302                 dailymotion_ie = DailymotionIE()
2303                 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
2304                 youtube_user_ie = YoutubeUserIE(youtube_ie)
2305                 youtube_search_ie = YoutubeSearchIE(youtube_ie)
2306                 google_ie = GoogleIE()
2307                 google_search_ie = GoogleSearchIE(google_ie)
2308                 photobucket_ie = PhotobucketIE()
2309                 yahoo_ie = YahooIE()
2310                 yahoo_search_ie = YahooSearchIE(yahoo_ie)
2311                 generic_ie = GenericIE()
2312
2313                 # File downloader
2314                 fd = FileDownloader({
2315                         'usenetrc': opts.usenetrc,
2316                         'username': opts.username,
2317                         'password': opts.password,
                             # Any of the "print info" flags (-g/-e/--get-thumbnail/
                             # --get-description) implies quiet mode, so the requested
                             # value is the only thing written to standard output.
2318                         'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
2319                         'forceurl': opts.geturl,
2320                         'forcetitle': opts.gettitle,
2321                         'forcethumbnail': opts.getthumbnail,
2322                         'forcedescription': opts.getdescription,
                             # The same info-printing flags also imply simulation
                             # (no actual download takes place).
2323                         'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
2324                         'format': opts.format,
2325                         'format_limit': opts.format_limit,
                             # Output filename template: this and/or chain returns the
                             # first applicable template, in priority order — an explicit
                             # --output template (decoded to unicode with the locale's
                             # preferred encoding), then the --all-formats ('-1')
                             # variants, then title/literal-title/autonumber variants,
                             # and finally the bare '%(id)s.%(ext)s' fallback.
2326                         'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
2327                                 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
2328                                 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
2329                                 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
2330                                 or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
2331                                 or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
2332                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
2333                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
2334                                 or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
2335                                 or u'%(id)s.%(ext)s'),
2336                         'ignoreerrors': opts.ignoreerrors,
2337                         'ratelimit': opts.ratelimit,
2338                         'nooverwrites': opts.nooverwrites,
2339                         'retries': opts.retries,
2340                         'continuedl': opts.continue_dl,
2341                         'noprogress': opts.noprogress,
2342                         'playliststart': opts.playliststart,
2343                         'playlistend': opts.playlistend,
                             # When the output template is '-', log to stderr —
                             # presumably because '-' sends the downloaded data to
                             # stdout, which must stay clean; confirm against
                             # FileDownloader's handling of 'logtostderr'.
2344                         'logtostderr': opts.outtmpl == '-',
2345                         })
                     # Register the information extractors. Registration order is
                     # significant at least for generic_ie, which must come last
                     # (see the comment below).
2346                 fd.add_info_extractor(youtube_search_ie)
2347                 fd.add_info_extractor(youtube_pl_ie)
2348                 fd.add_info_extractor(youtube_user_ie)
2349                 fd.add_info_extractor(metacafe_ie)
2350                 fd.add_info_extractor(dailymotion_ie)
2351                 fd.add_info_extractor(youtube_ie)
2352                 fd.add_info_extractor(google_ie)
2353                 fd.add_info_extractor(google_search_ie)
2354                 fd.add_info_extractor(photobucket_ie)
2355                 fd.add_info_extractor(yahoo_ie)
2356                 fd.add_info_extractor(yahoo_search_ie)
2357
2358                 # This must come last since it's the
2359                 # fallback if none of the others work
2360                 fd.add_info_extractor(generic_ie)
2361
2362                 # Update version
2363                 if opts.update_self:
2364                         update_self(fd, sys.argv[0])
2365
2366                 # Maybe do nothing
                     # Running with no URLs is an error, except when -U/--update
                     # was given: a bare self-update invocation exits cleanly.
2367                 if len(all_urls) < 1:
2368                         if not opts.update_self:
2369                                 parser.error(u'you must provide at least one URL')
2370                         else:
2371                                 sys.exit()
                     # Download every URL; retcode is later used as the process
                     # exit status.
2372                 retcode = fd.download(all_urls)
2373
2374                 # Dump cookie jar if requested
2375                 if opts.cookiefile is not None:
2376                         try:
2377                                 jar.save()
2378                         except (IOError, OSError), err:
                                     # Failure to persist cookies is fatal: exit with
                                     # the error message (sys.exit prints a string
                                     # argument to stderr and exits with status 1).
2379                                 sys.exit(u'ERROR: unable to save cookie jar')
2380
2381                 sys.exit(retcode)
2382
2383         except DownloadError:
                     # Download errors have already been reported by the
                     # downloader; just signal failure via the exit status.
2384                 sys.exit(1)
2385         except SameFileError:
2386                 sys.exit(u'ERROR: fixed output name but more than one file to download')
2387         except KeyboardInterrupt:
                     # Ctrl-C: leading newline keeps the message off the same
                     # line as any partial progress output.
2388                 sys.exit(u'\nERROR: Interrupted by user')