Added --console-title to display download progress in console window title.
[youtube-dl.git] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # Author: Vasyl' Vavrychuk
7 # License: Public domain code
8 import cookielib
9 import ctypes
10 import datetime
11 import htmlentitydefs
12 import httplib
13 import locale
14 import math
15 import netrc
16 import os
17 import os.path
18 import re
19 import socket
20 import string
21 import subprocess
22 import sys
23 import time
24 import urllib
25 import urllib2
26
27 # parse_qs was moved from the cgi module to the urlparse module recently.
28 try:
29         from urlparse import parse_qs
30 except ImportError:
31         from cgi import parse_qs
32
# Default headers sent with every HTTP request made by the downloader.
std_headers = {
	'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101028 Firefox/3.6.12',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
	'Accept-Encoding': 'gzip, deflate',
	'Accept-Language': 'en-us,en;q=0.5',
}

# Characters considered safe for "simple" titles: ASCII letters and digits.
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
42
def preferredencoding():
	"""Get preferred encoding.

	Returns the best encoding scheme for the system, based on
	locale.getpreferredencoding() and some further tweaks.
	"""
	try:
		encoding = locale.getpreferredencoding()
		# Verify the reported encoding actually works before trusting it.
		u'TEST'.encode(encoding)
	except:
		# Any failure (including a broken locale) falls back to UTF-8.
		encoding = 'UTF-8'
	return encoding
58
59 def htmlentity_transform(matchobj):
60         """Transforms an HTML entity to a Unicode character.
61         
62         This function receives a match object and is intended to be used with
63         the re.sub() function.
64         """
65         entity = matchobj.group(1)
66
67         # Known non-numeric HTML entity
68         if entity in htmlentitydefs.name2codepoint:
69                 return unichr(htmlentitydefs.name2codepoint[entity])
70
71         # Unicode character
72         mobj = re.match(ur'(?u)#(x?\d+)', entity)
73         if mobj is not None:
74                 numstr = mobj.group(1)
75                 if numstr.startswith(u'x'):
76                         base = 16
77                         numstr = u'0%s' % numstr
78                 else:
79                         base = 10
80                 return unichr(long(numstr, base))
81
82         # Unknown entity in name, return its literal representation
83         return (u'&%s;' % entity)
84
85 def sanitize_title(utitle):
86         """Sanitizes a video title so it could be used as part of a filename."""
87         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
88         return utitle.replace(unicode(os.sep), u'%')
89
90 def sanitize_open(filename, open_mode):
91         """Try to open the given filename, and slightly tweak it if this fails.
92
93         Attempts to open the given filename. If this fails, it tries to change
94         the filename slightly, step by step, until it's either able to open it
95         or it fails and raises a final exception, like the standard open()
96         function.
97
98         It returns the tuple (stream, definitive_file_name).
99         """
100         try:
101                 if filename == u'-':
102                         if sys.platform == 'win32':
103                                 import msvcrt
104                                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
105                         return (sys.stdout, filename)
106                 stream = open(filename, open_mode)
107                 return (stream, filename)
108         except (IOError, OSError), err:
109                 # In case of error, try to remove win32 forbidden chars
110                 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
111
112                 # An exception here should be caught in the caller
113                 stream = open(filename, open_mode)
114                 return (stream, filename)
115
class DownloadError(Exception):
	"""Download Error exception.

	This exception may be thrown by FileDownloader objects if they are not
	configured to continue on errors (see FileDownloader.trouble). They
	will contain the appropriate error message.
	"""
	pass
124
class SameFileError(Exception):
	"""Same File exception.

	This exception will be thrown by FileDownloader objects if they detect
	multiple files would have to be downloaded to the same file on disk
	(i.e. several URLs with a fixed output template).
	"""
	pass
132
class PostProcessingError(Exception):
	"""Post Processing exception.

	This exception may be raised by PostProcessor's .run() method to
	indicate an error in the postprocessing task.
	"""
	pass
140
class UnavailableVideoError(Exception):
	"""Unavailable Format exception.

	This exception will be thrown when a video is requested
	in a format that is not available for that video.
	"""
	pass
148
class ContentTooShortError(Exception):
	"""Content Too Short exception.

	This exception may be raised by FileDownloader objects when a file they
	download is too small for what the server announced first, indicating
	the connection was probably interrupted.
	"""
	# Both in bytes
	downloaded = None	# bytes actually received
	expected = None		# bytes announced by the server

	def __init__(self, downloaded, expected):
		self.downloaded = downloaded
		self.expected = expected
163
class FileDownloader(object):
	"""File Downloader class.

	File downloader objects are the ones responsible of downloading the
	actual video file and writing it to disk if the user has requested
	it, among some other tasks. In most cases there should be one per
	program. As, given a video URL, the downloader doesn't know how to
	extract all the needed information, task that InfoExtractors do, it
	has to pass the URL to one of them.

	For this, file downloader objects have a method that allows
	InfoExtractors to be registered in a given order. When it is passed
	a URL, the file downloader handles it to the first InfoExtractor it
	finds that reports being able to handle it. The InfoExtractor extracts
	all the information about the video or videos the URL refers to, and
	asks the FileDownloader to process the video information, possibly
	downloading the video.

	File downloaders accept a lot of parameters. In order not to saturate
	the object constructor with arguments, it receives a dictionary of
	options instead. These options are available through the params
	attribute for the InfoExtractors to use. The FileDownloader also
	registers itself as the downloader in charge for the InfoExtractors
	that are added to it, so this is a "mutual registration".

	Available options:

	username:         Username for authentication purposes.
	password:         Password for authentication purposes.
	usenetrc:         Use netrc for authentication instead.
	quiet:            Do not print messages to stdout.
	forceurl:         Force printing final URL.
	forcetitle:       Force printing title.
	forcethumbnail:   Force printing thumbnail URL.
	forcedescription: Force printing description.
	simulate:         Do not download the video files.
	format:           Video format code.
	format_limit:     Highest quality format to try.
	outtmpl:          Template for output names.
	ignoreerrors:     Do not stop on download errors.
	ratelimit:        Download speed limit, in bytes/sec.
	nooverwrites:     Prevent overwriting files.
	retries:          Number of times to retry for HTTP error 5xx
	continuedl:       Try to continue downloads if possible.
	noprogress:       Do not print the progress bar.
	playliststart:    Playlist item to start at.
	playlistend:      Playlist item to end at.
	logtostderr:      Log messages to stderr instead of stdout.
	consoletitle:     Display progress in console window's titlebar.
	"""

	# Options dictionary handed to __init__ (see class docstring).
	params = None
	# Registered InfoExtractor objects, tried in order by download().
	_ies = []
	# Registered PostProcessor chain, run by post_process().
	_pps = []
	# Exit code reported by download(); set to 1 after any trouble().
	_download_retcode = None
	# Ordinal counter backing the %(autonumber)s output template key.
	_num_downloads = None
	# Stream used by to_screen(): stdout, or stderr when logtostderr is set.
	_screen_file = None
221
222         def __init__(self, params):
223                 """Create a FileDownloader object with the given options."""
224                 self._ies = []
225                 self._pps = []
226                 self._download_retcode = 0
227                 self._num_downloads = 0
228                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
229                 self.params = params
230         
231         @staticmethod
232         def pmkdir(filename):
233                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
234                 components = filename.split(os.sep)
235                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
236                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
237                 for dir in aggregate:
238                         if not os.path.exists(dir):
239                                 os.mkdir(dir)
240         
241         @staticmethod
242         def temp_name(filename):
243                 """Returns a temporary filename for the given filename."""
244                 if filename == u'-' or (os.path.exists(filename) and not os.path.isfile(filename)):
245                         return filename
246                 return filename + u'.part'
247         
248         @staticmethod
249         def format_bytes(bytes):
250                 if bytes is None:
251                         return 'N/A'
252                 if type(bytes) is str:
253                         bytes = float(bytes)
254                 if bytes == 0.0:
255                         exponent = 0
256                 else:
257                         exponent = long(math.log(bytes, 1024.0))
258                 suffix = 'bkMGTPEZY'[exponent]
259                 converted = float(bytes) / float(1024**exponent)
260                 return '%.2f%s' % (converted, suffix)
261
262         @staticmethod
263         def calc_percent(byte_counter, data_len):
264                 if data_len is None:
265                         return '---.-%'
266                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
267
268         @staticmethod
269         def calc_eta(start, now, total, current):
270                 if total is None:
271                         return '--:--'
272                 dif = now - start
273                 if current == 0 or dif < 0.001: # One millisecond
274                         return '--:--'
275                 rate = float(current) / dif
276                 eta = long((float(total) - float(current)) / rate)
277                 (eta_mins, eta_secs) = divmod(eta, 60)
278                 if eta_mins > 99:
279                         return '--:--'
280                 return '%02d:%02d' % (eta_mins, eta_secs)
281
282         @staticmethod
283         def calc_speed(start, now, bytes):
284                 dif = now - start
285                 if bytes == 0 or dif < 0.001: # One millisecond
286                         return '%10s' % '---b/s'
287                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
288
289         @staticmethod
290         def best_block_size(elapsed_time, bytes):
291                 new_min = max(bytes / 2.0, 1.0)
292                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
293                 if elapsed_time < 0.001:
294                         return long(new_max)
295                 rate = bytes / elapsed_time
296                 if rate > new_max:
297                         return long(new_max)
298                 if rate < new_min:
299                         return long(new_min)
300                 return long(rate)
301
302         @staticmethod
303         def parse_bytes(bytestr):
304                 """Parse a string indicating a byte quantity into a long integer."""
305                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
306                 if matchobj is None:
307                         return None
308                 number = float(matchobj.group(1))
309                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
310                 return long(round(number * multiplier))
311
312         def add_info_extractor(self, ie):
313                 """Add an InfoExtractor object to the end of the list."""
314                 self._ies.append(ie)
315                 ie.set_downloader(self)
316         
317         def add_post_processor(self, pp):
318                 """Add a PostProcessor object to the end of the chain."""
319                 self._pps.append(pp)
320                 pp.set_downloader(self)
321         
	def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
		"""Print message to stdout if not in quiet mode.

		message is a unicode string, encoded with the system's preferred
		encoding before writing to self._screen_file (stdout, or stderr
		under logtostderr). When skip_eol is true no newline is appended,
		which lets progress updates rewrite the same line. Encoding errors
		propagate unless ignore_encoding_errors is true.
		"""
		try:
			if not self.params.get('quiet', False):
				terminator = [u'\n', u''][skip_eol]
				# Trailing comma suppresses print's own newline; the chosen
				# terminator is part of the encoded payload instead.
				print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
			self._screen_file.flush()
		except (UnicodeEncodeError), err:
			if not ignore_encoding_errors:
				raise
332         
333         def to_stderr(self, message):
334                 """Print message to stderr."""
335                 print >>sys.stderr, message.encode(preferredencoding())
336         
	def to_cons_title(self, message):
		"""Set console/terminal window title to message.

		No-op unless the 'consoletitle' option is set. Uses the Win32 API
		on Windows consoles, and the xterm OSC escape sequence elsewhere
		when a TERM environment variable is present.
		"""
		if not self.params.get('consoletitle', False):
			return
		if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
			# c_wchar_p() might not be necessary if `message` is
			# already of type unicode()
			ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
		elif 'TERM' in os.environ:
			# Escape sequence '\033]0;...\007' sets the window title on
			# xterm-compatible terminals; written to stderr so it never
			# mixes with piped output.
			sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
347
348         def fixed_template(self):
349                 """Checks if the output template is fixed."""
350                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
351
352         def trouble(self, message=None):
353                 """Determine action to take when a download problem appears.
354
355                 Depending on if the downloader has been configured to ignore
356                 download errors or not, this method may throw an exception or
357                 not when errors are found, after printing the message.
358                 """
359                 if message is not None:
360                         self.to_stderr(message)
361                 if not self.params.get('ignoreerrors', False):
362                         raise DownloadError(message)
363                 self._download_retcode = 1
364
365         def slow_down(self, start_time, byte_counter):
366                 """Sleep if the download speed is over the rate limit."""
367                 rate_limit = self.params.get('ratelimit', None)
368                 if rate_limit is None or byte_counter == 0:
369                         return
370                 now = time.time()
371                 elapsed = now - start_time
372                 if elapsed <= 0.0:
373                         return
374                 speed = float(byte_counter) / elapsed
375                 if speed > rate_limit:
376                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
377         
378         def try_rename(self, old_filename, new_filename):
379                 try:
380                         if old_filename == new_filename:
381                                 return
382                         os.rename(old_filename, new_filename)
383                 except (IOError, OSError), err:
384                         self.trouble(u'ERROR: unable to rename file')
385
386         def report_destination(self, filename):
387                 """Report destination filename."""
388                 self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
389         
390         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
391                 """Report download progress."""
392                 if self.params.get('noprogress', False):
393                         return
394                 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
395                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
396                 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
397                                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
398
399         def report_resuming_byte(self, resume_len):
400                 """Report attempt to resume at given byte."""
401                 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
402         
403         def report_retry(self, count, retries):
404                 """Report retry in case of HTTP error 5xx"""
405                 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
406         
	def report_file_already_downloaded(self, file_name):
		"""Report file has already been fully downloaded."""
		try:
			self.to_screen(u'[download] %s has already been downloaded' % file_name)
		except (UnicodeEncodeError), err:
			# Fall back to a message without the (unencodable) filename.
			self.to_screen(u'[download] The file has already been downloaded')
413         
414         def report_unable_to_resume(self):
415                 """Report it was impossible to resume download."""
416                 self.to_screen(u'[download] Unable to resume')
417         
418         def report_finish(self):
419                 """Report download finished."""
420                 if self.params.get('noprogress', False):
421                         self.to_screen(u'[download] Download completed')
422                 else:
423                         self.to_screen(u'')
424         
425         def increment_downloads(self):
426                 """Increment the ordinal that assigns a number to each file."""
427                 self._num_downloads += 1
428
	def process_info(self, info_dict):
		"""Process a single dictionary returned by an InfoExtractor.

		info_dict must contain at least 'title' and 'url'; 'thumbnail',
		'description' and 'player_url' are used when present. In simulate
		mode only the forced printings happen; otherwise the output
		filename is built from the outtmpl template, directories are
		created, the data is downloaded and the postprocessing chain runs.
		"""
		# Do nothing else if in simulate mode
		if self.params.get('simulate', False):
			# Forced printings
			if self.params.get('forcetitle', False):
				print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forceurl', False):
				print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
				print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcedescription', False) and 'description' in info_dict:
				print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')

			return

		try:
			template_dict = dict(info_dict)
			# Extra template keys beyond the extractor's: %(epoch)s and
			# %(autonumber)s (see increment_downloads).
			template_dict['epoch'] = unicode(long(time.time()))
			template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
			filename = self.params['outtmpl'] % template_dict
		except (ValueError, KeyError), err:
			self.trouble(u'ERROR: invalid system charset or erroneous output template')
			return
		if self.params.get('nooverwrites', False) and os.path.exists(filename):
			self.to_stderr(u'WARNING: file exists and will be skipped')
			return

		try:
			self.pmkdir(filename)
		except (OSError, IOError), err:
			self.trouble(u'ERROR: unable to create directories: %s' % str(err))
			return

		try:
			# URL is encoded to UTF-8 bytes for urllib2.
			success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
		except (OSError, IOError), err:
			# Filesystem-level failure is treated as the video being
			# unavailable; propagated to the caller.
			raise UnavailableVideoError
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self.trouble(u'ERROR: unable to download video data: %s' % str(err))
			return
		except (ContentTooShortError, ), err:
			self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
			return

		if success:
			try:
				self.post_process(filename, info_dict)
			except (PostProcessingError), err:
				self.trouble(u'ERROR: postprocessing: %s' % str(err))
				return
480
481         def download(self, url_list):
482                 """Download a given list of URLs."""
483                 if len(url_list) > 1 and self.fixed_template():
484                         raise SameFileError(self.params['outtmpl'])
485
486                 for url in url_list:
487                         suitable_found = False
488                         for ie in self._ies:
489                                 # Go to next InfoExtractor if not suitable
490                                 if not ie.suitable(url):
491                                         continue
492
493                                 # Suitable InfoExtractor found
494                                 suitable_found = True
495
496                                 # Extract information from URL and process it
497                                 ie.extract(url)
498
499                                 # Suitable InfoExtractor had been found; go to next URL
500                                 break
501
502                         if not suitable_found:
503                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
504
505                 return self._download_retcode
506
507         def post_process(self, filename, ie_info):
508                 """Run the postprocessing chain on the given file."""
509                 info = dict(ie_info)
510                 info['filepath'] = filename
511                 for pp in self._pps:
512                         info = pp.run(info)
513                         if info is None:
514                                 break
515         
	def _download_with_rtmpdump(self, filename, url, player_url):
		"""Download an rtmp:// URL by driving the external rtmpdump tool.

		Writes to a temporary filename first, renaming on success.
		Returns True on success, False (after trouble()) on failure.
		"""
		self.report_destination(filename)
		tmpfilename = self.temp_name(filename)

		# Check for rtmpdump first
		try:
			subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
		except (OSError, IOError):
			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
			return False

		# Download using rtmpdump. rtmpdump returns exit code 2 when
		# the connection was interrupted and resuming appears to be
		# possible. This is part of rtmpdump's normal usage, AFAIK.
		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
		retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
		while retval == 2 or retval == 1:
			# Keep resuming ('-e') until rtmpdump succeeds or the file
			# stops growing.
			prevsize = os.path.getsize(tmpfilename)
			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
			time.sleep(5.0) # This seems to be needed
			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
			cursize = os.path.getsize(tmpfilename)
			if prevsize == cursize and retval == 1:
				# No progress since the last attempt: give up resuming.
				break
		if retval == 0:
			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
			self.try_rename(tmpfilename, filename)
			return True
		else:
			self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
			return False
547
548         def _do_download(self, filename, url, player_url):
549                 # Check file already present
550                 if self.params.get('continuedl', False) and os.path.isfile(filename):
551                         self.report_file_already_downloaded(filename)
552                         return True
553
554                 # Attempt to download using rtmpdump
555                 if url.startswith('rtmp'):
556                         return self._download_with_rtmpdump(filename, url, player_url)
557
558                 tmpfilename = self.temp_name(filename)
559                 stream = None
560                 open_mode = 'wb'
561                 basic_request = urllib2.Request(url, None, std_headers)
562                 request = urllib2.Request(url, None, std_headers)
563
564                 # Establish possible resume length
565                 if os.path.isfile(tmpfilename):
566                         resume_len = os.path.getsize(tmpfilename)
567                 else:
568                         resume_len = 0
569
570                 # Request parameters in case of being able to resume
571                 if self.params.get('continuedl', False) and resume_len != 0:
572                         self.report_resuming_byte(resume_len)
573                         request.add_header('Range','bytes=%d-' % resume_len)
574                         open_mode = 'ab'
575
576                 count = 0
577                 retries = self.params.get('retries', 0)
578                 while count <= retries:
579                         # Establish connection
580                         try:
581                                 data = urllib2.urlopen(request)
582                                 break
583                         except (urllib2.HTTPError, ), err:
584                                 if (err.code < 500 or err.code >= 600) and err.code != 416:
585                                         # Unexpected HTTP error
586                                         raise
587                                 elif err.code == 416:
588                                         # Unable to resume (requested range not satisfiable)
589                                         try:
590                                                 # Open the connection again without the range header
591                                                 data = urllib2.urlopen(basic_request)
592                                                 content_length = data.info()['Content-Length']
593                                         except (urllib2.HTTPError, ), err:
594                                                 if err.code < 500 or err.code >= 600:
595                                                         raise
596                                         else:
597                                                 # Examine the reported length
598                                                 if (content_length is not None and
599                                                     (resume_len - 100 < long(content_length) < resume_len + 100)):
600                                                         # The file had already been fully downloaded.
601                                                         # Explanation to the above condition: in issue #175 it was revealed that
602                                                         # YouTube sometimes adds or removes a few bytes from the end of the file,
603                                                         # changing the file size slightly and causing problems for some users. So
604                                                         # I decided to implement a suggested change and consider the file
605                                                         # completely downloaded if the file size differs less than 100 bytes from
606                                                         # the one in the hard drive.
607                                                         self.report_file_already_downloaded(filename)
608                                                         self.try_rename(tmpfilename, filename)
609                                                         return True
610                                                 else:
611                                                         # The length does not match, we start the download over
612                                                         self.report_unable_to_resume()
613                                                         open_mode = 'wb'
614                                                         break
615                         # Retry
616                         count += 1
617                         if count <= retries:
618                                 self.report_retry(count, retries)
619
620                 if count > retries:
621                         self.trouble(u'ERROR: giving up after %s retries' % retries)
622                         return False
623
624                 data_len = data.info().get('Content-length', None)
625                 if data_len is not None:
626                         data_len = long(data_len) + resume_len
627                 data_len_str = self.format_bytes(data_len)
628                 byte_counter = 0 + resume_len
629                 block_size = 1024
630                 start = time.time()
631                 while True:
632                         # Download and write
633                         before = time.time()
634                         data_block = data.read(block_size)
635                         after = time.time()
636                         if len(data_block) == 0:
637                                 break
638                         byte_counter += len(data_block)
639
640                         # Open file just in time
641                         if stream is None:
642                                 try:
643                                         (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
644                                         self.report_destination(filename)
645                                 except (OSError, IOError), err:
646                                         self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
647                                         return False
648                         try:
649                                 stream.write(data_block)
650                         except (IOError, OSError), err:
651                                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
652                                 return False
653                         block_size = self.best_block_size(after - before, len(data_block))
654
655                         # Progress message
656                         percent_str = self.calc_percent(byte_counter, data_len)
657                         eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
658                         speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
659                         self.report_progress(percent_str, data_len_str, speed_str, eta_str)
660
661                         # Apply rate limit
662                         self.slow_down(start, byte_counter - resume_len)
663
664                 stream.close()
665                 self.report_finish()
666                 if data_len is not None and byte_counter != data_len:
667                         raise ContentTooShortError(byte_counter, long(data_len))
668                 self.try_rename(tmpfilename, filename)
669                 return True
670
class InfoExtractor(object):
        """Base class for all information extractors.

        An information extractor (IE) turns a URL into one or more
        dictionaries describing the videos behind it, and hands each one to
        the FileDownloader via process_info(). Every dictionary must carry:

        id:             Video identifier.
        url:            Final video URL.
        uploader:       Nickname of the video uploader.
        title:          Literal title.
        stitle:         Simplified title.
        ext:            Video filename extension.
        format:         Video format.
        player_url:     SWF Player URL (may be None).

        Optional fields, used only by the forced-printing helpers (e.g. for
        video-search backends such as youtube2mp3):

        thumbnail:      Full URL to a video thumbnail image.
        description:    One-line video description.

        Concrete extractors override _real_initialize(), _real_extract()
        and the suitable() static method, and are registered with the main
        downloader.
        """

        # Whether _real_initialize() has already run for this instance.
        _ready = False
        # The FileDownloader this IE reports progress/errors to.
        _downloader = None

        def __init__(self, downloader=None):
                """Create the extractor; optionally attach a downloader."""
                self._ready = False
                self.set_downloader(downloader)

        @staticmethod
        def suitable(url):
                """Return True if this IE can handle url (base class: never)."""
                return False

        def set_downloader(self, downloader):
                """Attach the FileDownloader used for output and reporting."""
                self._downloader = downloader

        def initialize(self):
                """Run one-time setup (authentication, etc.) on first use only."""
                if self._ready:
                        return
                self._real_initialize()
                self._ready = True

        def extract(self, url):
                """Ensure initialization, then delegate extraction to the subclass."""
                self.initialize()
                return self._real_extract(url)

        def _real_initialize(self):
                """Subclass hook: actual initialization. No-op by default."""
                pass

        def _real_extract(self, url):
                """Subclass hook: actual extraction. No-op by default."""
                pass
741
class YoutubeIE(InfoExtractor):
        """Information extractor for youtube.com."""

        # Group 1 matches the optional site prefix (youtu.be short links,
        # youtube.com / youtube-nocookie.com watch and /v/ URLs); group 2
        # captures the video id (see _real_extract).
        _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$'
        # Forces the site to English so the scraping regexes below match.
        _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
        _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
        _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
        # Machine name looked up in ~/.netrc when the usenetrc option is set.
        _NETRC_MACHINE = 'youtube'
        # Listed in order of quality
        _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
        # Maps format codes to file extensions; formats not listed here fall
        # back to 'flv' in _real_extract.
        _video_extensions = {
                '13': '3gp',
                '17': 'mp4',
                '18': 'mp4',
                '22': 'mp4',
                '37': 'mp4',
                '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
                '43': 'webm',
                '45': 'webm',
        }

        @staticmethod
        def suitable(url):
                """Return True if url matches _VALID_URL."""
                return (re.match(YoutubeIE._VALID_URL, url) is not None)

        def report_lang(self):
                """Report attempt to set language."""
                self._downloader.to_screen(u'[youtube] Setting language')

        def report_login(self):
                """Report attempt to log in."""
                self._downloader.to_screen(u'[youtube] Logging in')
        
        def report_age_confirmation(self):
                """Report attempt to confirm age."""
                self._downloader.to_screen(u'[youtube] Confirming age')
        
        def report_video_webpage_download(self, video_id):
                """Report attempt to download video webpage."""
                self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
        
        def report_video_info_webpage_download(self, video_id):
                """Report attempt to download video info webpage."""
                self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
        
        def report_information_extraction(self, video_id):
                """Report attempt to extract video information."""
                self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
        
        def report_unavailable_format(self, video_id, format):
                """Report extracted video URL."""
                self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
        
        def report_rtmp_download(self):
                """Indicate the download will use the RTMP protocol."""
                self._downloader.to_screen(u'[youtube] RTMP download detected')
        
        def _real_initialize(self):
                """One-time setup: force the site language to English, then
                optionally log in (explicit credentials or .netrc) and confirm
                age. Requires a downloader for options and warning output."""
                if self._downloader is None:
                        return

                username = None
                password = None
                downloader_params = self._downloader.params

                # Attempt to use provided username and password or .netrc data
                if downloader_params.get('username', None) is not None:
                        username = downloader_params['username']
                        password = downloader_params['password']
                elif downloader_params.get('usenetrc', False):
                        try:
                                info = netrc.netrc().authenticators(self._NETRC_MACHINE)
                                if info is not None:
                                        # authenticators() returns (login, account, password)
                                        username = info[0]
                                        password = info[2]
                                else:
                                        raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
                        except (IOError, netrc.NetrcParseError), err:
                                self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
                                return

                # Set language
                request = urllib2.Request(self._LANG_URL, None, std_headers)
                try:
                        self.report_lang()
                        urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
                        return

                # No authentication to be performed
                if username is None:
                        return

                # Log in
                login_form = {
                                'current_form': 'loginForm',
                                'next':         '/',
                                'action_login': 'Log In',
                                'username':     username,
                                'password':     password,
                                }
                request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
                try:
                        self.report_login()
                        login_results = urllib2.urlopen(request).read()
                        # If the login form is still present, authentication failed.
                        if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
                                self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
                                return
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
                        return
        
                # Confirm age
                age_form = {
                                'next_url':             '/',
                                'action_confirm':       'Confirm',
                                }
                request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
                try:
                        self.report_age_confirmation()
                        age_results = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
                        return

        def _real_extract(self, url):
                """Scrape metadata for the video behind `url` and hand every
                selected format off to the attached FileDownloader."""
                # Extract video id from URL
                mobj = re.match(self._VALID_URL, url)
                if mobj is None:
                        self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
                        return
                video_id = mobj.group(2)

                # Get video webpage
                self.report_video_webpage_download(video_id)
                request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id, None, std_headers)
                try:
                        video_webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
                        return

                # Attempt to extract SWF player URL
                mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
                if mobj is not None:
                        # Undo the backslash escaping in the embedded URL.
                        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
                else:
                        player_url = None

                # Get video info
                self.report_video_info_webpage_download(video_id)
                # Try several 'el' variants of get_video_info; stop at the first
                # response that carries a 'token' parameter.
                for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
                        video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                                           % (video_id, el_type))
                        request = urllib2.Request(video_info_url, None, std_headers)
                        try:
                                video_info_webpage = urllib2.urlopen(request).read()
                                video_info = parse_qs(video_info_webpage)
                                if 'token' in video_info:
                                        break
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                                self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
                                return
                if 'token' not in video_info:
                        if 'reason' in video_info:
                                self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
                        else:
                                self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
                        return

                # Start extracting information
                self.report_information_extraction(video_id)

                # uploader
                if 'author' not in video_info:
                        self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
                        return
                video_uploader = urllib.unquote_plus(video_info['author'][0])

                # title
                if 'title' not in video_info:
                        self._downloader.trouble(u'ERROR: unable to extract video title')
                        return
                video_title = urllib.unquote_plus(video_info['title'][0])
                video_title = video_title.decode('utf-8')
                video_title = sanitize_title(video_title)

                # simplified title
                # Collapse every run of non-alphanumeric characters into '_'.
                simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
                simple_title = simple_title.strip(ur'_')

                # thumbnail image
                if 'thumbnail_url' not in video_info:
                        self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
                        video_thumbnail = ''
                else:   # don't panic if we can't find it
                        video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

                # upload date
                upload_date = u'NA'
                mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
                if mobj is not None:
                        # Normalize separators to single spaces before parsing.
                        upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
                        format_expressions = ['%d %B %Y', '%B %d %Y']
                        # NOTE(review): there is no break after a successful parse;
                        # the already-reformatted '%Y%m%d' string then fails the next
                        # strptime, which the bare except silently swallows, so the
                        # final value is still correct — just wasteful.
                        for expression in format_expressions:
                                try:
                                        upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
                                except:
                                        pass

                # description
                video_description = 'No description available.'
                if self._downloader.params.get('forcedescription', False):
                        mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
                        if mobj is not None:
                                video_description = mobj.group(1)

                # token
                video_token = urllib.unquote_plus(video_info['token'][0])

                # Decide which formats to download
                req_format = self._downloader.params.get('format', None)

                if 'fmt_url_map' in video_info:
                        # fmt_url_map is a comma-separated list of 'format|url' pairs.
                        url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
                        format_limit = self._downloader.params.get('format_limit', None)
                        if format_limit is not None and format_limit in self._available_formats:
                                # Only consider formats at or below the requested quality cap.
                                format_list = self._available_formats[self._available_formats.index(format_limit):]
                        else:
                                format_list = self._available_formats
                        existing_formats = [x for x in format_list if x in url_map]
                        if len(existing_formats) == 0:
                                self._downloader.trouble(u'ERROR: no known formats available for video')
                                return
                        if req_format is None:
                                video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
                        elif req_format == '-1':
                                video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
                        else:
                                # Specific format
                                if req_format not in url_map:
                                        self._downloader.trouble(u'ERROR: requested format not available')
                                        return
                                video_url_list = [(req_format, url_map[req_format])] # Specific format

                elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
                        self.report_rtmp_download()
                        video_url_list = [(None, video_info['conn'][0])]

                else:
                        self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
                        return

                for format_param, video_real_url in video_url_list:
                        # At this point we have a new video
                        self._downloader.increment_downloads()

                        # Extension
                        video_extension = self._video_extensions.get(format_param, 'flv')

                        # Hand the collected metadata to the FileDownloader
                        try:
                                # Process video information
                                self._downloader.process_info({
                                        'id':           video_id.decode('utf-8'),
                                        'url':          video_real_url.decode('utf-8'),
                                        'uploader':     video_uploader.decode('utf-8'),
                                        'upload_date':  upload_date,
                                        'title':        video_title,
                                        'stitle':       simple_title,
                                        'ext':          video_extension.decode('utf-8'),
                                        'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
                                        'thumbnail':    video_thumbnail.decode('utf-8'),
                                        'description':  video_description.decode('utf-8'),
                                        'player_url':   player_url,
                                })
                        except UnavailableVideoError, err:
                                self._downloader.trouble(u'\nERROR: unable to download video')
1021
1022
class MetacafeIE(InfoExtractor):
        """Information Extractor for metacafe.com."""

        # Group 1 is the video id, group 2 the URL's simplified title.
        _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
        # Family-filter disclaimer page and the form endpoint that disables it.
        _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
        _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
        # YoutubeIE instance used to delegate 'yt-' prefixed videos.
        _youtube_ie = None

        def __init__(self, youtube_ie, downloader=None):
                """Constructor. Keeps a YoutubeIE for delegation, plus the
                optional downloader handled by the base class."""
                InfoExtractor.__init__(self, downloader)
                self._youtube_ie = youtube_ie

        @staticmethod
        def suitable(url):
                """Return True if url matches _VALID_URL."""
                return (re.match(MetacafeIE._VALID_URL, url) is not None)

        def report_disclaimer(self):
                """Report disclaimer retrieval."""
                self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')

        def report_age_confirmation(self):
                """Report attempt to confirm age."""
                self._downloader.to_screen(u'[metacafe] Confirming age')
        
        def report_download_webpage(self, video_id):
                """Report webpage download."""
                self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
        
        def report_extraction(self, video_id):
                """Report information extraction."""
                self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)

        def _real_initialize(self):
                """One-time setup: fetch the family-filter disclaimer page and
                POST the form that confirms age / disables the filter."""
                # Retrieve disclaimer
                request = urllib2.Request(self._DISCLAIMER, None, std_headers)
                try:
                        self.report_disclaimer()
                        disclaimer = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
                        return

                # Confirm age
                disclaimer_form = {
                        'filters': '0',
                        'submit': "Continue - I'm over 18",
                        }
                request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
                try:
                        self.report_age_confirmation()
                        disclaimer = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
                        return
        
        def _real_extract(self, url):
                """Scrape metadata for a metacafe video and hand it to the
                attached FileDownloader; 'yt-' ids are delegated to YoutubeIE."""
                # Extract id and simplified title from URL
                mobj = re.match(self._VALID_URL, url)
                if mobj is None:
                        self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
                        return

                video_id = mobj.group(1)

                # Check if video comes from YouTube
                mobj2 = re.match(r'^yt-(.*)$', video_id)
                if mobj2 is not None:
                        # Metacafe mirrors YouTube videos under a 'yt-' prefix;
                        # hand those straight to the YouTube extractor.
                        self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
                        return

                # At this point we have a new video
                self._downloader.increment_downloads()

                simple_title = mobj.group(2).decode('utf-8')

                # Retrieve video webpage to extract further information
                request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
                try:
                        self.report_download_webpage(video_id)
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
                        return

                # Extract URL, uploader and title from webpage
                self.report_extraction(video_id)
                mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
                if mobj is not None:
                        mediaURL = urllib.unquote(mobj.group(1))
                        # Last three characters of the URL double as the extension.
                        video_extension = mediaURL[-3:]
                        
                        # Extract gdaKey if available
                        mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
                        if mobj is None:
                                video_url = mediaURL
                        else:
                                gdaKey = mobj.group(1)
                                video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
                else:
                        # Fallback: pull the media URL and key out of the
                        # page's flashvars blob instead.
                        mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
                        if mobj is None:
                                self._downloader.trouble(u'ERROR: unable to extract media URL')
                                return
                        vardict = parse_qs(mobj.group(1))
                        if 'mediaData' not in vardict:
                                self._downloader.trouble(u'ERROR: unable to extract media URL')
                                return
                        mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
                        if mobj is None:
                                self._downloader.trouble(u'ERROR: unable to extract media URL')
                                return
                        # Unescape the JSON-style '\/' sequences in the URL.
                        mediaURL = mobj.group(1).replace('\\/', '/')
                        video_extension = mediaURL[-3:]
                        video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))

                mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
                if mobj is None:
                        self._downloader.trouble(u'ERROR: unable to extract title')
                        return
                video_title = mobj.group(1).decode('utf-8')
                video_title = sanitize_title(video_title)

                mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
                if mobj is None:
                        self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
                        return
                video_uploader = mobj.group(1)

                try:
                        # Process video information
                        self._downloader.process_info({
                                'id':           video_id.decode('utf-8'),
                                'url':          video_url.decode('utf-8'),
                                'uploader':     video_uploader.decode('utf-8'),
                                'upload_date':  u'NA',
                                'title':        video_title,
                                'stitle':       simple_title,
                                'ext':          video_extension.decode('utf-8'),
                                'format':       u'NA',
                                'player_url':   None,
                        })
                except UnavailableVideoError:
                        self._downloader.trouble(u'\nERROR: unable to download video')
1166
1167
1168 class DailymotionIE(InfoExtractor):
1169         """Information Extractor for Dailymotion"""
1170
1171         _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1172
1173         def __init__(self, downloader=None):
1174                 InfoExtractor.__init__(self, downloader)
1175
1176         @staticmethod
1177         def suitable(url):
1178                 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1179
1180         def report_download_webpage(self, video_id):
1181                 """Report webpage download."""
1182                 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
1183         
1184         def report_extraction(self, video_id):
1185                 """Report information extraction."""
1186                 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
1187
1188         def _real_initialize(self):
1189                 return
1190
1191         def _real_extract(self, url):
1192                 # Extract id and simplified title from URL
1193                 mobj = re.match(self._VALID_URL, url)
1194                 if mobj is None:
1195                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1196                         return
1197
1198                 # At this point we have a new video
1199                 self._downloader.increment_downloads()
1200                 video_id = mobj.group(1)
1201
1202                 simple_title = mobj.group(2).decode('utf-8')
1203                 video_extension = 'flv'
1204
1205                 # Retrieve video webpage to extract further information
1206                 request = urllib2.Request(url)
1207                 try:
1208                         self.report_download_webpage(video_id)
1209                         webpage = urllib2.urlopen(request).read()
1210                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1211                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1212                         return
1213
1214                 # Extract URL, uploader and title from webpage
1215                 self.report_extraction(video_id)
1216                 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1217                 if mobj is None:
1218                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1219                         return
1220                 mediaURL = urllib.unquote(mobj.group(1))
1221
1222                 # if needed add http://www.dailymotion.com/ if relative URL
1223
1224                 video_url = mediaURL
1225
1226                 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1227                 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1228                 if mobj is None:
1229                         self._downloader.trouble(u'ERROR: unable to extract title')
1230                         return
1231                 video_title = mobj.group(1).decode('utf-8')
1232                 video_title = sanitize_title(video_title)
1233
1234                 mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a>', webpage)
1235                 if mobj is None:
1236                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1237                         return
1238                 video_uploader = mobj.group(1)
1239
1240                 try:
1241                         # Process video information
1242                         self._downloader.process_info({
1243                                 'id':           video_id.decode('utf-8'),
1244                                 'url':          video_url.decode('utf-8'),
1245                                 'uploader':     video_uploader.decode('utf-8'),
1246                                 'upload_date':  u'NA',
1247                                 'title':        video_title,
1248                                 'stitle':       simple_title,
1249                                 'ext':          video_extension.decode('utf-8'),
1250                                 'format':       u'NA',
1251                                 'player_url':   None,
1252                         })
1253                 except UnavailableVideoError:
1254                         self._downloader.trouble(u'\nERROR: unable to download video')
1255
1256 class GoogleIE(InfoExtractor):
1257         """Information extractor for video.google.com."""
1258
1259         _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1260
1261         def __init__(self, downloader=None):
1262                 InfoExtractor.__init__(self, downloader)
1263
1264         @staticmethod
1265         def suitable(url):
1266                 return (re.match(GoogleIE._VALID_URL, url) is not None)
1267
1268         def report_download_webpage(self, video_id):
1269                 """Report webpage download."""
1270                 self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
1271
1272         def report_extraction(self, video_id):
1273                 """Report information extraction."""
1274                 self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
1275
1276         def _real_initialize(self):
1277                 return
1278
1279         def _real_extract(self, url):
1280                 # Extract id from URL
1281                 mobj = re.match(self._VALID_URL, url)
1282                 if mobj is None:
1283                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1284                         return
1285
1286                 # At this point we have a new video
1287                 self._downloader.increment_downloads()
1288                 video_id = mobj.group(1)
1289
1290                 video_extension = 'mp4'
1291
1292                 # Retrieve video webpage to extract further information
1293                 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1294                 try:
1295                         self.report_download_webpage(video_id)
1296                         webpage = urllib2.urlopen(request).read()
1297                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1298                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1299                         return
1300
1301                 # Extract URL, uploader, and title from webpage
1302                 self.report_extraction(video_id)
1303                 mobj = re.search(r"download_url:'([^']+)'", webpage)
1304                 if mobj is None:
1305                         video_extension = 'flv'
1306                         mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1307                 if mobj is None:
1308                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1309                         return
1310                 mediaURL = urllib.unquote(mobj.group(1))
1311                 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1312                 mediaURL = mediaURL.replace('\\x26', '\x26')
1313
1314                 video_url = mediaURL
1315
1316                 mobj = re.search(r'<title>(.*)</title>', webpage)
1317                 if mobj is None:
1318                         self._downloader.trouble(u'ERROR: unable to extract title')
1319                         return
1320                 video_title = mobj.group(1).decode('utf-8')
1321                 video_title = sanitize_title(video_title)
1322                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1323
1324                 # Extract video description
1325                 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1326                 if mobj is None:
1327                         self._downloader.trouble(u'ERROR: unable to extract video description')
1328                         return
1329                 video_description = mobj.group(1).decode('utf-8')
1330                 if not video_description:
1331                         video_description = 'No description available.'
1332
1333                 # Extract video thumbnail
1334                 if self._downloader.params.get('forcethumbnail', False):
1335                         request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1336                         try:
1337                                 webpage = urllib2.urlopen(request).read()
1338                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1339                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1340                                 return
1341                         mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1342                         if mobj is None:
1343                                 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1344                                 return
1345                         video_thumbnail = mobj.group(1)
1346                 else:   # we need something to pass to process_info
1347                         video_thumbnail = ''
1348
1349
1350                 try:
1351                         # Process video information
1352                         self._downloader.process_info({
1353                                 'id':           video_id.decode('utf-8'),
1354                                 'url':          video_url.decode('utf-8'),
1355                                 'uploader':     u'NA',
1356                                 'upload_date':  u'NA',
1357                                 'title':        video_title,
1358                                 'stitle':       simple_title,
1359                                 'ext':          video_extension.decode('utf-8'),
1360                                 'format':       u'NA',
1361                                 'player_url':   None,
1362                         })
1363                 except UnavailableVideoError:
1364                         self._downloader.trouble(u'\nERROR: unable to download video')
1365
1366
1367 class PhotobucketIE(InfoExtractor):
1368         """Information extractor for photobucket.com."""
1369
1370         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1371
1372         def __init__(self, downloader=None):
1373                 InfoExtractor.__init__(self, downloader)
1374
1375         @staticmethod
1376         def suitable(url):
1377                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1378
1379         def report_download_webpage(self, video_id):
1380                 """Report webpage download."""
1381                 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1382
1383         def report_extraction(self, video_id):
1384                 """Report information extraction."""
1385                 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1386
1387         def _real_initialize(self):
1388                 return
1389
1390         def _real_extract(self, url):
1391                 # Extract id from URL
1392                 mobj = re.match(self._VALID_URL, url)
1393                 if mobj is None:
1394                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1395                         return
1396
1397                 # At this point we have a new video
1398                 self._downloader.increment_downloads()
1399                 video_id = mobj.group(1)
1400
1401                 video_extension = 'flv'
1402
1403                 # Retrieve video webpage to extract further information
1404                 request = urllib2.Request(url)
1405                 try:
1406                         self.report_download_webpage(video_id)
1407                         webpage = urllib2.urlopen(request).read()
1408                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1409                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1410                         return
1411
1412                 # Extract URL, uploader, and title from webpage
1413                 self.report_extraction(video_id)
1414                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1415                 if mobj is None:
1416                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1417                         return
1418                 mediaURL = urllib.unquote(mobj.group(1))
1419
1420                 video_url = mediaURL
1421
1422                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1423                 if mobj is None:
1424                         self._downloader.trouble(u'ERROR: unable to extract title')
1425                         return
1426                 video_title = mobj.group(1).decode('utf-8')
1427                 video_title = sanitize_title(video_title)
1428                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1429
1430                 video_uploader = mobj.group(2).decode('utf-8')
1431
1432                 try:
1433                         # Process video information
1434                         self._downloader.process_info({
1435                                 'id':           video_id.decode('utf-8'),
1436                                 'url':          video_url.decode('utf-8'),
1437                                 'uploader':     video_uploader,
1438                                 'upload_date':  u'NA',
1439                                 'title':        video_title,
1440                                 'stitle':       simple_title,
1441                                 'ext':          video_extension.decode('utf-8'),
1442                                 'format':       u'NA',
1443                                 'player_url':   None,
1444                         })
1445                 except UnavailableVideoError:
1446                         self._downloader.trouble(u'\nERROR: unable to download video')
1447
1448
1449 class YahooIE(InfoExtractor):
1450         """Information extractor for video.yahoo.com."""
1451
1452         # _VALID_URL matches all Yahoo! Video URLs
1453         # _VPAGE_URL matches only the extractable '/watch/' URLs
1454         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1455         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1456
1457         def __init__(self, downloader=None):
1458                 InfoExtractor.__init__(self, downloader)
1459
1460         @staticmethod
1461         def suitable(url):
1462                 return (re.match(YahooIE._VALID_URL, url) is not None)
1463
1464         def report_download_webpage(self, video_id):
1465                 """Report webpage download."""
1466                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1467
1468         def report_extraction(self, video_id):
1469                 """Report information extraction."""
1470                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1471
1472         def _real_initialize(self):
1473                 return
1474
1475         def _real_extract(self, url, new_video=True):
1476                 # Extract ID from URL
1477                 mobj = re.match(self._VALID_URL, url)
1478                 if mobj is None:
1479                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1480                         return
1481
1482                 # At this point we have a new video
1483                 self._downloader.increment_downloads()
1484                 video_id = mobj.group(2)
1485                 video_extension = 'flv'
1486
1487                 # Rewrite valid but non-extractable URLs as
1488                 # extractable English language /watch/ URLs
1489                 if re.match(self._VPAGE_URL, url) is None:
1490                         request = urllib2.Request(url)
1491                         try:
1492                                 webpage = urllib2.urlopen(request).read()
1493                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1494                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1495                                 return
1496
1497                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1498                         if mobj is None:
1499                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1500                                 return
1501                         yahoo_id = mobj.group(1)
1502
1503                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1504                         if mobj is None:
1505                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1506                                 return
1507                         yahoo_vid = mobj.group(1)
1508
1509                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1510                         return self._real_extract(url, new_video=False)
1511
1512                 # Retrieve video webpage to extract further information
1513                 request = urllib2.Request(url)
1514                 try:
1515                         self.report_download_webpage(video_id)
1516                         webpage = urllib2.urlopen(request).read()
1517                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1518                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1519                         return
1520
1521                 # Extract uploader and title from webpage
1522                 self.report_extraction(video_id)
1523                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1524                 if mobj is None:
1525                         self._downloader.trouble(u'ERROR: unable to extract video title')
1526                         return
1527                 video_title = mobj.group(1).decode('utf-8')
1528                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1529
1530                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1531                 if mobj is None:
1532                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1533                         return
1534                 video_uploader = mobj.group(1).decode('utf-8')
1535
1536                 # Extract video thumbnail
1537                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1538                 if mobj is None:
1539                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1540                         return
1541                 video_thumbnail = mobj.group(1).decode('utf-8')
1542
1543                 # Extract video description
1544                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1545                 if mobj is None:
1546                         self._downloader.trouble(u'ERROR: unable to extract video description')
1547                         return
1548                 video_description = mobj.group(1).decode('utf-8')
1549                 if not video_description: video_description = 'No description available.'
1550
1551                 # Extract video height and width
1552                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1553                 if mobj is None:
1554                         self._downloader.trouble(u'ERROR: unable to extract video height')
1555                         return
1556                 yv_video_height = mobj.group(1)
1557
1558                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1559                 if mobj is None:
1560                         self._downloader.trouble(u'ERROR: unable to extract video width')
1561                         return
1562                 yv_video_width = mobj.group(1)
1563
1564                 # Retrieve video playlist to extract media URL
1565                 # I'm not completely sure what all these options are, but we
1566                 # seem to need most of them, otherwise the server sends a 401.
1567                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1568                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1569                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1570                                           '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1571                                           '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1572                 try:
1573                         self.report_download_webpage(video_id)
1574                         webpage = urllib2.urlopen(request).read()
1575                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1576                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1577                         return
1578
1579                 # Extract media URL from playlist XML
1580                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1581                 if mobj is None:
1582                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1583                         return
1584                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1585                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1586
1587                 try:
1588                         # Process video information
1589                         self._downloader.process_info({
1590                                 'id':           video_id.decode('utf-8'),
1591                                 'url':          video_url,
1592                                 'uploader':     video_uploader,
1593                                 'upload_date':  u'NA',
1594                                 'title':        video_title,
1595                                 'stitle':       simple_title,
1596                                 'ext':          video_extension.decode('utf-8'),
1597                                 'thumbnail':    video_thumbnail.decode('utf-8'),
1598                                 'description':  video_description,
1599                                 'thumbnail':    video_thumbnail,
1600                                 'description':  video_description,
1601                                 'player_url':   None,
1602                         })
1603                 except UnavailableVideoError:
1604                         self._downloader.trouble(u'\nERROR: unable to download video')
1605
1606
1607 class GenericIE(InfoExtractor):
1608         """Generic last-resort information extractor."""
1609
1610         def __init__(self, downloader=None):
1611                 InfoExtractor.__init__(self, downloader)
1612
1613         @staticmethod
1614         def suitable(url):
1615                 return True
1616
1617         def report_download_webpage(self, video_id):
1618                 """Report webpage download."""
1619                 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1620                 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1621
1622         def report_extraction(self, video_id):
1623                 """Report information extraction."""
1624                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1625
1626         def _real_initialize(self):
1627                 return
1628
1629         def _real_extract(self, url):
1630                 # At this point we have a new video
1631                 self._downloader.increment_downloads()
1632
1633                 video_id = url.split('/')[-1]
1634                 request = urllib2.Request(url)
1635                 try:
1636                         self.report_download_webpage(video_id)
1637                         webpage = urllib2.urlopen(request).read()
1638                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1639                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1640                         return
1641                 except ValueError, err:
1642                         # since this is the last-resort InfoExtractor, if
1643                         # this error is thrown, it'll be thrown here
1644                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1645                         return
1646
1647                 self.report_extraction(video_id)
1648                 # Start with something easy: JW Player in SWFObject
1649                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1650                 if mobj is None:
1651                         # Broaden the search a little bit
1652                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1653                 if mobj is None:
1654                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1655                         return
1656
1657                 # It's possible that one of the regexes
1658                 # matched, but returned an empty group:
1659                 if mobj.group(1) is None:
1660                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1661                         return
1662
1663                 video_url = urllib.unquote(mobj.group(1))
1664                 video_id  = os.path.basename(video_url)
1665
1666                 # here's a fun little line of code for you:
1667                 video_extension = os.path.splitext(video_id)[1][1:]
1668                 video_id        = os.path.splitext(video_id)[0]
1669
1670                 # it's tempting to parse this further, but you would
1671                 # have to take into account all the variations like
1672                 #   Video Title - Site Name
1673                 #   Site Name | Video Title
1674                 #   Video Title - Tagline | Site Name
1675                 # and so on and so forth; it's just not practical
1676                 mobj = re.search(r'<title>(.*)</title>', webpage)
1677                 if mobj is None:
1678                         self._downloader.trouble(u'ERROR: unable to extract title')
1679                         return
1680                 video_title = mobj.group(1).decode('utf-8')
1681                 video_title = sanitize_title(video_title)
1682                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1683
1684                 # video uploader is domain name
1685                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1686                 if mobj is None:
1687                         self._downloader.trouble(u'ERROR: unable to extract title')
1688                         return
1689                 video_uploader = mobj.group(1).decode('utf-8')
1690
1691                 try:
1692                         # Process video information
1693                         self._downloader.process_info({
1694                                 'id':           video_id.decode('utf-8'),
1695                                 'url':          video_url.decode('utf-8'),
1696                                 'uploader':     video_uploader,
1697                                 'upload_date':  u'NA',
1698                                 'title':        video_title,
1699                                 'stitle':       simple_title,
1700                                 'ext':          video_extension.decode('utf-8'),
1701                                 'format':       u'NA',
1702                                 'player_url':   None,
1703                         })
1704                 except UnavailableVideoError, err:
1705                         self._downloader.trouble(u'\nERROR: unable to download video')
1706
1707
1708 class YoutubeSearchIE(InfoExtractor):
1709         """Information Extractor for YouTube search queries."""
1710         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1711         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1712         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1713         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1714         _youtube_ie = None
1715         _max_youtube_results = 1000
1716
1717         def __init__(self, youtube_ie, downloader=None):
1718                 InfoExtractor.__init__(self, downloader)
1719                 self._youtube_ie = youtube_ie
1720         
1721         @staticmethod
1722         def suitable(url):
1723                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1724
1725         def report_download_page(self, query, pagenum):
1726                 """Report attempt to download playlist page with given number."""
1727                 query = query.decode(preferredencoding())
1728                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1729
1730         def _real_initialize(self):
1731                 self._youtube_ie.initialize()
1732         
1733         def _real_extract(self, query):
1734                 mobj = re.match(self._VALID_QUERY, query)
1735                 if mobj is None:
1736                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1737                         return
1738
1739                 prefix, query = query.split(':')
1740                 prefix = prefix[8:]
1741                 query  = query.encode('utf-8')
1742                 if prefix == '':
1743                         self._download_n_results(query, 1)
1744                         return
1745                 elif prefix == 'all':
1746                         self._download_n_results(query, self._max_youtube_results)
1747                         return
1748                 else:
1749                         try:
1750                                 n = long(prefix)
1751                                 if n <= 0:
1752                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1753                                         return
1754                                 elif n > self._max_youtube_results:
1755                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
1756                                         n = self._max_youtube_results
1757                                 self._download_n_results(query, n)
1758                                 return
1759                         except ValueError: # parsing prefix as integer fails
1760                                 self._download_n_results(query, 1)
1761                                 return
1762
1763         def _download_n_results(self, query, n):
1764                 """Downloads a specified number of results for a query"""
1765
1766                 video_ids = []
1767                 already_seen = set()
1768                 pagenum = 1
1769
1770                 while True:
1771                         self.report_download_page(query, pagenum)
1772                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1773                         request = urllib2.Request(result_url, None, std_headers)
1774                         try:
1775                                 page = urllib2.urlopen(request).read()
1776                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1777                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1778                                 return
1779
1780                         # Extract video identifiers
1781                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1782                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1783                                 if video_id not in already_seen:
1784                                         video_ids.append(video_id)
1785                                         already_seen.add(video_id)
1786                                         if len(video_ids) == n:
1787                                                 # Specified n videos reached
1788                                                 for id in video_ids:
1789                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1790                                                 return
1791
1792                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1793                                 for id in video_ids:
1794                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1795                                 return
1796
1797                         pagenum = pagenum + 1
1798
1799 class GoogleSearchIE(InfoExtractor):
1800         """Information Extractor for Google Video search queries."""
1801         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1802         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1803         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1804         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1805         _google_ie = None
1806         _max_google_results = 1000
1807
1808         def __init__(self, google_ie, downloader=None):
1809                 InfoExtractor.__init__(self, downloader)
1810                 self._google_ie = google_ie
1811         
1812         @staticmethod
1813         def suitable(url):
1814                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1815
1816         def report_download_page(self, query, pagenum):
1817                 """Report attempt to download playlist page with given number."""
1818                 query = query.decode(preferredencoding())
1819                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1820
1821         def _real_initialize(self):
1822                 self._google_ie.initialize()
1823         
1824         def _real_extract(self, query):
1825                 mobj = re.match(self._VALID_QUERY, query)
1826                 if mobj is None:
1827                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1828                         return
1829
1830                 prefix, query = query.split(':')
1831                 prefix = prefix[8:]
1832                 query  = query.encode('utf-8')
1833                 if prefix == '':
1834                         self._download_n_results(query, 1)
1835                         return
1836                 elif prefix == 'all':
1837                         self._download_n_results(query, self._max_google_results)
1838                         return
1839                 else:
1840                         try:
1841                                 n = long(prefix)
1842                                 if n <= 0:
1843                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1844                                         return
1845                                 elif n > self._max_google_results:
1846                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n))
1847                                         n = self._max_google_results
1848                                 self._download_n_results(query, n)
1849                                 return
1850                         except ValueError: # parsing prefix as integer fails
1851                                 self._download_n_results(query, 1)
1852                                 return
1853
1854         def _download_n_results(self, query, n):
1855                 """Downloads a specified number of results for a query"""
1856
1857                 video_ids = []
1858                 already_seen = set()
1859                 pagenum = 1
1860
1861                 while True:
1862                         self.report_download_page(query, pagenum)
1863                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1864                         request = urllib2.Request(result_url, None, std_headers)
1865                         try:
1866                                 page = urllib2.urlopen(request).read()
1867                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1868                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1869                                 return
1870
1871                         # Extract video identifiers
1872                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1873                                 video_id = mobj.group(1)
1874                                 if video_id not in already_seen:
1875                                         video_ids.append(video_id)
1876                                         already_seen.add(video_id)
1877                                         if len(video_ids) == n:
1878                                                 # Specified n videos reached
1879                                                 for id in video_ids:
1880                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1881                                                 return
1882
1883                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1884                                 for id in video_ids:
1885                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1886                                 return
1887
1888                         pagenum = pagenum + 1
1889
1890 class YahooSearchIE(InfoExtractor):
1891         """Information Extractor for Yahoo! Video search queries."""
1892         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
1893         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
1894         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
1895         _MORE_PAGES_INDICATOR = r'\s*Next'
1896         _yahoo_ie = None
1897         _max_yahoo_results = 1000
1898
1899         def __init__(self, yahoo_ie, downloader=None):
1900                 InfoExtractor.__init__(self, downloader)
1901                 self._yahoo_ie = yahoo_ie
1902         
1903         @staticmethod
1904         def suitable(url):
1905                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
1906
1907         def report_download_page(self, query, pagenum):
1908                 """Report attempt to download playlist page with given number."""
1909                 query = query.decode(preferredencoding())
1910                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
1911
1912         def _real_initialize(self):
1913                 self._yahoo_ie.initialize()
1914         
1915         def _real_extract(self, query):
1916                 mobj = re.match(self._VALID_QUERY, query)
1917                 if mobj is None:
1918                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1919                         return
1920
1921                 prefix, query = query.split(':')
1922                 prefix = prefix[8:]
1923                 query  = query.encode('utf-8')
1924                 if prefix == '':
1925                         self._download_n_results(query, 1)
1926                         return
1927                 elif prefix == 'all':
1928                         self._download_n_results(query, self._max_yahoo_results)
1929                         return
1930                 else:
1931                         try:
1932                                 n = long(prefix)
1933                                 if n <= 0:
1934                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1935                                         return
1936                                 elif n > self._max_yahoo_results:
1937                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n))
1938                                         n = self._max_yahoo_results
1939                                 self._download_n_results(query, n)
1940                                 return
1941                         except ValueError: # parsing prefix as integer fails
1942                                 self._download_n_results(query, 1)
1943                                 return
1944
1945         def _download_n_results(self, query, n):
1946                 """Downloads a specified number of results for a query"""
1947
1948                 video_ids = []
1949                 already_seen = set()
1950                 pagenum = 1
1951
1952                 while True:
1953                         self.report_download_page(query, pagenum)
1954                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1955                         request = urllib2.Request(result_url, None, std_headers)
1956                         try:
1957                                 page = urllib2.urlopen(request).read()
1958                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1959                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1960                                 return
1961
1962                         # Extract video identifiers
1963                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1964                                 video_id = mobj.group(1)
1965                                 if video_id not in already_seen:
1966                                         video_ids.append(video_id)
1967                                         already_seen.add(video_id)
1968                                         if len(video_ids) == n:
1969                                                 # Specified n videos reached
1970                                                 for id in video_ids:
1971                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1972                                                 return
1973
1974                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1975                                 for id in video_ids:
1976                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1977                                 return
1978
1979                         pagenum = pagenum + 1
1980
1981 class YoutubePlaylistIE(InfoExtractor):
1982         """Information Extractor for YouTube playlists."""
1983
1984         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
1985         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1986         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1987         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1988         _youtube_ie = None
1989
1990         def __init__(self, youtube_ie, downloader=None):
1991                 InfoExtractor.__init__(self, downloader)
1992                 self._youtube_ie = youtube_ie
1993         
1994         @staticmethod
1995         def suitable(url):
1996                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1997
1998         def report_download_page(self, playlist_id, pagenum):
1999                 """Report attempt to download playlist page with given number."""
2000                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2001
2002         def _real_initialize(self):
2003                 self._youtube_ie.initialize()
2004         
2005         def _real_extract(self, url):
2006                 # Extract playlist id
2007                 mobj = re.match(self._VALID_URL, url)
2008                 if mobj is None:
2009                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2010                         return
2011
2012                 # Download playlist pages
2013                 playlist_id = mobj.group(1)
2014                 video_ids = []
2015                 pagenum = 1
2016
2017                 while True:
2018                         self.report_download_page(playlist_id, pagenum)
2019                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
2020                         try:
2021                                 page = urllib2.urlopen(request).read()
2022                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2023                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2024                                 return
2025
2026                         # Extract video identifiers
2027                         ids_in_page = []
2028                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2029                                 if mobj.group(1) not in ids_in_page:
2030                                         ids_in_page.append(mobj.group(1))
2031                         video_ids.extend(ids_in_page)
2032
2033                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2034                                 break
2035                         pagenum = pagenum + 1
2036
2037                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2038                 playlistend = self._downloader.params.get('playlistend', -1)
2039                 video_ids = video_ids[playliststart:playlistend]
2040
2041                 for id in video_ids:
2042                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2043                 return
2044
2045 class YoutubeUserIE(InfoExtractor):
2046         """Information Extractor for YouTube users."""
2047
2048         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
2049         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2050         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
2051         _youtube_ie = None
2052
2053         def __init__(self, youtube_ie, downloader=None):
2054                 InfoExtractor.__init__(self, downloader)
2055                 self._youtube_ie = youtube_ie
2056         
2057         @staticmethod
2058         def suitable(url):
2059                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2060
2061         def report_download_page(self, username):
2062                 """Report attempt to download user page."""
2063                 self._downloader.to_screen(u'[youtube] user %s: Downloading page ' % (username))
2064
2065         def _real_initialize(self):
2066                 self._youtube_ie.initialize()
2067         
2068         def _real_extract(self, url):
2069                 # Extract username
2070                 mobj = re.match(self._VALID_URL, url)
2071                 if mobj is None:
2072                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2073                         return
2074
2075                 # Download user page
2076                 username = mobj.group(1)
2077                 video_ids = []
2078                 pagenum = 1
2079
2080                 self.report_download_page(username)
2081                 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
2082                 try:
2083                         page = urllib2.urlopen(request).read()
2084                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2085                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2086                         return
2087
2088                 # Extract video identifiers
2089                 ids_in_page = []
2090
2091                 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2092                         if mobj.group(1) not in ids_in_page:
2093                                 ids_in_page.append(mobj.group(1))
2094                 video_ids.extend(ids_in_page)
2095
2096                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2097                 playlistend = self._downloader.params.get('playlistend', -1)
2098                 video_ids = video_ids[playliststart:playlistend]
2099
2100                 for id in video_ids:
2101                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2102                 return
2103
2104 class DepositFilesIE(InfoExtractor):
2105         """Information extractor for depositfiles.com"""
2106
2107         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
2108
2109         def __init__(self, downloader=None):
2110                 InfoExtractor.__init__(self, downloader)
2111
2112         @staticmethod
2113         def suitable(url):
2114                 return (re.match(DepositFilesIE._VALID_URL, url) is not None)
2115
2116         def report_download_webpage(self, file_id):
2117                 """Report webpage download."""
2118                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2119
2120         def report_extraction(self, file_id):
2121                 """Report information extraction."""
2122                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2123
2124         def _real_initialize(self):
2125                 return
2126
2127         def _real_extract(self, url):
2128                 # At this point we have a new file
2129                 self._downloader.increment_downloads()
2130
2131                 file_id = url.split('/')[-1]
2132                 # Rebuild url in english locale
2133                 url = 'http://depositfiles.com/en/files/' + file_id
2134
2135                 # Retrieve file webpage with 'Free download' button pressed
2136                 free_download_indication = { 'gateway_result' : '1' }
2137                 request = urllib2.Request(url, urllib.urlencode(free_download_indication), std_headers)
2138                 try:
2139                         self.report_download_webpage(file_id)
2140                         webpage = urllib2.urlopen(request).read()
2141                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2142                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2143                         return
2144
2145                 # Search for the real file URL
2146                 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2147                 if (mobj is None) or (mobj.group(1) is None):
2148                         # Try to figure out reason of the error.
2149                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2150                         if (mobj is not None) and (mobj.group(1) is not None):
2151                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2152                                 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2153                         else:
2154                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2155                         return
2156
2157                 file_url = mobj.group(1)
2158                 file_extension = os.path.splitext(file_url)[1][1:]
2159
2160                 # Search for file title
2161                 mobj = re.search(r'<b title="(.*?)">', webpage)
2162                 if mobj is None:
2163                         self._downloader.trouble(u'ERROR: unable to extract title')
2164                         return
2165                 file_title = mobj.group(1).decode('utf-8')
2166
2167                 try:
2168                         # Process file information
2169                         self._downloader.process_info({
2170                                 'id':           file_id.decode('utf-8'),
2171                                 'url':          file_url.decode('utf-8'),
2172                                 'uploader':     u'NA',
2173                                 'upload_date':  u'NA',
2174                                 'title':        file_title,
2175                                 'stitle':       file_title,
2176                                 'ext':          file_extension.decode('utf-8'),
2177                                 'format':       u'NA',
2178                                 'player_url':   None,
2179                         })
2180                 except UnavailableVideoError, err:
2181                         self._downloader.trouble(u'ERROR: unable to download file')
2182
class PostProcessor(object):
	"""Base class for postprocessing steps.

	Instances are registered on a downloader via its add_post_processor()
	method. After every successful download the downloader walks its chain
	of PostProcessors, feeding each one's run() return value into the next.
	The chain stops as soon as a run() call returns None or there are no
	processors left.

	PostProcessor objects follow a "mutual registration" process similar
	to InfoExtractor objects.
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Sets the downloader for this PP."""
		self._downloader = downloader

	def run(self, information):
		"""Run the PostProcessor.

		"information" is a dictionary shaped like the ones produced by
		InfoExtractors, with one extra key, "filepath", naming the file
		that was just downloaded.

		Returning None halts the postprocessing chain; returning a
		dictionary (possibly the received one with some fields changed)
		passes it on to the next processor in the chain.

		Implementations may also raise PostProcessingError, which the
		calling downloader takes into account.
		"""
		return information # by default, do nothing
2228         
2229 ### MAIN PROGRAM ###
2230 if __name__ == '__main__':
2231         try:
2232                 # Modules needed only when running the main program
2233                 import getpass
2234                 import optparse
2235
2236                 # Function to update the program file with the latest version from bitbucket.org
2237                 def update_self(downloader, filename):
2238                         # Note: downloader only used for options
2239                         if not os.access (filename, os.W_OK):
2240                                 sys.exit('ERROR: no write permissions on %s' % filename)
2241
2242                         downloader.to_screen('Updating to latest stable version...')
2243                         latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
2244                         latest_version = urllib.urlopen(latest_url).read().strip()
2245                         prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
2246                         newcontent = urllib.urlopen(prog_url).read()
2247                         stream = open(filename, 'w')
2248                         stream.write(newcontent)
2249                         stream.close()
2250                         downloader.to_screen('Updated to version %s' % latest_version)
2251
2252                 # Parse command line
2253                 parser = optparse.OptionParser(
2254                         usage='Usage: %prog [options] url...',
2255                         version='2010.12.09',
2256                         conflict_handler='resolve',
2257                 )
2258
2259                 parser.add_option('-h', '--help',
2260                                 action='help', help='print this help text and exit')
2261                 parser.add_option('-v', '--version',
2262                                 action='version', help='print program version and exit')
2263                 parser.add_option('-U', '--update',
2264                                 action='store_true', dest='update_self', help='update this program to latest stable version')
2265                 parser.add_option('-i', '--ignore-errors',
2266                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
2267                 parser.add_option('-r', '--rate-limit',
2268                                 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
2269                 parser.add_option('-R', '--retries',
2270                                 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
2271                 parser.add_option('--playlist-start',
2272                                 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
2273                 parser.add_option('--playlist-end',
2274                                 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
2275                 parser.add_option('--dump-user-agent',
2276                                 action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False)
2277
2278                 authentication = optparse.OptionGroup(parser, 'Authentication Options')
2279                 authentication.add_option('-u', '--username',
2280                                 dest='username', metavar='USERNAME', help='account username')
2281                 authentication.add_option('-p', '--password',
2282                                 dest='password', metavar='PASSWORD', help='account password')
2283                 authentication.add_option('-n', '--netrc',
2284                                 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
2285                 parser.add_option_group(authentication)
2286
2287                 video_format = optparse.OptionGroup(parser, 'Video Format Options')
2288                 video_format.add_option('-f', '--format',
2289                                 action='store', dest='format', metavar='FORMAT', help='video format code')
2290                 video_format.add_option('--all-formats',
2291                                 action='store_const', dest='format', help='download all available video formats', const='-1')
2292                 video_format.add_option('--max-quality',
2293                                 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
2294                 parser.add_option_group(video_format)
2295
2296                 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
2297                 verbosity.add_option('-q', '--quiet',
2298                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
2299                 verbosity.add_option('-s', '--simulate',
2300                                 action='store_true', dest='simulate', help='do not download video', default=False)
2301                 verbosity.add_option('-g', '--get-url',
2302                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
2303                 verbosity.add_option('-e', '--get-title',
2304                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
2305                 verbosity.add_option('--get-thumbnail',
2306                                 action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
2307                 verbosity.add_option('--get-description',
2308                                 action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
2309                 verbosity.add_option('--no-progress',
2310                                 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
2311                 verbosity.add_option('--console-title',
2312                                 action='store_true', dest='consoletitle', help='display progress in console titlebar', default=False)
2313                 parser.add_option_group(verbosity)
2314
2315                 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
2316                 filesystem.add_option('-t', '--title',
2317                                 action='store_true', dest='usetitle', help='use title in file name', default=False)
2318                 filesystem.add_option('-l', '--literal',
2319                                 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
2320                 filesystem.add_option('-A', '--auto-number',
2321                                 action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False)
2322                 filesystem.add_option('-o', '--output',
2323                                 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
2324                 filesystem.add_option('-a', '--batch-file',
2325                                 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
2326                 filesystem.add_option('-w', '--no-overwrites',
2327                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
2328                 filesystem.add_option('-c', '--continue',
2329                                 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
2330                 filesystem.add_option('--cookies',
2331                                 dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
2332                 parser.add_option_group(filesystem)
2333
2334                 (opts, args) = parser.parse_args()
2335
2336                 # Open appropriate CookieJar
2337                 if opts.cookiefile is None:
2338                         jar = cookielib.CookieJar()
2339                 else:
2340                         try:
2341                                 jar = cookielib.MozillaCookieJar(opts.cookiefile)
2342                                 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
2343                                         jar.load()
2344                         except (IOError, OSError), err:
2345                                 sys.exit(u'ERROR: unable to open cookie file')
2346
2347                 # Dump user agent
2348                 if opts.dump_user_agent:
2349                         print std_headers['User-Agent']
2350                         sys.exit(0)
2351
2352                 # General configuration
2353                 cookie_processor = urllib2.HTTPCookieProcessor(jar)
2354                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
2355                 urllib2.install_opener(urllib2.build_opener(cookie_processor))
2356                 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
2357
2358                 # Batch file verification
2359                 batchurls = []
2360                 if opts.batchfile is not None:
2361                         try:
2362                                 if opts.batchfile == '-':
2363                                         batchfd = sys.stdin
2364                                 else:
2365                                         batchfd = open(opts.batchfile, 'r')
2366                                 batchurls = batchfd.readlines()
2367                                 batchurls = [x.strip() for x in batchurls]
2368                                 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
2369                         except IOError:
2370                                 sys.exit(u'ERROR: batch file could not be read')
2371                 all_urls = batchurls + args
2372
2373                 # Conflicting, missing and erroneous options
2374                 if opts.usenetrc and (opts.username is not None or opts.password is not None):
2375                         parser.error(u'using .netrc conflicts with giving username/password')
2376                 if opts.password is not None and opts.username is None:
2377                         parser.error(u'account username missing')
2378                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
2379                         parser.error(u'using output template conflicts with using title, literal title or auto number')
2380                 if opts.usetitle and opts.useliteral:
2381                         parser.error(u'using title conflicts with using literal title')
2382                 if opts.username is not None and opts.password is None:
2383                         opts.password = getpass.getpass(u'Type account password and press return:')
2384                 if opts.ratelimit is not None:
2385                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
2386                         if numeric_limit is None:
2387                                 parser.error(u'invalid rate limit specified')
2388                         opts.ratelimit = numeric_limit
2389                 if opts.retries is not None:
2390                         try:
2391                                 opts.retries = long(opts.retries)
2392                         except (TypeError, ValueError), err:
2393                                 parser.error(u'invalid retry count specified')
2394                 try:
2395                         opts.playliststart = long(opts.playliststart)
2396                         if opts.playliststart <= 0:
2397                                 raise ValueError
2398                 except (TypeError, ValueError), err:
2399                         parser.error(u'invalid playlist start number specified')
2400                 try:
2401                         opts.playlistend = long(opts.playlistend)
2402                         if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
2403                                 raise ValueError
2404                 except (TypeError, ValueError), err:
2405                         parser.error(u'invalid playlist end number specified')
2406
		# Information extractors
		youtube_ie = YoutubeIE()
		# The metacafe/playlist/user/search extractors receive a concrete
		# extractor instance -- presumably they delegate the per-video
		# extraction to it (constructors not visible here; confirm in their
		# class definitions).
		metacafe_ie = MetacafeIE(youtube_ie)
		dailymotion_ie = DailymotionIE()
		youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
		youtube_user_ie = YoutubeUserIE(youtube_ie)
		youtube_search_ie = YoutubeSearchIE(youtube_ie)
		google_ie = GoogleIE()
		google_search_ie = GoogleSearchIE(google_ie)
		photobucket_ie = PhotobucketIE()
		yahoo_ie = YahooIE()
		yahoo_search_ie = YahooSearchIE(yahoo_ie)
		deposit_files_ie = DepositFilesIE()
		generic_ie = GenericIE()
2421
		# File downloader
		fd = FileDownloader({
			'usenetrc': opts.usenetrc,
			'username': opts.username,
			'password': opts.password,
			# Any of the --get-* options implies quiet operation...
			'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
			'forceurl': opts.geturl,
			'forcetitle': opts.gettitle,
			'forcethumbnail': opts.getthumbnail,
			'forcedescription': opts.getdescription,
			# ...and also implies simulation mode.
			'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
			'format': opts.format,
			'format_limit': opts.format_limit,
			# Output filename template: the first truthy alternative in this
			# or-chain wins.  Priority: an explicit --output template, then
			# the format == '-1' variants (which embed %(format)s so files
			# for different formats do not collide), then the
			# title/literal/autonumber combinations, then the plain
			# video-id default.
			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
				or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
				or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
				or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
				or u'%(id)s.%(ext)s'),
			'ignoreerrors': opts.ignoreerrors,
			'ratelimit': opts.ratelimit,
			'nooverwrites': opts.nooverwrites,
			'retries': opts.retries,
			'continuedl': opts.continue_dl,
			'noprogress': opts.noprogress,
			'playliststart': opts.playliststart,
			'playlistend': opts.playlistend,
			# With '-o -' the video goes to stdout, so log output is routed
			# to stderr -- presumably to keep it out of the video stream.
			'logtostderr': opts.outtmpl == '-',
			# --console-title: show download progress in the console
			# window's titlebar.
			'consoletitle': opts.consoletitle,
			})
		fd.add_info_extractor(youtube_search_ie)
		fd.add_info_extractor(youtube_pl_ie)
		fd.add_info_extractor(youtube_user_ie)
		fd.add_info_extractor(metacafe_ie)
		fd.add_info_extractor(dailymotion_ie)
		fd.add_info_extractor(youtube_ie)
		fd.add_info_extractor(google_ie)
		fd.add_info_extractor(google_search_ie)
		fd.add_info_extractor(photobucket_ie)
		fd.add_info_extractor(yahoo_ie)
		fd.add_info_extractor(yahoo_search_ie)
		fd.add_info_extractor(deposit_files_ie)

		# This must come last since it's the
		# fallback if none of the others work
		fd.add_info_extractor(generic_ie)
2472
		# Update version
		if opts.update_self:
			update_self(fd, sys.argv[0])

		# Maybe do nothing
		if len(all_urls) < 1:
			if not opts.update_self:
				parser.error(u'you must provide at least one URL')
			else:
				# --update-self with no URLs is a complete, valid invocation.
				sys.exit()
		retcode = fd.download(all_urls)

		# Dump cookie jar if requested
		if opts.cookiefile is not None:
			try:
				jar.save()
			except (IOError, OSError), err:
				sys.exit(u'ERROR: unable to save cookie jar')

		# Propagate the downloader's return code as the process exit status.
		sys.exit(retcode)
2493
	except DownloadError:
		# NOTE(review): the error message has presumably already been
		# printed by the downloader; just signal failure via exit status.
		sys.exit(1)
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')