Fixed download from Dailymotion.
[youtube-dl.git] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # Author: Vasyl' Vavrychuk
7 # Author: Witold Baryluk
8 # Author: Paweł Paprota
9 # Author: Gergely Imreh
10 # License: Public domain code
11 import cookielib
12 import ctypes
13 import datetime
14 import email.utils
15 import gzip
16 import htmlentitydefs
17 import httplib
18 import locale
19 import math
20 import netrc
21 import os
22 import os.path
23 import re
24 import socket
25 import string
26 import StringIO
27 import subprocess
28 import sys
29 import time
30 import urllib
31 import urllib2
32 import zlib
33
34 # parse_qs was moved from the cgi module to the urlparse module recently.
35 try:
36         from urlparse import parse_qs
37 except ImportError:
38         from cgi import parse_qs
39
# Default HTTP headers. YoutubeDLHandler.http_request() sets each of these on
# every outgoing request, replacing any value the request already carried.
# Accept-Encoding is removed again for requests that carry the
# "Youtubedl-no-compression" marker header.
std_headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-us,en;q=0.5',
}

# Unicode string holding the ASCII letters followed by the ASCII digits.
# NOTE(review): presumably the character whitelist for simplified titles; the
# code that consumes it is outside this section - confirm.
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
49
def preferredencoding():
        """Get preferred encoding.

        Returns the encoding reported by locale.getpreferredencoding() after
        checking that it can actually encode text. Falls back to 'UTF-8' when
        the locale lookup fails or names an unusable codec.
        """
        try:
                pref = locale.getpreferredencoding()
                # Probe the codec: a broken locale setting can name an
                # encoding Python does not know about.
                u'TEST'.encode(pref)
        except Exception:
                # Deliberately broad (locale.Error, LookupError, UnicodeError,
                # ...), but unlike a bare "except:" it lets KeyboardInterrupt
                # and SystemExit propagate.
                pref = 'UTF-8'
        return pref
65
66 def htmlentity_transform(matchobj):
67         """Transforms an HTML entity to a Unicode character.
68
69         This function receives a match object and is intended to be used with
70         the re.sub() function.
71         """
72         entity = matchobj.group(1)
73
74         # Known non-numeric HTML entity
75         if entity in htmlentitydefs.name2codepoint:
76                 return unichr(htmlentitydefs.name2codepoint[entity])
77
78         # Unicode character
79         mobj = re.match(ur'(?u)#(x?\d+)', entity)
80         if mobj is not None:
81                 numstr = mobj.group(1)
82                 if numstr.startswith(u'x'):
83                         base = 16
84                         numstr = u'0%s' % numstr
85                 else:
86                         base = 10
87                 return unichr(long(numstr, base))
88
89         # Unknown entity in name, return its literal representation
90         return (u'&%s;' % entity)
91
92 def sanitize_title(utitle):
93         """Sanitizes a video title so it could be used as part of a filename."""
94         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
95         return utitle.replace(unicode(os.sep), u'%')
96
97 def sanitize_open(filename, open_mode):
98         """Try to open the given filename, and slightly tweak it if this fails.
99
100         Attempts to open the given filename. If this fails, it tries to change
101         the filename slightly, step by step, until it's either able to open it
102         or it fails and raises a final exception, like the standard open()
103         function.
104
105         It returns the tuple (stream, definitive_file_name).
106         """
107         try:
108                 if filename == u'-':
109                         if sys.platform == 'win32':
110                                 import msvcrt
111                                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
112                         return (sys.stdout, filename)
113                 stream = open(filename, open_mode)
114                 return (stream, filename)
115         except (IOError, OSError), err:
116                 # In case of error, try to remove win32 forbidden chars
117                 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
118
119                 # An exception here should be caught in the caller
120                 stream = open(filename, open_mode)
121                 return (stream, filename)
122
def timeconvert(timestr):
        """Convert an RFC 2822 time string into a system timestamp.

        Returns None when email.utils cannot parse the string.
        """
        parsed = email.utils.parsedate_tz(timestr)
        if parsed is None:
                return None
        return email.utils.mktime_tz(parsed)
130
class DownloadError(Exception):
        """Raised when a download problem is not being ignored.

        FileDownloader.trouble() raises it, carrying the error message, unless
        the 'ignoreerrors' option is set.
        """
139
class SameFileError(Exception):
        """Raised when several downloads would end up in the same file.

        FileDownloader.download() raises it when more than one URL is given
        while the output template is fixed.
        """
147
class PostProcessingError(Exception):
        """Signals a failure inside a postprocessing task.

        A PostProcessor's .run() method may raise it. FileDownloader reports
        it as a postprocessing error.
        """
155
class UnavailableVideoError(Exception):
        """Raised when a video cannot be had in the requested format.

        FileDownloader.process_info() raises it when the download attempt
        fails with OSError or IOError.
        """
163
class ContentTooShortError(Exception):
        """Raised when fewer bytes arrive than the server announced.

        FileDownloader objects may raise it when a downloaded file is smaller
        than the size the server reported first, which indicates the
        connection was probably interrupted.
        """
        # Byte counts: what was actually received and what was announced.
        downloaded = None
        expected = None

        def __init__(self, downloaded, expected):
                self.downloaded, self.expected = downloaded, expected
178
179 class YoutubeDLHandler(urllib2.HTTPHandler):
180         """Handler for HTTP requests and responses.
181
182         This class, when installed with an OpenerDirector, automatically adds
183         the standard headers to every HTTP request and handles gzipped and
184         deflated responses from web servers. If compression is to be avoided in
185         a particular request, the original request in the program code only has
186         to include the HTTP header "Youtubedl-No-Compression", which will be
187         removed before making the real request.
188         
189         Part of this code was copied from:
190
191           http://techknack.net/python-urllib2-handlers/
192           
193         Andrew Rowls, the author of that code, agreed to release it to the
194         public domain.
195         """
196
197         @staticmethod
198         def deflate(data):
199                 try:
200                         return zlib.decompress(data, -zlib.MAX_WBITS)
201                 except zlib.error:
202                         return zlib.decompress(data)
203         
204         @staticmethod
205         def addinfourl_wrapper(stream, headers, url, code):
206                 if hasattr(urllib2.addinfourl, 'getcode'):
207                         return urllib2.addinfourl(stream, headers, url, code)
208                 ret = urllib2.addinfourl(stream, headers, url)
209                 ret.code = code
210                 return ret
211         
212         def http_request(self, req):
213                 for h in std_headers:
214                         if h in req.headers:
215                                 del req.headers[h]
216                         req.add_header(h, std_headers[h])
217                 if 'Youtubedl-no-compression' in req.headers:
218                         if 'Accept-encoding' in req.headers:
219                                 del req.headers['Accept-encoding']
220                         del req.headers['Youtubedl-no-compression']
221                 return req
222
223         def http_response(self, req, resp):
224                 old_resp = resp
225                 # gzip
226                 if resp.headers.get('Content-encoding', '') == 'gzip':
227                         gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
228                         resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
229                         resp.msg = old_resp.msg
230                 # deflate
231                 if resp.headers.get('Content-encoding', '') == 'deflate':
232                         gz = StringIO.StringIO(self.deflate(resp.read()))
233                         resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
234                         resp.msg = old_resp.msg
235                 return resp
236
237 class FileDownloader(object):
238         """File Downloader class.
239
240         File downloader objects are the ones responsible of downloading the
241         actual video file and writing it to disk if the user has requested
242         it, among some other tasks. In most cases there should be one per
243         program. As, given a video URL, the downloader doesn't know how to
244         extract all the needed information, task that InfoExtractors do, it
245         has to pass the URL to one of them.
246
247         For this, file downloader objects have a method that allows
248         InfoExtractors to be registered in a given order. When it is passed
249         a URL, the file downloader handles it to the first InfoExtractor it
250         finds that reports being able to handle it. The InfoExtractor extracts
251         all the information about the video or videos the URL refers to, and
252         asks the FileDownloader to process the video information, possibly
253         downloading the video.
254
255         File downloaders accept a lot of parameters. In order not to saturate
256         the object constructor with arguments, it receives a dictionary of
257         options instead. These options are available through the params
258         attribute for the InfoExtractors to use. The FileDownloader also
259         registers itself as the downloader in charge for the InfoExtractors
260         that are added to it, so this is a "mutual registration".
261
262         Available options:
263
264         username:         Username for authentication purposes.
265         password:         Password for authentication purposes.
266         usenetrc:         Use netrc for authentication instead.
267         quiet:            Do not print messages to stdout.
268         forceurl:         Force printing final URL.
269         forcetitle:       Force printing title.
270         forcethumbnail:   Force printing thumbnail URL.
271         forcedescription: Force printing description.
272         forcefilename:    Force printing final filename.
273         simulate:         Do not download the video files.
274         format:           Video format code.
275         format_limit:     Highest quality format to try.
276         outtmpl:          Template for output names.
277         ignoreerrors:     Do not stop on download errors.
278         ratelimit:        Download speed limit, in bytes/sec.
279         nooverwrites:     Prevent overwriting files.
280         retries:          Number of times to retry for HTTP error 5xx
281         continuedl:       Try to continue downloads if possible.
282         noprogress:       Do not print the progress bar.
283         playliststart:    Playlist item to start at.
284         playlistend:      Playlist item to end at.
285         logtostderr:      Log messages to stderr instead of stdout.
286         consoletitle:     Display progress in console window's titlebar.
287         nopart:           Do not use temporary .part files.
288         updatetime:       Use the Last-modified header to set output file timestamps.
289         """
290
291         params = None
292         _ies = []
293         _pps = []
294         _download_retcode = None
295         _num_downloads = None
296         _screen_file = None
297
298         def __init__(self, params):
299                 """Create a FileDownloader object with the given options."""
300                 self._ies = []
301                 self._pps = []
302                 self._download_retcode = 0
303                 self._num_downloads = 0
304                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
305                 self.params = params
306
307         @staticmethod
308         def pmkdir(filename):
309                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
310                 components = filename.split(os.sep)
311                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
312                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
313                 for dir in aggregate:
314                         if not os.path.exists(dir):
315                                 os.mkdir(dir)
316
317         @staticmethod
318         def format_bytes(bytes):
319                 if bytes is None:
320                         return 'N/A'
321                 if type(bytes) is str:
322                         bytes = float(bytes)
323                 if bytes == 0.0:
324                         exponent = 0
325                 else:
326                         exponent = long(math.log(bytes, 1024.0))
327                 suffix = 'bkMGTPEZY'[exponent]
328                 converted = float(bytes) / float(1024**exponent)
329                 return '%.2f%s' % (converted, suffix)
330
331         @staticmethod
332         def calc_percent(byte_counter, data_len):
333                 if data_len is None:
334                         return '---.-%'
335                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
336
337         @staticmethod
338         def calc_eta(start, now, total, current):
339                 if total is None:
340                         return '--:--'
341                 dif = now - start
342                 if current == 0 or dif < 0.001: # One millisecond
343                         return '--:--'
344                 rate = float(current) / dif
345                 eta = long((float(total) - float(current)) / rate)
346                 (eta_mins, eta_secs) = divmod(eta, 60)
347                 if eta_mins > 99:
348                         return '--:--'
349                 return '%02d:%02d' % (eta_mins, eta_secs)
350
351         @staticmethod
352         def calc_speed(start, now, bytes):
353                 dif = now - start
354                 if bytes == 0 or dif < 0.001: # One millisecond
355                         return '%10s' % '---b/s'
356                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
357
358         @staticmethod
359         def best_block_size(elapsed_time, bytes):
360                 new_min = max(bytes / 2.0, 1.0)
361                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
362                 if elapsed_time < 0.001:
363                         return long(new_max)
364                 rate = bytes / elapsed_time
365                 if rate > new_max:
366                         return long(new_max)
367                 if rate < new_min:
368                         return long(new_min)
369                 return long(rate)
370
371         @staticmethod
372         def parse_bytes(bytestr):
373                 """Parse a string indicating a byte quantity into a long integer."""
374                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
375                 if matchobj is None:
376                         return None
377                 number = float(matchobj.group(1))
378                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
379                 return long(round(number * multiplier))
380
381         def add_info_extractor(self, ie):
382                 """Add an InfoExtractor object to the end of the list."""
383                 self._ies.append(ie)
384                 ie.set_downloader(self)
385
386         def add_post_processor(self, pp):
387                 """Add a PostProcessor object to the end of the chain."""
388                 self._pps.append(pp)
389                 pp.set_downloader(self)
390
391         def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
392                 """Print message to stdout if not in quiet mode."""
393                 try:
394                         if not self.params.get('quiet', False):
395                                 terminator = [u'\n', u''][skip_eol]
396                                 print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
397                         self._screen_file.flush()
398                 except (UnicodeEncodeError), err:
399                         if not ignore_encoding_errors:
400                                 raise
401
402         def to_stderr(self, message):
403                 """Print message to stderr."""
404                 print >>sys.stderr, message.encode(preferredencoding())
405
406         def to_cons_title(self, message):
407                 """Set console/terminal window title to message."""
408                 if not self.params.get('consoletitle', False):
409                         return
410                 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
411                         # c_wchar_p() might not be necessary if `message` is
412                         # already of type unicode()
413                         ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
414                 elif 'TERM' in os.environ:
415                         sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
416
417         def fixed_template(self):
418                 """Checks if the output template is fixed."""
419                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
420
421         def trouble(self, message=None):
422                 """Determine action to take when a download problem appears.
423
424                 Depending on if the downloader has been configured to ignore
425                 download errors or not, this method may throw an exception or
426                 not when errors are found, after printing the message.
427                 """
428                 if message is not None:
429                         self.to_stderr(message)
430                 if not self.params.get('ignoreerrors', False):
431                         raise DownloadError(message)
432                 self._download_retcode = 1
433
434         def slow_down(self, start_time, byte_counter):
435                 """Sleep if the download speed is over the rate limit."""
436                 rate_limit = self.params.get('ratelimit', None)
437                 if rate_limit is None or byte_counter == 0:
438                         return
439                 now = time.time()
440                 elapsed = now - start_time
441                 if elapsed <= 0.0:
442                         return
443                 speed = float(byte_counter) / elapsed
444                 if speed > rate_limit:
445                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
446
447         def temp_name(self, filename):
448                 """Returns a temporary filename for the given filename."""
449                 if self.params.get('nopart', False) or filename == u'-' or \
450                                 (os.path.exists(filename) and not os.path.isfile(filename)):
451                         return filename
452                 return filename + u'.part'
453
454         def undo_temp_name(self, filename):
455                 if filename.endswith(u'.part'):
456                         return filename[:-len(u'.part')]
457                 return filename
458
459         def try_rename(self, old_filename, new_filename):
460                 try:
461                         if old_filename == new_filename:
462                                 return
463                         os.rename(old_filename, new_filename)
464                 except (IOError, OSError), err:
465                         self.trouble(u'ERROR: unable to rename file')
466         
467         def try_utime(self, filename, last_modified_hdr):
468                 """Try to set the last-modified time of the given file."""
469                 if last_modified_hdr is None:
470                         return
471                 if not os.path.isfile(filename):
472                         return
473                 timestr = last_modified_hdr
474                 if timestr is None:
475                         return
476                 filetime = timeconvert(timestr)
477                 if filetime is None:
478                         return
479                 try:
480                         os.utime(filename,(time.time(), filetime))
481                 except:
482                         pass
483
484         def report_destination(self, filename):
485                 """Report destination filename."""
486                 self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
487
488         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
489                 """Report download progress."""
490                 if self.params.get('noprogress', False):
491                         return
492                 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
493                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
494                 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
495                                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
496
497         def report_resuming_byte(self, resume_len):
498                 """Report attempt to resume at given byte."""
499                 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
500
501         def report_retry(self, count, retries):
502                 """Report retry in case of HTTP error 5xx"""
503                 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
504
505         def report_file_already_downloaded(self, file_name):
506                 """Report file has already been fully downloaded."""
507                 try:
508                         self.to_screen(u'[download] %s has already been downloaded' % file_name)
509                 except (UnicodeEncodeError), err:
510                         self.to_screen(u'[download] The file has already been downloaded')
511
512         def report_unable_to_resume(self):
513                 """Report it was impossible to resume download."""
514                 self.to_screen(u'[download] Unable to resume')
515
516         def report_finish(self):
517                 """Report download finished."""
518                 if self.params.get('noprogress', False):
519                         self.to_screen(u'[download] Download completed')
520                 else:
521                         self.to_screen(u'')
522
523         def increment_downloads(self):
524                 """Increment the ordinal that assigns a number to each file."""
525                 self._num_downloads += 1
526
527         def prepare_filename(self, info_dict):
528                 """Generate the output filename."""
529                 try:
530                         template_dict = dict(info_dict)
531                         template_dict['epoch'] = unicode(long(time.time()))
532                         template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
533                         filename = self.params['outtmpl'] % template_dict
534                         return filename
535                 except (ValueError, KeyError), err:
536                         self.trouble(u'ERROR: invalid system charset or erroneous output template')
537                         return None
538
539         def process_info(self, info_dict):
540                 """Process a single dictionary returned by an InfoExtractor."""
541                 filename = self.prepare_filename(info_dict)
542                 # Do nothing else if in simulate mode
543                 if self.params.get('simulate', False):
544                         # Forced printings
545                         if self.params.get('forcetitle', False):
546                                 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
547                         if self.params.get('forceurl', False):
548                                 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
549                         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
550                                 print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
551                         if self.params.get('forcedescription', False) and 'description' in info_dict:
552                                 print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
553                         if self.params.get('forcefilename', False) and filename is not None:
554                                 print filename.encode(preferredencoding(), 'xmlcharrefreplace')
555
556                         return
557
558                 if filename is None:
559                         return
560                 if self.params.get('nooverwrites', False) and os.path.exists(filename):
561                         self.to_stderr(u'WARNING: file exists and will be skipped')
562                         return
563
564                 try:
565                         self.pmkdir(filename)
566                 except (OSError, IOError), err:
567                         self.trouble(u'ERROR: unable to create directories: %s' % str(err))
568                         return
569
570                 try:
571                         success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
572                 except (OSError, IOError), err:
573                         raise UnavailableVideoError
574                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
575                         self.trouble(u'ERROR: unable to download video data: %s' % str(err))
576                         return
577                 except (ContentTooShortError, ), err:
578                         self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
579                         return
580
581                 if success:
582                         try:
583                                 self.post_process(filename, info_dict)
584                         except (PostProcessingError), err:
585                                 self.trouble(u'ERROR: postprocessing: %s' % str(err))
586                                 return
587
588         def download(self, url_list):
589                 """Download a given list of URLs."""
590                 if len(url_list) > 1 and self.fixed_template():
591                         raise SameFileError(self.params['outtmpl'])
592
593                 for url in url_list:
594                         suitable_found = False
595                         for ie in self._ies:
596                                 # Go to next InfoExtractor if not suitable
597                                 if not ie.suitable(url):
598                                         continue
599
600                                 # Suitable InfoExtractor found
601                                 suitable_found = True
602
603                                 # Extract information from URL and process it
604                                 ie.extract(url)
605
606                                 # Suitable InfoExtractor had been found; go to next URL
607                                 break
608
609                         if not suitable_found:
610                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
611
612                 return self._download_retcode
613
614         def post_process(self, filename, ie_info):
615                 """Run the postprocessing chain on the given file."""
616                 info = dict(ie_info)
617                 info['filepath'] = filename
618                 for pp in self._pps:
619                         info = pp.run(info)
620                         if info is None:
621                                 break
622
623         def _download_with_rtmpdump(self, filename, url, player_url):
624                 self.report_destination(filename)
625                 tmpfilename = self.temp_name(filename)
626
627                 # Check for rtmpdump first
628                 try:
629                         subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
630                 except (OSError, IOError):
631                         self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
632                         return False
633
634                 # Download using rtmpdump. rtmpdump returns exit code 2 when
635                 # the connection was interrumpted and resuming appears to be
636                 # possible. This is part of rtmpdump's normal usage, AFAIK.
637                 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
638                 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
639                 while retval == 2 or retval == 1:
640                         prevsize = os.path.getsize(tmpfilename)
641                         self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
642                         time.sleep(5.0) # This seems to be needed
643                         retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
644                         cursize = os.path.getsize(tmpfilename)
645                         if prevsize == cursize and retval == 1:
646                                 break
647                 if retval == 0:
648                         self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
649                         self.try_rename(tmpfilename, filename)
650                         return True
651                 else:
652                         self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
653                         return False
654
655         def _do_download(self, filename, url, player_url):
656                 # Check file already present
657                 if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
658                         self.report_file_already_downloaded(filename)
659                         return True
660
661                 # Attempt to download using rtmpdump
662                 if url.startswith('rtmp'):
663                         return self._download_with_rtmpdump(filename, url, player_url)
664
665                 tmpfilename = self.temp_name(filename)
666                 stream = None
667                 open_mode = 'wb'
668
669                 # Do not include the Accept-Encoding header
670                 headers = {'Youtubedl-no-compression': 'True'}
671                 basic_request = urllib2.Request(url, None, headers)
672                 request = urllib2.Request(url, None, headers)
673
674                 # Establish possible resume length
675                 if os.path.isfile(tmpfilename):
676                         resume_len = os.path.getsize(tmpfilename)
677                 else:
678                         resume_len = 0
679
680                 # Request parameters in case of being able to resume
681                 if self.params.get('continuedl', False) and resume_len != 0:
682                         self.report_resuming_byte(resume_len)
683                         request.add_header('Range','bytes=%d-' % resume_len)
684                         open_mode = 'ab'
685
686                 count = 0
687                 retries = self.params.get('retries', 0)
688                 while count <= retries:
689                         # Establish connection
690                         try:
691                                 data = urllib2.urlopen(request)
692                                 break
693                         except (urllib2.HTTPError, ), err:
694                                 if (err.code < 500 or err.code >= 600) and err.code != 416:
695                                         # Unexpected HTTP error
696                                         raise
697                                 elif err.code == 416:
698                                         # Unable to resume (requested range not satisfiable)
699                                         try:
700                                                 # Open the connection again without the range header
701                                                 data = urllib2.urlopen(basic_request)
702                                                 content_length = data.info()['Content-Length']
703                                         except (urllib2.HTTPError, ), err:
704                                                 if err.code < 500 or err.code >= 600:
705                                                         raise
706                                         else:
707                                                 # Examine the reported length
708                                                 if (content_length is not None and
709                                                     (resume_len - 100 < long(content_length) < resume_len + 100)):
710                                                         # The file had already been fully downloaded.
711                                                         # Explanation to the above condition: in issue #175 it was revealed that
712                                                         # YouTube sometimes adds or removes a few bytes from the end of the file,
713                                                         # changing the file size slightly and causing problems for some users. So
714                                                         # I decided to implement a suggested change and consider the file
715                                                         # completely downloaded if the file size differs less than 100 bytes from
716                                                         # the one in the hard drive.
717                                                         self.report_file_already_downloaded(filename)
718                                                         self.try_rename(tmpfilename, filename)
719                                                         return True
720                                                 else:
721                                                         # The length does not match, we start the download over
722                                                         self.report_unable_to_resume()
723                                                         open_mode = 'wb'
724                                                         break
725                         # Retry
726                         count += 1
727                         if count <= retries:
728                                 self.report_retry(count, retries)
729
730                 if count > retries:
731                         self.trouble(u'ERROR: giving up after %s retries' % retries)
732                         return False
733
734                 data_len = data.info().get('Content-length', None)
735                 if data_len is not None:
736                         data_len = long(data_len) + resume_len
737                 data_len_str = self.format_bytes(data_len)
738                 byte_counter = 0 + resume_len
739                 block_size = 1024
740                 start = time.time()
741                 while True:
742                         # Download and write
743                         before = time.time()
744                         data_block = data.read(block_size)
745                         after = time.time()
746                         if len(data_block) == 0:
747                                 break
748                         byte_counter += len(data_block)
749
750                         # Open file just in time
751                         if stream is None:
752                                 try:
753                                         (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
754                                         filename = self.undo_temp_name(tmpfilename)
755                                         self.report_destination(filename)
756                                 except (OSError, IOError), err:
757                                         self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
758                                         return False
759                         try:
760                                 stream.write(data_block)
761                         except (IOError, OSError), err:
762                                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
763                                 return False
764                         block_size = self.best_block_size(after - before, len(data_block))
765
766                         # Progress message
767                         percent_str = self.calc_percent(byte_counter, data_len)
768                         eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
769                         speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
770                         self.report_progress(percent_str, data_len_str, speed_str, eta_str)
771
772                         # Apply rate limit
773                         self.slow_down(start, byte_counter - resume_len)
774
775                 stream.close()
776                 self.report_finish()
777                 if data_len is not None and byte_counter != data_len:
778                         raise ContentTooShortError(byte_counter, long(data_len))
779                 self.try_rename(tmpfilename, filename)
780
781                 # Update file modification time
782                 if self.params.get('updatetime', True):
783                         self.try_utime(filename, data.info().get('last-modified', None))
784
785                 return True
786
class InfoExtractor(object):
        """Base class for information extractors.

        An information extractor receives a URL and extracts information about
        the video (or videos) the URL refers to: the real video URL, the video
        title and simplified title, the author and others. That information is
        stored in a dictionary and handed to the FileDownloader, which
        processes it, possibly downloading the video to the file system, among
        other possible outcomes. Every dictionary must contain these fields:

        id:             Video identifier.
        url:            Final video URL.
        uploader:       Nickname of the video uploader.
        title:          Literal title.
        stitle:         Simplified title.
        ext:            Video filename extension.
        format:         Video format.
        player_url:     SWF Player URL (may be None).

        These fields are optional. They mainly exist so youtube-dl can act as
        the backend of a video search function, such as the one in
        youtube2mp3, and are only used when their respective forced printing
        functions are called:

        thumbnail:      Full URL to a video thumbnail image.
        description:    One-line video description.

        Subclasses should re-define _real_initialize() and _real_extract(), as
        well as the suitable() static method. They should probably also be
        instantiated and added to the main downloader.
        """

        # Downloader this extractor reports to (None until one is set)
        _downloader = None
        # Whether _real_initialize() has already been run for this instance
        _ready = False

        def __init__(self, downloader=None):
                """Create an extractor, optionally bound to a downloader."""
                self._ready = False
                self.set_downloader(downloader)

        @staticmethod
        def suitable(url):
                """Tell whether this IE can handle the given URL (never, for the base class)."""
                return False

        def initialize(self):
                """Run the real initialization (authentication, etc) once per instance."""
                if self._ready:
                        return
                self._real_initialize()
                self._ready = True

        def extract(self, url):
                """Initialize if needed and return what _real_extract() yields for url."""
                self.initialize()
                extracted = self._real_extract(url)
                return extracted

        def set_downloader(self, downloader):
                """Attach the downloader this IE works for."""
                self._downloader = downloader

        def _real_initialize(self):
                """Hook for the actual initialization. Redefine in subclasses."""

        def _real_extract(self, url):
                """Hook for the actual extraction. Redefine in subclasses."""
857
858 class YoutubeIE(InfoExtractor):
859         """Information extractor for youtube.com."""
860
861         _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
862         _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
863         _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
864         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
865         _NETRC_MACHINE = 'youtube'
866         # Listed in order of quality
867         _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
868         _video_extensions = {
869                 '13': '3gp',
870                 '17': 'mp4',
871                 '18': 'mp4',
872                 '22': 'mp4',
873                 '37': 'mp4',
874                 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
875                 '43': 'webm',
876                 '45': 'webm',
877         }
878
879         @staticmethod
880         def suitable(url):
881                 return (re.match(YoutubeIE._VALID_URL, url) is not None)
882
883         def report_lang(self):
884                 """Report attempt to set language."""
885                 self._downloader.to_screen(u'[youtube] Setting language')
886
887         def report_login(self):
888                 """Report attempt to log in."""
889                 self._downloader.to_screen(u'[youtube] Logging in')
890
891         def report_age_confirmation(self):
892                 """Report attempt to confirm age."""
893                 self._downloader.to_screen(u'[youtube] Confirming age')
894
895         def report_video_webpage_download(self, video_id):
896                 """Report attempt to download video webpage."""
897                 self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
898
899         def report_video_info_webpage_download(self, video_id):
900                 """Report attempt to download video info webpage."""
901                 self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
902
903         def report_information_extraction(self, video_id):
904                 """Report attempt to extract video information."""
905                 self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
906
907         def report_unavailable_format(self, video_id, format):
908                 """Report extracted video URL."""
909                 self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
910
911         def report_rtmp_download(self):
912                 """Indicate the download will use the RTMP protocol."""
913                 self._downloader.to_screen(u'[youtube] RTMP download detected')
914
915         def _real_initialize(self):
916                 if self._downloader is None:
917                         return
918
919                 username = None
920                 password = None
921                 downloader_params = self._downloader.params
922
923                 # Attempt to use provided username and password or .netrc data
924                 if downloader_params.get('username', None) is not None:
925                         username = downloader_params['username']
926                         password = downloader_params['password']
927                 elif downloader_params.get('usenetrc', False):
928                         try:
929                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
930                                 if info is not None:
931                                         username = info[0]
932                                         password = info[2]
933                                 else:
934                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
935                         except (IOError, netrc.NetrcParseError), err:
936                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
937                                 return
938
939                 # Set language
940                 request = urllib2.Request(self._LANG_URL)
941                 try:
942                         self.report_lang()
943                         urllib2.urlopen(request).read()
944                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
945                         self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
946                         return
947
948                 # No authentication to be performed
949                 if username is None:
950                         return
951
952                 # Log in
953                 login_form = {
954                                 'current_form': 'loginForm',
955                                 'next':         '/',
956                                 'action_login': 'Log In',
957                                 'username':     username,
958                                 'password':     password,
959                                 }
960                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
961                 try:
962                         self.report_login()
963                         login_results = urllib2.urlopen(request).read()
964                         if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
965                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
966                                 return
967                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
968                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
969                         return
970
971                 # Confirm age
972                 age_form = {
973                                 'next_url':             '/',
974                                 'action_confirm':       'Confirm',
975                                 }
976                 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
977                 try:
978                         self.report_age_confirmation()
979                         age_results = urllib2.urlopen(request).read()
980                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
981                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
982                         return
983
984         def _real_extract(self, url):
985                 # Extract video id from URL
986                 mobj = re.match(self._VALID_URL, url)
987                 if mobj is None:
988                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
989                         return
990                 video_id = mobj.group(2)
991
992                 # Get video webpage
993                 self.report_video_webpage_download(video_id)
994                 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id)
995                 try:
996                         video_webpage = urllib2.urlopen(request).read()
997                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
998                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
999                         return
1000
1001                 # Attempt to extract SWF player URL
1002                 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1003                 if mobj is not None:
1004                         player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1005                 else:
1006                         player_url = None
1007
1008                 # Get video info
1009                 self.report_video_info_webpage_download(video_id)
1010                 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1011                         video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1012                                            % (video_id, el_type))
1013                         request = urllib2.Request(video_info_url)
1014                         try:
1015                                 video_info_webpage = urllib2.urlopen(request).read()
1016                                 video_info = parse_qs(video_info_webpage)
1017                                 if 'token' in video_info:
1018                                         break
1019                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1020                                 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
1021                                 return
1022                 if 'token' not in video_info:
1023                         if 'reason' in video_info:
1024                                 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
1025                         else:
1026                                 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
1027                         return
1028
1029                 # Start extracting information
1030                 self.report_information_extraction(video_id)
1031
1032                 # uploader
1033                 if 'author' not in video_info:
1034                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1035                         return
1036                 video_uploader = urllib.unquote_plus(video_info['author'][0])
1037
1038                 # title
1039                 if 'title' not in video_info:
1040                         self._downloader.trouble(u'ERROR: unable to extract video title')
1041                         return
1042                 video_title = urllib.unquote_plus(video_info['title'][0])
1043                 video_title = video_title.decode('utf-8')
1044                 video_title = sanitize_title(video_title)
1045
1046                 # simplified title
1047                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1048                 simple_title = simple_title.strip(ur'_')
1049
1050                 # thumbnail image
1051                 if 'thumbnail_url' not in video_info:
1052                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
1053                         video_thumbnail = ''
1054                 else:   # don't panic if we can't find it
1055                         video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
1056
1057                 # upload date
1058                 upload_date = u'NA'
1059                 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1060                 if mobj is not None:
1061                         upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1062                         format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
1063                         for expression in format_expressions:
1064                                 try:
1065                                         upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
1066                                 except:
1067                                         pass
1068
1069                 # description
1070                 video_description = 'No description available.'
1071                 if self._downloader.params.get('forcedescription', False):
1072                         mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
1073                         if mobj is not None:
1074                                 video_description = mobj.group(1)
1075
1076                 # token
1077                 video_token = urllib.unquote_plus(video_info['token'][0])
1078
1079                 # Decide which formats to download
1080                 req_format = self._downloader.params.get('format', None)
1081
1082                 if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1 and ',' in video_info['fmt_url_map'][0]:
1083                         url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
1084                         format_limit = self._downloader.params.get('format_limit', None)
1085                         if format_limit is not None and format_limit in self._available_formats:
1086                                 format_list = self._available_formats[self._available_formats.index(format_limit):]
1087                         else:
1088                                 format_list = self._available_formats
1089                         existing_formats = [x for x in format_list if x in url_map]
1090                         if len(existing_formats) == 0:
1091                                 self._downloader.trouble(u'ERROR: no known formats available for video')
1092                                 return
1093                         if req_format is None:
1094                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1095                         elif req_format == '-1':
1096                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1097                         else:
1098                                 # Specific format
1099                                 if req_format not in url_map:
1100                                         self._downloader.trouble(u'ERROR: requested format not available')
1101                                         return
1102                                 video_url_list = [(req_format, url_map[req_format])] # Specific format
1103
1104                 elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1105                         self.report_rtmp_download()
1106                         video_url_list = [(None, video_info['conn'][0])]
1107
1108                 else:
1109                         self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
1110                         return
1111
1112                 for format_param, video_real_url in video_url_list:
1113                         # At this point we have a new video
1114                         self._downloader.increment_downloads()
1115
1116                         # Extension
1117                         video_extension = self._video_extensions.get(format_param, 'flv')
1118
1119                         # Find the video URL in fmt_url_map or conn paramters
1120                         try:
1121                                 # Process video information
1122                                 self._downloader.process_info({
1123                                         'id':           video_id.decode('utf-8'),
1124                                         'url':          video_real_url.decode('utf-8'),
1125                                         'uploader':     video_uploader.decode('utf-8'),
1126                                         'upload_date':  upload_date,
1127                                         'title':        video_title,
1128                                         'stitle':       simple_title,
1129                                         'ext':          video_extension.decode('utf-8'),
1130                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
1131                                         'thumbnail':    video_thumbnail.decode('utf-8'),
1132                                         'description':  video_description.decode('utf-8'),
1133                                         'player_url':   player_url,
1134                                 })
1135                         except UnavailableVideoError, err:
1136                                 self._downloader.trouble(u'\nERROR: unable to download video')
1137
1138
1139 class MetacafeIE(InfoExtractor):
1140         """Information Extractor for metacafe.com."""
1141
1142         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
1143         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
1144         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
1145         _youtube_ie = None
1146
1147         def __init__(self, youtube_ie, downloader=None):
1148                 InfoExtractor.__init__(self, downloader)
1149                 self._youtube_ie = youtube_ie
1150
1151         @staticmethod
1152         def suitable(url):
1153                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
1154
1155         def report_disclaimer(self):
1156                 """Report disclaimer retrieval."""
1157                 self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
1158
1159         def report_age_confirmation(self):
1160                 """Report attempt to confirm age."""
1161                 self._downloader.to_screen(u'[metacafe] Confirming age')
1162
1163         def report_download_webpage(self, video_id):
1164                 """Report webpage download."""
1165                 self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
1166
1167         def report_extraction(self, video_id):
1168                 """Report information extraction."""
1169                 self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
1170
1171         def _real_initialize(self):
1172                 # Retrieve disclaimer
1173                 request = urllib2.Request(self._DISCLAIMER)
1174                 try:
1175                         self.report_disclaimer()
1176                         disclaimer = urllib2.urlopen(request).read()
1177                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1178                         self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
1179                         return
1180
1181                 # Confirm age
1182                 disclaimer_form = {
1183                         'filters': '0',
1184                         'submit': "Continue - I'm over 18",
1185                         }
1186                 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
1187                 try:
1188                         self.report_age_confirmation()
1189                         disclaimer = urllib2.urlopen(request).read()
1190                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1191                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
1192                         return
1193
1194         def _real_extract(self, url):
1195                 # Extract id and simplified title from URL
1196                 mobj = re.match(self._VALID_URL, url)
1197                 if mobj is None:
1198                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1199                         return
1200
1201                 video_id = mobj.group(1)
1202
1203                 # Check if video comes from YouTube
1204                 mobj2 = re.match(r'^yt-(.*)$', video_id)
1205                 if mobj2 is not None:
1206                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1207                         return
1208
1209                 # At this point we have a new video
1210                 self._downloader.increment_downloads()
1211
1212                 simple_title = mobj.group(2).decode('utf-8')
1213
1214                 # Retrieve video webpage to extract further information
1215                 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1216                 try:
1217                         self.report_download_webpage(video_id)
1218                         webpage = urllib2.urlopen(request).read()
1219                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1220                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1221                         return
1222
1223                 # Extract URL, uploader and title from webpage
1224                 self.report_extraction(video_id)
1225                 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
1226                 if mobj is not None:
1227                         mediaURL = urllib.unquote(mobj.group(1))
1228                         video_extension = mediaURL[-3:]
1229
1230                         # Extract gdaKey if available
1231                         mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1232                         if mobj is None:
1233                                 video_url = mediaURL
1234                         else:
1235                                 gdaKey = mobj.group(1)
1236                                 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
1237                 else:
1238                         mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
1239                         if mobj is None:
1240                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1241                                 return
1242                         vardict = parse_qs(mobj.group(1))
1243                         if 'mediaData' not in vardict:
1244                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1245                                 return
1246                         mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
1247                         if mobj is None:
1248                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1249                                 return
1250                         mediaURL = mobj.group(1).replace('\\/', '/')
1251                         video_extension = mediaURL[-3:]
1252                         video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
1253
1254                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
1255                 if mobj is None:
1256                         self._downloader.trouble(u'ERROR: unable to extract title')
1257                         return
1258                 video_title = mobj.group(1).decode('utf-8')
1259                 video_title = sanitize_title(video_title)
1260
1261                 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
1262                 if mobj is None:
1263                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1264                         return
1265                 video_uploader = mobj.group(1)
1266
1267                 try:
1268                         # Process video information
1269                         self._downloader.process_info({
1270                                 'id':           video_id.decode('utf-8'),
1271                                 'url':          video_url.decode('utf-8'),
1272                                 'uploader':     video_uploader.decode('utf-8'),
1273                                 'upload_date':  u'NA',
1274                                 'title':        video_title,
1275                                 'stitle':       simple_title,
1276                                 'ext':          video_extension.decode('utf-8'),
1277                                 'format':       u'NA',
1278                                 'player_url':   None,
1279                         })
1280                 except UnavailableVideoError:
1281                         self._downloader.trouble(u'\nERROR: unable to download video')
1282
1283
1284 class DailymotionIE(InfoExtractor):
1285         """Information Extractor for Dailymotion"""
1286
1287         _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1288
1289         def __init__(self, downloader=None):
1290                 InfoExtractor.__init__(self, downloader)
1291
1292         @staticmethod
1293         def suitable(url):
1294                 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1295
1296         def report_download_webpage(self, video_id):
1297                 """Report webpage download."""
1298                 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
1299
1300         def report_extraction(self, video_id):
1301                 """Report information extraction."""
1302                 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
1303
1304         def _real_initialize(self):
1305                 return
1306
1307         def _real_extract(self, url):
1308                 # Extract id and simplified title from URL
1309                 mobj = re.match(self._VALID_URL, url)
1310                 if mobj is None:
1311                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1312                         return
1313
1314                 # At this point we have a new video
1315                 self._downloader.increment_downloads()
1316                 video_id = mobj.group(1)
1317
1318                 simple_title = mobj.group(2).decode('utf-8')
1319                 video_extension = 'flv'
1320
1321                 # Retrieve video webpage to extract further information
1322                 request = urllib2.Request(url)
1323                 request.add_header('Cookie', 'family_filter=off')
1324                 try:
1325                         self.report_download_webpage(video_id)
1326                         webpage = urllib2.urlopen(request).read()
1327                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1328                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1329                         return
1330
1331                 # Extract URL, uploader and title from webpage
1332                 self.report_extraction(video_id)
1333                 mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage)
1334                 if mobj is None:
1335                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1336                         return
1337                 sequence = urllib.unquote(mobj.group(1))
1338                 mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence)
1339                 if mobj is None:
1340                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1341                         return
1342                 mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '')
1343
1344                 # if needed add http://www.dailymotion.com/ if relative URL
1345
1346                 video_url = mediaURL
1347
1348                 mobj = re.search(r'(?im)<title>Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?</title>', webpage)
1349                 if mobj is None:
1350                         self._downloader.trouble(u'ERROR: unable to extract title')
1351                         return
1352                 video_title = mobj.group(1).decode('utf-8')
1353                 video_title = sanitize_title(video_title)
1354
1355                 mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
1356                 if mobj is None:
1357                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1358                         return
1359                 video_uploader = mobj.group(1)
1360
1361                 try:
1362                         # Process video information
1363                         self._downloader.process_info({
1364                                 'id':           video_id.decode('utf-8'),
1365                                 'url':          video_url.decode('utf-8'),
1366                                 'uploader':     video_uploader.decode('utf-8'),
1367                                 'upload_date':  u'NA',
1368                                 'title':        video_title,
1369                                 'stitle':       simple_title,
1370                                 'ext':          video_extension.decode('utf-8'),
1371                                 'format':       u'NA',
1372                                 'player_url':   None,
1373                         })
1374                 except UnavailableVideoError:
1375                         self._downloader.trouble(u'\nERROR: unable to download video')
1376
1377 class GoogleIE(InfoExtractor):
1378         """Information extractor for video.google.com."""
1379
1380         _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1381
1382         def __init__(self, downloader=None):
1383                 InfoExtractor.__init__(self, downloader)
1384
1385         @staticmethod
1386         def suitable(url):
1387                 return (re.match(GoogleIE._VALID_URL, url) is not None)
1388
1389         def report_download_webpage(self, video_id):
1390                 """Report webpage download."""
1391                 self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
1392
1393         def report_extraction(self, video_id):
1394                 """Report information extraction."""
1395                 self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
1396
1397         def _real_initialize(self):
1398                 return
1399
1400         def _real_extract(self, url):
1401                 # Extract id from URL
1402                 mobj = re.match(self._VALID_URL, url)
1403                 if mobj is None:
1404                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1405                         return
1406
1407                 # At this point we have a new video
1408                 self._downloader.increment_downloads()
1409                 video_id = mobj.group(1)
1410
1411                 video_extension = 'mp4'
1412
1413                 # Retrieve video webpage to extract further information
1414                 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1415                 try:
1416                         self.report_download_webpage(video_id)
1417                         webpage = urllib2.urlopen(request).read()
1418                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1419                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1420                         return
1421
1422                 # Extract URL, uploader, and title from webpage
1423                 self.report_extraction(video_id)
1424                 mobj = re.search(r"download_url:'([^']+)'", webpage)
1425                 if mobj is None:
1426                         video_extension = 'flv'
1427                         mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1428                 if mobj is None:
1429                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1430                         return
1431                 mediaURL = urllib.unquote(mobj.group(1))
1432                 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1433                 mediaURL = mediaURL.replace('\\x26', '\x26')
1434
1435                 video_url = mediaURL
1436
1437                 mobj = re.search(r'<title>(.*)</title>', webpage)
1438                 if mobj is None:
1439                         self._downloader.trouble(u'ERROR: unable to extract title')
1440                         return
1441                 video_title = mobj.group(1).decode('utf-8')
1442                 video_title = sanitize_title(video_title)
1443                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1444
1445                 # Extract video description
1446                 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1447                 if mobj is None:
1448                         self._downloader.trouble(u'ERROR: unable to extract video description')
1449                         return
1450                 video_description = mobj.group(1).decode('utf-8')
1451                 if not video_description:
1452                         video_description = 'No description available.'
1453
1454                 # Extract video thumbnail
1455                 if self._downloader.params.get('forcethumbnail', False):
1456                         request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1457                         try:
1458                                 webpage = urllib2.urlopen(request).read()
1459                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1460                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1461                                 return
1462                         mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1463                         if mobj is None:
1464                                 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1465                                 return
1466                         video_thumbnail = mobj.group(1)
1467                 else:   # we need something to pass to process_info
1468                         video_thumbnail = ''
1469
1470
1471                 try:
1472                         # Process video information
1473                         self._downloader.process_info({
1474                                 'id':           video_id.decode('utf-8'),
1475                                 'url':          video_url.decode('utf-8'),
1476                                 'uploader':     u'NA',
1477                                 'upload_date':  u'NA',
1478                                 'title':        video_title,
1479                                 'stitle':       simple_title,
1480                                 'ext':          video_extension.decode('utf-8'),
1481                                 'format':       u'NA',
1482                                 'player_url':   None,
1483                         })
1484                 except UnavailableVideoError:
1485                         self._downloader.trouble(u'\nERROR: unable to download video')
1486
1487
1488 class PhotobucketIE(InfoExtractor):
1489         """Information extractor for photobucket.com."""
1490
1491         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1492
1493         def __init__(self, downloader=None):
1494                 InfoExtractor.__init__(self, downloader)
1495
1496         @staticmethod
1497         def suitable(url):
1498                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1499
1500         def report_download_webpage(self, video_id):
1501                 """Report webpage download."""
1502                 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1503
1504         def report_extraction(self, video_id):
1505                 """Report information extraction."""
1506                 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1507
1508         def _real_initialize(self):
1509                 return
1510
1511         def _real_extract(self, url):
1512                 # Extract id from URL
1513                 mobj = re.match(self._VALID_URL, url)
1514                 if mobj is None:
1515                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1516                         return
1517
1518                 # At this point we have a new video
1519                 self._downloader.increment_downloads()
1520                 video_id = mobj.group(1)
1521
1522                 video_extension = 'flv'
1523
1524                 # Retrieve video webpage to extract further information
1525                 request = urllib2.Request(url)
1526                 try:
1527                         self.report_download_webpage(video_id)
1528                         webpage = urllib2.urlopen(request).read()
1529                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1530                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1531                         return
1532
1533                 # Extract URL, uploader, and title from webpage
1534                 self.report_extraction(video_id)
1535                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1536                 if mobj is None:
1537                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1538                         return
1539                 mediaURL = urllib.unquote(mobj.group(1))
1540
1541                 video_url = mediaURL
1542
1543                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1544                 if mobj is None:
1545                         self._downloader.trouble(u'ERROR: unable to extract title')
1546                         return
1547                 video_title = mobj.group(1).decode('utf-8')
1548                 video_title = sanitize_title(video_title)
1549                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1550
1551                 video_uploader = mobj.group(2).decode('utf-8')
1552
1553                 try:
1554                         # Process video information
1555                         self._downloader.process_info({
1556                                 'id':           video_id.decode('utf-8'),
1557                                 'url':          video_url.decode('utf-8'),
1558                                 'uploader':     video_uploader,
1559                                 'upload_date':  u'NA',
1560                                 'title':        video_title,
1561                                 'stitle':       simple_title,
1562                                 'ext':          video_extension.decode('utf-8'),
1563                                 'format':       u'NA',
1564                                 'player_url':   None,
1565                         })
1566                 except UnavailableVideoError:
1567                         self._downloader.trouble(u'\nERROR: unable to download video')
1568
1569
1570 class YahooIE(InfoExtractor):
1571         """Information extractor for video.yahoo.com."""
1572
1573         # _VALID_URL matches all Yahoo! Video URLs
1574         # _VPAGE_URL matches only the extractable '/watch/' URLs
1575         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1576         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1577
1578         def __init__(self, downloader=None):
1579                 InfoExtractor.__init__(self, downloader)
1580
1581         @staticmethod
1582         def suitable(url):
1583                 return (re.match(YahooIE._VALID_URL, url) is not None)
1584
1585         def report_download_webpage(self, video_id):
1586                 """Report webpage download."""
1587                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1588
1589         def report_extraction(self, video_id):
1590                 """Report information extraction."""
1591                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1592
1593         def _real_initialize(self):
1594                 return
1595
1596         def _real_extract(self, url, new_video=True):
1597                 # Extract ID from URL
1598                 mobj = re.match(self._VALID_URL, url)
1599                 if mobj is None:
1600                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1601                         return
1602
1603                 # At this point we have a new video
1604                 self._downloader.increment_downloads()
1605                 video_id = mobj.group(2)
1606                 video_extension = 'flv'
1607
1608                 # Rewrite valid but non-extractable URLs as
1609                 # extractable English language /watch/ URLs
1610                 if re.match(self._VPAGE_URL, url) is None:
1611                         request = urllib2.Request(url)
1612                         try:
1613                                 webpage = urllib2.urlopen(request).read()
1614                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1615                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1616                                 return
1617
1618                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1619                         if mobj is None:
1620                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1621                                 return
1622                         yahoo_id = mobj.group(1)
1623
1624                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1625                         if mobj is None:
1626                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1627                                 return
1628                         yahoo_vid = mobj.group(1)
1629
1630                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1631                         return self._real_extract(url, new_video=False)
1632
1633                 # Retrieve video webpage to extract further information
1634                 request = urllib2.Request(url)
1635                 try:
1636                         self.report_download_webpage(video_id)
1637                         webpage = urllib2.urlopen(request).read()
1638                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1639                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1640                         return
1641
1642                 # Extract uploader and title from webpage
1643                 self.report_extraction(video_id)
1644                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1645                 if mobj is None:
1646                         self._downloader.trouble(u'ERROR: unable to extract video title')
1647                         return
1648                 video_title = mobj.group(1).decode('utf-8')
1649                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1650
1651                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1652                 if mobj is None:
1653                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1654                         return
1655                 video_uploader = mobj.group(1).decode('utf-8')
1656
1657                 # Extract video thumbnail
1658                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1659                 if mobj is None:
1660                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1661                         return
1662                 video_thumbnail = mobj.group(1).decode('utf-8')
1663
1664                 # Extract video description
1665                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1666                 if mobj is None:
1667                         self._downloader.trouble(u'ERROR: unable to extract video description')
1668                         return
1669                 video_description = mobj.group(1).decode('utf-8')
1670                 if not video_description: video_description = 'No description available.'
1671
1672                 # Extract video height and width
1673                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1674                 if mobj is None:
1675                         self._downloader.trouble(u'ERROR: unable to extract video height')
1676                         return
1677                 yv_video_height = mobj.group(1)
1678
1679                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1680                 if mobj is None:
1681                         self._downloader.trouble(u'ERROR: unable to extract video width')
1682                         return
1683                 yv_video_width = mobj.group(1)
1684
1685                 # Retrieve video playlist to extract media URL
1686                 # I'm not completely sure what all these options are, but we
1687                 # seem to need most of them, otherwise the server sends a 401.
1688                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1689                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1690                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1691                                           '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1692                                           '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1693                 try:
1694                         self.report_download_webpage(video_id)
1695                         webpage = urllib2.urlopen(request).read()
1696                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1697                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1698                         return
1699
1700                 # Extract media URL from playlist XML
1701                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1702                 if mobj is None:
1703                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1704                         return
1705                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1706                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1707
1708                 try:
1709                         # Process video information
1710                         self._downloader.process_info({
1711                                 'id':           video_id.decode('utf-8'),
1712                                 'url':          video_url,
1713                                 'uploader':     video_uploader,
1714                                 'upload_date':  u'NA',
1715                                 'title':        video_title,
1716                                 'stitle':       simple_title,
1717                                 'ext':          video_extension.decode('utf-8'),
1718                                 'thumbnail':    video_thumbnail.decode('utf-8'),
1719                                 'description':  video_description,
1720                                 'thumbnail':    video_thumbnail,
1721                                 'description':  video_description,
1722                                 'player_url':   None,
1723                         })
1724                 except UnavailableVideoError:
1725                         self._downloader.trouble(u'\nERROR: unable to download video')
1726
1727
1728 class GenericIE(InfoExtractor):
1729         """Generic last-resort information extractor."""
1730
1731         def __init__(self, downloader=None):
1732                 InfoExtractor.__init__(self, downloader)
1733
1734         @staticmethod
1735         def suitable(url):
1736                 return True
1737
1738         def report_download_webpage(self, video_id):
1739                 """Report webpage download."""
1740                 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1741                 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1742
1743         def report_extraction(self, video_id):
1744                 """Report information extraction."""
1745                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1746
1747         def _real_initialize(self):
1748                 return
1749
1750         def _real_extract(self, url):
1751                 # At this point we have a new video
1752                 self._downloader.increment_downloads()
1753
1754                 video_id = url.split('/')[-1]
1755                 request = urllib2.Request(url)
1756                 try:
1757                         self.report_download_webpage(video_id)
1758                         webpage = urllib2.urlopen(request).read()
1759                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1760                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1761                         return
1762                 except ValueError, err:
1763                         # since this is the last-resort InfoExtractor, if
1764                         # this error is thrown, it'll be thrown here
1765                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1766                         return
1767
1768                 self.report_extraction(video_id)
1769                 # Start with something easy: JW Player in SWFObject
1770                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1771                 if mobj is None:
1772                         # Broaden the search a little bit
1773                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1774                 if mobj is None:
1775                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1776                         return
1777
1778                 # It's possible that one of the regexes
1779                 # matched, but returned an empty group:
1780                 if mobj.group(1) is None:
1781                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1782                         return
1783
1784                 video_url = urllib.unquote(mobj.group(1))
1785                 video_id  = os.path.basename(video_url)
1786
1787                 # here's a fun little line of code for you:
1788                 video_extension = os.path.splitext(video_id)[1][1:]
1789                 video_id        = os.path.splitext(video_id)[0]
1790
1791                 # it's tempting to parse this further, but you would
1792                 # have to take into account all the variations like
1793                 #   Video Title - Site Name
1794                 #   Site Name | Video Title
1795                 #   Video Title - Tagline | Site Name
1796                 # and so on and so forth; it's just not practical
1797                 mobj = re.search(r'<title>(.*)</title>', webpage)
1798                 if mobj is None:
1799                         self._downloader.trouble(u'ERROR: unable to extract title')
1800                         return
1801                 video_title = mobj.group(1).decode('utf-8')
1802                 video_title = sanitize_title(video_title)
1803                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1804
1805                 # video uploader is domain name
1806                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1807                 if mobj is None:
1808                         self._downloader.trouble(u'ERROR: unable to extract title')
1809                         return
1810                 video_uploader = mobj.group(1).decode('utf-8')
1811
1812                 try:
1813                         # Process video information
1814                         self._downloader.process_info({
1815                                 'id':           video_id.decode('utf-8'),
1816                                 'url':          video_url.decode('utf-8'),
1817                                 'uploader':     video_uploader,
1818                                 'upload_date':  u'NA',
1819                                 'title':        video_title,
1820                                 'stitle':       simple_title,
1821                                 'ext':          video_extension.decode('utf-8'),
1822                                 'format':       u'NA',
1823                                 'player_url':   None,
1824                         })
1825                 except UnavailableVideoError, err:
1826                         self._downloader.trouble(u'\nERROR: unable to download video')
1827
1828
1829 class YoutubeSearchIE(InfoExtractor):
1830         """Information Extractor for YouTube search queries."""
1831         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1832         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1833         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1834         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1835         _youtube_ie = None
1836         _max_youtube_results = 1000
1837
1838         def __init__(self, youtube_ie, downloader=None):
1839                 InfoExtractor.__init__(self, downloader)
1840                 self._youtube_ie = youtube_ie
1841
1842         @staticmethod
1843         def suitable(url):
1844                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1845
1846         def report_download_page(self, query, pagenum):
1847                 """Report attempt to download playlist page with given number."""
1848                 query = query.decode(preferredencoding())
1849                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1850
1851         def _real_initialize(self):
1852                 self._youtube_ie.initialize()
1853
1854         def _real_extract(self, query):
1855                 mobj = re.match(self._VALID_QUERY, query)
1856                 if mobj is None:
1857                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1858                         return
1859
1860                 prefix, query = query.split(':')
1861                 prefix = prefix[8:]
1862                 query  = query.encode('utf-8')
1863                 if prefix == '':
1864                         self._download_n_results(query, 1)
1865                         return
1866                 elif prefix == 'all':
1867                         self._download_n_results(query, self._max_youtube_results)
1868                         return
1869                 else:
1870                         try:
1871                                 n = long(prefix)
1872                                 if n <= 0:
1873                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1874                                         return
1875                                 elif n > self._max_youtube_results:
1876                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
1877                                         n = self._max_youtube_results
1878                                 self._download_n_results(query, n)
1879                                 return
1880                         except ValueError: # parsing prefix as integer fails
1881                                 self._download_n_results(query, 1)
1882                                 return
1883
1884         def _download_n_results(self, query, n):
1885                 """Downloads a specified number of results for a query"""
1886
1887                 video_ids = []
1888                 already_seen = set()
1889                 pagenum = 1
1890
1891                 while True:
1892                         self.report_download_page(query, pagenum)
1893                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1894                         request = urllib2.Request(result_url)
1895                         try:
1896                                 page = urllib2.urlopen(request).read()
1897                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1898                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1899                                 return
1900
1901                         # Extract video identifiers
1902                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1903                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1904                                 if video_id not in already_seen:
1905                                         video_ids.append(video_id)
1906                                         already_seen.add(video_id)
1907                                         if len(video_ids) == n:
1908                                                 # Specified n videos reached
1909                                                 for id in video_ids:
1910                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1911                                                 return
1912
1913                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1914                                 for id in video_ids:
1915                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1916                                 return
1917
1918                         pagenum = pagenum + 1
1919
1920 class GoogleSearchIE(InfoExtractor):
1921         """Information Extractor for Google Video search queries."""
1922         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1923         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1924         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1925         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1926         _google_ie = None
1927         _max_google_results = 1000
1928
1929         def __init__(self, google_ie, downloader=None):
1930                 InfoExtractor.__init__(self, downloader)
1931                 self._google_ie = google_ie
1932
1933         @staticmethod
1934         def suitable(url):
1935                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1936
1937         def report_download_page(self, query, pagenum):
1938                 """Report attempt to download playlist page with given number."""
1939                 query = query.decode(preferredencoding())
1940                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1941
1942         def _real_initialize(self):
1943                 self._google_ie.initialize()
1944
1945         def _real_extract(self, query):
1946                 mobj = re.match(self._VALID_QUERY, query)
1947                 if mobj is None:
1948                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1949                         return
1950
1951                 prefix, query = query.split(':')
1952                 prefix = prefix[8:]
1953                 query  = query.encode('utf-8')
1954                 if prefix == '':
1955                         self._download_n_results(query, 1)
1956                         return
1957                 elif prefix == 'all':
1958                         self._download_n_results(query, self._max_google_results)
1959                         return
1960                 else:
1961                         try:
1962                                 n = long(prefix)
1963                                 if n <= 0:
1964                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1965                                         return
1966                                 elif n > self._max_google_results:
1967                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n))
1968                                         n = self._max_google_results
1969                                 self._download_n_results(query, n)
1970                                 return
1971                         except ValueError: # parsing prefix as integer fails
1972                                 self._download_n_results(query, 1)
1973                                 return
1974
1975         def _download_n_results(self, query, n):
1976                 """Downloads a specified number of results for a query"""
1977
1978                 video_ids = []
1979                 already_seen = set()
1980                 pagenum = 1
1981
1982                 while True:
1983                         self.report_download_page(query, pagenum)
1984                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1985                         request = urllib2.Request(result_url)
1986                         try:
1987                                 page = urllib2.urlopen(request).read()
1988                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1989                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1990                                 return
1991
1992                         # Extract video identifiers
1993                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1994                                 video_id = mobj.group(1)
1995                                 if video_id not in already_seen:
1996                                         video_ids.append(video_id)
1997                                         already_seen.add(video_id)
1998                                         if len(video_ids) == n:
1999                                                 # Specified n videos reached
2000                                                 for id in video_ids:
2001                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2002                                                 return
2003
2004                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2005                                 for id in video_ids:
2006                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2007                                 return
2008
2009                         pagenum = pagenum + 1
2010
2011 class YahooSearchIE(InfoExtractor):
2012         """Information Extractor for Yahoo! Video search queries."""
2013         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
2014         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
2015         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
2016         _MORE_PAGES_INDICATOR = r'\s*Next'
2017         _yahoo_ie = None
2018         _max_yahoo_results = 1000
2019
2020         def __init__(self, yahoo_ie, downloader=None):
2021                 InfoExtractor.__init__(self, downloader)
2022                 self._yahoo_ie = yahoo_ie
2023
2024         @staticmethod
2025         def suitable(url):
2026                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
2027
2028         def report_download_page(self, query, pagenum):
2029                 """Report attempt to download playlist page with given number."""
2030                 query = query.decode(preferredencoding())
2031                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
2032
2033         def _real_initialize(self):
2034                 self._yahoo_ie.initialize()
2035
2036         def _real_extract(self, query):
2037                 mobj = re.match(self._VALID_QUERY, query)
2038                 if mobj is None:
2039                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2040                         return
2041
2042                 prefix, query = query.split(':')
2043                 prefix = prefix[8:]
2044                 query  = query.encode('utf-8')
2045                 if prefix == '':
2046                         self._download_n_results(query, 1)
2047                         return
2048                 elif prefix == 'all':
2049                         self._download_n_results(query, self._max_yahoo_results)
2050                         return
2051                 else:
2052                         try:
2053                                 n = long(prefix)
2054                                 if n <= 0:
2055                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2056                                         return
2057                                 elif n > self._max_yahoo_results:
2058                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n))
2059                                         n = self._max_yahoo_results
2060                                 self._download_n_results(query, n)
2061                                 return
2062                         except ValueError: # parsing prefix as integer fails
2063                                 self._download_n_results(query, 1)
2064                                 return
2065
2066         def _download_n_results(self, query, n):
2067                 """Downloads a specified number of results for a query"""
2068
2069                 video_ids = []
2070                 already_seen = set()
2071                 pagenum = 1
2072
2073                 while True:
2074                         self.report_download_page(query, pagenum)
2075                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2076                         request = urllib2.Request(result_url)
2077                         try:
2078                                 page = urllib2.urlopen(request).read()
2079                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2080                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2081                                 return
2082
2083                         # Extract video identifiers
2084                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2085                                 video_id = mobj.group(1)
2086                                 if video_id not in already_seen:
2087                                         video_ids.append(video_id)
2088                                         already_seen.add(video_id)
2089                                         if len(video_ids) == n:
2090                                                 # Specified n videos reached
2091                                                 for id in video_ids:
2092                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2093                                                 return
2094
2095                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2096                                 for id in video_ids:
2097                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2098                                 return
2099
2100                         pagenum = pagenum + 1
2101
2102 class YoutubePlaylistIE(InfoExtractor):
2103         """Information Extractor for YouTube playlists."""
2104
2105         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
2106         _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
2107         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2108         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2109         _youtube_ie = None
2110
2111         def __init__(self, youtube_ie, downloader=None):
2112                 InfoExtractor.__init__(self, downloader)
2113                 self._youtube_ie = youtube_ie
2114
2115         @staticmethod
2116         def suitable(url):
2117                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
2118
2119         def report_download_page(self, playlist_id, pagenum):
2120                 """Report attempt to download playlist page with given number."""
2121                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2122
2123         def _real_initialize(self):
2124                 self._youtube_ie.initialize()
2125
2126         def _real_extract(self, url):
2127                 # Extract playlist id
2128                 mobj = re.match(self._VALID_URL, url)
2129                 if mobj is None:
2130                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2131                         return
2132
2133                 # Single video case
2134                 if mobj.group(3) is not None:
2135                         self._youtube_ie.extract(mobj.group(3))
2136                         return
2137
2138                 # Download playlist pages
2139                 # prefix is 'p' as default for playlists but there are other types that need extra care
2140                 playlist_prefix = mobj.group(1)
2141                 if playlist_prefix == 'a':
2142                         playlist_access = 'artist'
2143                 else:
2144                         playlist_prefix = 'p'
2145                         playlist_access = 'view_play_list'
2146                 playlist_id = mobj.group(2)
2147                 video_ids = []
2148                 pagenum = 1
2149
2150                 while True:
2151                         self.report_download_page(playlist_id, pagenum)
2152                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
2153                         try:
2154                                 page = urllib2.urlopen(request).read()
2155                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2156                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2157                                 return
2158
2159                         # Extract video identifiers
2160                         ids_in_page = []
2161                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2162                                 if mobj.group(1) not in ids_in_page:
2163                                         ids_in_page.append(mobj.group(1))
2164                         video_ids.extend(ids_in_page)
2165
2166                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2167                                 break
2168                         pagenum = pagenum + 1
2169
2170                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2171                 playlistend = self._downloader.params.get('playlistend', -1)
2172                 video_ids = video_ids[playliststart:playlistend]
2173
2174                 for id in video_ids:
2175                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2176                 return
2177
2178 class YoutubeUserIE(InfoExtractor):
2179         """Information Extractor for YouTube users."""
2180
2181         _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
2182         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2183         _GDATA_PAGE_SIZE = 50
2184         _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2185         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2186         _youtube_ie = None
2187
2188         def __init__(self, youtube_ie, downloader=None):
2189                 InfoExtractor.__init__(self, downloader)
2190                 self._youtube_ie = youtube_ie
2191
2192         @staticmethod
2193         def suitable(url):
2194                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2195
2196         def report_download_page(self, username, start_index):
2197                 """Report attempt to download user page."""
2198                 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
2199                                            (username, start_index, start_index + self._GDATA_PAGE_SIZE))
2200
2201         def _real_initialize(self):
2202                 self._youtube_ie.initialize()
2203
2204         def _real_extract(self, url):
2205                 # Extract username
2206                 mobj = re.match(self._VALID_URL, url)
2207                 if mobj is None:
2208                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2209                         return
2210
2211                 username = mobj.group(1)
2212
2213                 # Download video ids using YouTube Data API. Result size per
2214                 # query is limited (currently to 50 videos) so we need to query
2215                 # page by page until there are no video ids - it means we got
2216                 # all of them.
2217
2218                 video_ids = []
2219                 pagenum = 0
2220
2221                 while True:
2222                         start_index = pagenum * self._GDATA_PAGE_SIZE + 1
2223                         self.report_download_page(username, start_index)
2224
2225                         request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
2226
2227                         try:
2228                                 page = urllib2.urlopen(request).read()
2229                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2230                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2231                                 return
2232
2233                         # Extract video identifiers
2234                         ids_in_page = []
2235
2236                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2237                                 if mobj.group(1) not in ids_in_page:
2238                                         ids_in_page.append(mobj.group(1))
2239
2240                         video_ids.extend(ids_in_page)
2241
2242                         # A little optimization - if current page is not
2243                         # "full", ie. does not contain PAGE_SIZE video ids then
2244                         # we can assume that this page is the last one - there
2245                         # are no more ids on further pages - no need to query
2246                         # again.
2247
2248                         if len(ids_in_page) < self._GDATA_PAGE_SIZE:
2249                                 break
2250
2251                         pagenum += 1
2252
2253                 all_ids_count = len(video_ids)
2254                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2255                 playlistend = self._downloader.params.get('playlistend', -1)
2256
2257                 if playlistend == -1:
2258                         video_ids = video_ids[playliststart:]
2259                 else:
2260                         video_ids = video_ids[playliststart:playlistend]
2261                         
2262                 self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
2263                                            (username, all_ids_count, len(video_ids)))
2264
2265                 for video_id in video_ids:
2266                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2267
2268
2269 class DepositFilesIE(InfoExtractor):
2270         """Information extractor for depositfiles.com"""
2271
2272         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
2273
2274         def __init__(self, downloader=None):
2275                 InfoExtractor.__init__(self, downloader)
2276
2277         @staticmethod
2278         def suitable(url):
2279                 return (re.match(DepositFilesIE._VALID_URL, url) is not None)
2280
2281         def report_download_webpage(self, file_id):
2282                 """Report webpage download."""
2283                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2284
2285         def report_extraction(self, file_id):
2286                 """Report information extraction."""
2287                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2288
2289         def _real_initialize(self):
2290                 return
2291
2292         def _real_extract(self, url):
2293                 # At this point we have a new file
2294                 self._downloader.increment_downloads()
2295
2296                 file_id = url.split('/')[-1]
2297                 # Rebuild url in english locale
2298                 url = 'http://depositfiles.com/en/files/' + file_id
2299
2300                 # Retrieve file webpage with 'Free download' button pressed
2301                 free_download_indication = { 'gateway_result' : '1' }
2302                 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
2303                 try:
2304                         self.report_download_webpage(file_id)
2305                         webpage = urllib2.urlopen(request).read()
2306                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2307                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2308                         return
2309
2310                 # Search for the real file URL
2311                 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2312                 if (mobj is None) or (mobj.group(1) is None):
2313                         # Try to figure out reason of the error.
2314                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2315                         if (mobj is not None) and (mobj.group(1) is not None):
2316                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2317                                 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2318                         else:
2319                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2320                         return
2321
2322                 file_url = mobj.group(1)
2323                 file_extension = os.path.splitext(file_url)[1][1:]
2324
2325                 # Search for file title
2326                 mobj = re.search(r'<b title="(.*?)">', webpage)
2327                 if mobj is None:
2328                         self._downloader.trouble(u'ERROR: unable to extract title')
2329                         return
2330                 file_title = mobj.group(1).decode('utf-8')
2331
2332                 try:
2333                         # Process file information
2334                         self._downloader.process_info({
2335                                 'id':           file_id.decode('utf-8'),
2336                                 'url':          file_url.decode('utf-8'),
2337                                 'uploader':     u'NA',
2338                                 'upload_date':  u'NA',
2339                                 'title':        file_title,
2340                                 'stitle':       file_title,
2341                                 'ext':          file_extension.decode('utf-8'),
2342                                 'format':       u'NA',
2343                                 'player_url':   None,
2344                         })
2345                 except UnavailableVideoError, err:
2346                         self._downloader.trouble(u'ERROR: unable to download file')
2347
2348 class FacebookIE(InfoExtractor):
2349         """Information Extractor for Facebook"""
2350
2351         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2352         _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2353         _NETRC_MACHINE = 'facebook'
2354         _available_formats = ['highqual', 'lowqual']
2355         _video_extensions = {
2356                 'highqual': 'mp4',
2357                 'lowqual': 'mp4',
2358         }
2359
        def __init__(self, downloader=None):
                """Create the extractor, forwarding the optional downloader to InfoExtractor."""
                InfoExtractor.__init__(self, downloader)
2362
2363         @staticmethod
2364         def suitable(url):
2365                 return (re.match(FacebookIE._VALID_URL, url) is not None)
2366
2367         def _reporter(self, message):
2368                 """Add header and report message."""
2369                 self._downloader.to_screen(u'[facebook] %s' % message)
2370
        def report_login(self):
                """Report attempt to log in."""
                # Goes through _reporter so the message carries the [facebook] tag
                self._reporter(u'Logging in')
2374
2375         def report_video_webpage_download(self, video_id):
2376                 """Report attempt to download video webpage."""
2377                 self._reporter(u'%s: Downloading video webpage' % video_id)
2378
2379         def report_information_extraction(self, video_id):
2380                 """Report attempt to extract video information."""
2381                 self._reporter(u'%s: Extracting video information' % video_id)
2382
2383         def _parse_page(self, video_webpage):
2384                 """Extract video information from page"""
2385                 # General data
2386                 data = {'title': r'class="video_title datawrap">(.*?)</',
2387                         'description': r'<div class="datawrap">(.*?)</div>',
2388                         'owner': r'\("video_owner_name", "(.*?)"\)',
2389                         'upload_date': r'data-date="(.*?)"',
2390                         'thumbnail':  r'\("thumb_url", "(?P<THUMB>.*?)"\)',
2391                         }
2392                 video_info = {}
2393                 for piece in data.keys():
2394                         mobj = re.search(data[piece], video_webpage)
2395                         if mobj is not None:
2396                                 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2397
2398                 # Video urls
2399                 video_urls = {}
2400                 for fmt in self._available_formats:
2401                         mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
2402                         if mobj is not None:
2403                                 # URL is in a Javascript segment inside an escaped Unicode format within
2404                                 # the generally utf-8 page
2405                                 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2406                 video_info['video_urls'] = video_urls
2407
2408                 return video_info
2409
2410         def _real_initialize(self):
2411                 if self._downloader is None:
2412                         return
2413
2414                 useremail = None
2415                 password = None
2416                 downloader_params = self._downloader.params
2417
2418                 # Attempt to use provided username and password or .netrc data
2419                 if downloader_params.get('username', None) is not None:
2420                         useremail = downloader_params['username']
2421                         password = downloader_params['password']
2422                 elif downloader_params.get('usenetrc', False):
2423                         try:
2424                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
2425                                 if info is not None:
2426                                         useremail = info[0]
2427                                         password = info[2]
2428                                 else:
2429                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
2430                         except (IOError, netrc.NetrcParseError), err:
2431                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
2432                                 return
2433
2434                 if useremail is None:
2435                         return
2436
2437                 # Log in
2438                 login_form = {
2439                         'email': useremail,
2440                         'pass': password,
2441                         'login': 'Log+In'
2442                         }
2443                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
2444                 try:
2445                         self.report_login()
2446                         login_results = urllib2.urlopen(request).read()
2447                         if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
2448                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
2449                                 return
2450                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2451                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
2452                         return
2453
2454         def _real_extract(self, url):
2455                 mobj = re.match(self._VALID_URL, url)
2456                 if mobj is None:
2457                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2458                         return
2459                 video_id = mobj.group('ID')
2460
2461                 # Get video webpage
2462                 self.report_video_webpage_download(video_id)
2463                 request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
2464                 try:
2465                         page = urllib2.urlopen(request)
2466                         video_webpage = page.read()
2467                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2468                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
2469                         return
2470
2471                 # Start extracting information
2472                 self.report_information_extraction(video_id)
2473
2474                 # Extract information
2475                 video_info = self._parse_page(video_webpage)
2476
2477                 # uploader
2478                 if 'owner' not in video_info:
2479                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
2480                         return
2481                 video_uploader = video_info['owner']
2482
2483                 # title
2484                 if 'title' not in video_info:
2485                         self._downloader.trouble(u'ERROR: unable to extract video title')
2486                         return
2487                 video_title = video_info['title']
2488                 video_title = video_title.decode('utf-8')
2489                 video_title = sanitize_title(video_title)
2490
2491                 # simplified title
2492                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2493                 simple_title = simple_title.strip(ur'_')
2494
2495                 # thumbnail image
2496                 if 'thumbnail' not in video_info:
2497                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
2498                         video_thumbnail = ''
2499                 else:
2500                         video_thumbnail = video_info['thumbnail']
2501
2502                 # upload date
2503                 upload_date = u'NA'
2504                 if 'upload_date' in video_info:
2505                         upload_time = video_info['upload_date']
2506                         timetuple = email.utils.parsedate_tz(upload_time)
2507                         if timetuple is not None:
2508                                 try:
2509                                         upload_date = time.strftime('%Y%m%d', timetuple[0:9])
2510                                 except:
2511                                         pass
2512
2513                 # description
2514                 video_description = 'No description available.'
2515                 if (self._downloader.params.get('forcedescription', False) and
2516                     'description' in video_info):
2517                         video_description = video_info['description']
2518
2519                 url_map = video_info['video_urls']
2520                 if len(url_map.keys()) > 0:
2521                         # Decide which formats to download
2522                         req_format = self._downloader.params.get('format', None)
2523                         format_limit = self._downloader.params.get('format_limit', None)
2524
2525                         if format_limit is not None and format_limit in self._available_formats:
2526                                 format_list = self._available_formats[self._available_formats.index(format_limit):]
2527                         else:
2528                                 format_list = self._available_formats
2529                         existing_formats = [x for x in format_list if x in url_map]
2530                         if len(existing_formats) == 0:
2531                                 self._downloader.trouble(u'ERROR: no known formats available for video')
2532                                 return
2533                         if req_format is None:
2534                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
2535                         elif req_format == '-1':
2536                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
2537                         else:
2538                                 # Specific format
2539                                 if req_format not in url_map:
2540                                         self._downloader.trouble(u'ERROR: requested format not available')
2541                                         return
2542                                 video_url_list = [(req_format, url_map[req_format])] # Specific format
2543
2544                 for format_param, video_real_url in video_url_list:
2545
2546                         # At this point we have a new video
2547                         self._downloader.increment_downloads()
2548
2549                         # Extension
2550                         video_extension = self._video_extensions.get(format_param, 'mp4')
2551
2552                         # Find the video URL in fmt_url_map or conn paramters
2553                         try:
2554                                 # Process video information
2555                                 self._downloader.process_info({
2556                                         'id':           video_id.decode('utf-8'),
2557                                         'url':          video_real_url.decode('utf-8'),
2558                                         'uploader':     video_uploader.decode('utf-8'),
2559                                         'upload_date':  upload_date,
2560                                         'title':        video_title,
2561                                         'stitle':       simple_title,
2562                                         'ext':          video_extension.decode('utf-8'),
2563                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
2564                                         'thumbnail':    video_thumbnail.decode('utf-8'),
2565                                         'description':  video_description.decode('utf-8'),
2566                                         'player_url':   None,
2567                                 })
2568                         except UnavailableVideoError, err:
2569                                 self._downloader.trouble(u'\nERROR: unable to download video')
2570
class PostProcessor(object):
        """Base class for post-download processing steps.

        A downloader keeps a chain of PostProcessor objects, registered
        through its add_post_processor() method. After a successful
        download it calls run() on each of them in turn: the first one
        gets the initial argument, every later one gets whatever the
        previous run() returned.

        The chain ends when its last element has run, or as soon as one
        run() returns None.

        Registration is mutual, in the same way as for InfoExtractor
        objects: the PostProcessor also holds a reference to its
        downloader.
        """

        # Downloader this PP belongs to; None until one is supplied.
        _downloader = None

        def __init__(self, downloader=None):
                self._downloader = downloader

        def set_downloader(self, downloader):
                """Attach this PP to the given downloader."""
                self._downloader = downloader

        def run(self, information):
                """Process one downloaded file.

                "information" is a dictionary like those built by the
                InfoExtractors, with one extra field, "filepath", that
                points to the downloaded file.

                Return None to stop the postprocessing chain, or an
                information dictionary (possibly the received one with
                some fields changed) to be passed to the next object in
                the chain.

                A PostProcessingError may be raised as well; the calling
                downloader takes it into account.
                """
                # The base class leaves the information untouched
                return information
2616
class FFmpegExtractAudioPP(PostProcessor):
        """PostProcessor that turns a downloaded video into an audio-only file.

        The audio codec of the file is probed with ffprobe. The audio track
        is then copied, or converted to the preferred codec, with ffmpeg.
        After a successful run the original file is removed.
        """

        def __init__(self, downloader=None, preferredcodec=None):
                """Set up the PP; preferredcodec defaults to 'best' when None."""
                PostProcessor.__init__(self, downloader)
                if preferredcodec is None:
                        preferredcodec = 'best'
                self._preferredcodec = preferredcodec

        @staticmethod
        def get_audio_codec(path):
                """Return the name of the audio codec used in path, or None.

                None is returned when ffprobe cannot be run, when it exits
                with a non-zero status, or when its output shows no audio
                stream.
                """
                try:
                        devnull = open(os.path.devnull, 'w')
                        try:
                                cmd = ['ffprobe', '-show_streams', '--', path]
                                handle = subprocess.Popen(cmd, stderr=devnull, stdout=subprocess.PIPE)
                                output = handle.communicate()[0]
                        finally:
                                # Do not leak the null-device handle
                                devnull.close()
                        if handle.wait() != 0:
                                return None
                except (IOError, OSError):
                        return None
                # Remember the most recent codec_name and report it once a
                # following codec_type line says the stream is audio. This
                # relies on ffprobe printing codec_name before codec_type.
                audio_codec = None
                for line in output.split('\n'):
                        if line.startswith('codec_name='):
                                audio_codec = line.split('=')[1].strip()
                        elif line.strip() == 'codec_type=audio' and audio_codec is not None:
                                return audio_codec
                return None

        @staticmethod
        def run_ffmpeg(path, out_path, codec, more_opts):
                """Run ffmpeg to write the audio of path to out_path.

                codec is passed as -acodec and more_opts is a list of extra
                command line options. Returns True when ffmpeg exits with
                status 0, False otherwise or when it cannot be run.
                """
                try:
                        devnull = open(os.path.devnull, 'w')
                        try:
                                cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
                                ret = subprocess.call(cmd, stdout=devnull, stderr=subprocess.STDOUT)
                        finally:
                                # Do not leak the null-device handle
                                devnull.close()
                        return (ret == 0)
                except (IOError, OSError):
                        return False

        def run(self, information):
                """Extract the audio of information['filepath'].

                Returns the information dictionary with 'filepath' pointing to
                the new audio file. Returns None, which stops the chain, when
                the codec cannot be determined, ffmpeg fails, or the original
                file cannot be removed.
                """
                path = information['filepath']

                filecodec = self.get_audio_codec(path)
                if filecodec is None:
                        self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
                        return None

                more_opts = []
                if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
                        if filecodec == 'aac' or filecodec == 'mp3':
                                # Lossless if possible
                                acodec = 'copy'
                                extension = filecodec
                                if filecodec == 'aac':
                                        more_opts = ['-f', 'adts']
                        else:
                                # MP3 otherwise.
                                acodec = 'libmp3lame'
                                extension = 'mp3'
                                more_opts = ['-ab', '128k']
                else:
                        # We convert the audio (lossy)
                        acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
                        extension = self._preferredcodec
                        more_opts = ['-ab', '128k']
                        if self._preferredcodec == 'aac':
                                more_opts += ['-f', 'adts']

                (prefix, ext) = os.path.splitext(path)
                new_path = prefix + '.' + extension
                if new_path == path:
                        # ffmpeg would be told to overwrite its own input, and the
                        # os.remove() below would then delete the only copy left.
                        self._downloader.to_stderr(u'WARNING: destination %s is the same as the source file, skipping audio extraction' % new_path)
                        return information
                self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
                status = self.run_ffmpeg(path, new_path, acodec, more_opts)

                if not status:
                        self._downloader.to_stderr(u'WARNING: error running ffmpeg')
                        return None

                try:
                        os.remove(path)
                except (IOError, OSError):
                        self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
                        return None

                information['filepath'] = new_path
                return information
2698
2699 ### MAIN PROGRAM ###
2700 if __name__ == '__main__':
2701         try:
2702                 # Modules needed only when running the main program
2703                 import getpass
2704                 import optparse
2705
2706                 # Function to update the program file with the latest version from the repository.
2707                 def update_self(downloader, filename):
2708                         # Note: downloader only used for options
2709                         if not os.access(filename, os.W_OK):
2710                                 sys.exit('ERROR: no write permissions on %s' % filename)
2711
2712                         downloader.to_screen('Updating to latest stable version...')
2713                         try:
2714                                 latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
2715                                 latest_version = urllib.urlopen(latest_url).read().strip()
2716                                 prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
2717                                 newcontent = urllib.urlopen(prog_url).read()
2718                         except (IOError, OSError), err:
2719                                 sys.exit('ERROR: unable to download latest version')
2720                         try:
2721                                 stream = open(filename, 'w')
2722                                 stream.write(newcontent)
2723                                 stream.close()
2724                         except (IOError, OSError), err:
2725                                 sys.exit('ERROR: unable to overwrite current version')
2726                         downloader.to_screen('Updated to version %s' % latest_version)
2727
2728                 # Parse command line
2729                 parser = optparse.OptionParser(
2730                         usage='Usage: %prog [options] url...',
2731                         version='2011.03.29',
2732                         conflict_handler='resolve',
2733                 )
2734
2735                 parser.add_option('-h', '--help',
2736                                 action='help', help='print this help text and exit')
2737                 parser.add_option('-v', '--version',
2738                                 action='version', help='print program version and exit')
2739                 parser.add_option('-U', '--update',
2740                                 action='store_true', dest='update_self', help='update this program to latest stable version')
2741                 parser.add_option('-i', '--ignore-errors',
2742                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
2743                 parser.add_option('-r', '--rate-limit',
2744                                 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
2745                 parser.add_option('-R', '--retries',
2746                                 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
2747                 parser.add_option('--playlist-start',
2748                                 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
2749                 parser.add_option('--playlist-end',
2750                                 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
2751                 parser.add_option('--dump-user-agent',
2752                                 action='store_true', dest='dump_user_agent',
2753                                 help='display the current browser identification', default=False)
2754
2755                 authentication = optparse.OptionGroup(parser, 'Authentication Options')
2756                 authentication.add_option('-u', '--username',
2757                                 dest='username', metavar='USERNAME', help='account username')
2758                 authentication.add_option('-p', '--password',
2759                                 dest='password', metavar='PASSWORD', help='account password')
2760                 authentication.add_option('-n', '--netrc',
2761                                 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
2762                 parser.add_option_group(authentication)
2763
2764                 video_format = optparse.OptionGroup(parser, 'Video Format Options')
2765                 video_format.add_option('-f', '--format',
2766                                 action='store', dest='format', metavar='FORMAT', help='video format code')
2767                 video_format.add_option('--all-formats',
2768                                 action='store_const', dest='format', help='download all available video formats', const='-1')
2769                 video_format.add_option('--max-quality',
2770                                 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
2771                 parser.add_option_group(video_format)
2772
2773                 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
2774                 verbosity.add_option('-q', '--quiet',
2775                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
2776                 verbosity.add_option('-s', '--simulate',
2777                                 action='store_true', dest='simulate', help='do not download video', default=False)
2778                 verbosity.add_option('-g', '--get-url',
2779                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
2780                 verbosity.add_option('-e', '--get-title',
2781                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
2782                 verbosity.add_option('--get-thumbnail',
2783                                 action='store_true', dest='getthumbnail',
2784                                 help='simulate, quiet but print thumbnail URL', default=False)
2785                 verbosity.add_option('--get-description',
2786                                 action='store_true', dest='getdescription',
2787                                 help='simulate, quiet but print video description', default=False)
2788                 verbosity.add_option('--get-filename',
2789                                 action='store_true', dest='getfilename',
2790                                 help='simulate, quiet but print output filename', default=False)
2791                 verbosity.add_option('--no-progress',
2792                                 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
2793                 verbosity.add_option('--console-title',
2794                                 action='store_true', dest='consoletitle',
2795                                 help='display progress in console titlebar', default=False)
2796                 parser.add_option_group(verbosity)
2797
2798                 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
2799                 filesystem.add_option('-t', '--title',
2800                                 action='store_true', dest='usetitle', help='use title in file name', default=False)
2801                 filesystem.add_option('-l', '--literal',
2802                                 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
2803                 filesystem.add_option('-A', '--auto-number',
2804                                 action='store_true', dest='autonumber',
2805                                 help='number downloaded files starting from 00000', default=False)
2806                 filesystem.add_option('-o', '--output',
2807                                 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
2808                 filesystem.add_option('-a', '--batch-file',
2809                                 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
2810                 filesystem.add_option('-w', '--no-overwrites',
2811                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
2812                 filesystem.add_option('-c', '--continue',
2813                                 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
2814                 filesystem.add_option('--cookies',
2815                                 dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
2816                 filesystem.add_option('--no-part',
2817                                 action='store_true', dest='nopart', help='do not use .part files', default=False)
2818                 filesystem.add_option('--no-mtime',
2819                                 action='store_false', dest='updatetime',
2820                                 help='do not use the Last-modified header to set the file modification time', default=True)
2821                 parser.add_option_group(filesystem)
2822
2823                 postproc = optparse.OptionGroup(parser, 'Post-processing Options')
2824                 postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
2825                                 help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
2826                 postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
2827                                 help='"best", "aac" or "mp3"; best by default')
2828                 parser.add_option_group(postproc)
2829
2830                 (opts, args) = parser.parse_args()
2831
2832                 # Open appropriate CookieJar
2833                 if opts.cookiefile is None:
2834                         jar = cookielib.CookieJar()
2835                 else:
2836                         try:
2837                                 jar = cookielib.MozillaCookieJar(opts.cookiefile)
2838                                 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
2839                                         jar.load()
2840                         except (IOError, OSError), err:
2841                                 sys.exit(u'ERROR: unable to open cookie file')
2842
2843                 # Dump user agent
2844                 if opts.dump_user_agent:
2845                         print std_headers['User-Agent']
2846                         sys.exit(0)
2847
2848                 # General configuration
2849                 cookie_processor = urllib2.HTTPCookieProcessor(jar)
2850                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()))
2851                 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
2852
2853                 # Batch file verification
2854                 batchurls = []
2855                 if opts.batchfile is not None:
2856                         try:
2857                                 if opts.batchfile == '-':
2858                                         batchfd = sys.stdin
2859                                 else:
2860                                         batchfd = open(opts.batchfile, 'r')
2861                                 batchurls = batchfd.readlines()
2862                                 batchurls = [x.strip() for x in batchurls]
2863                                 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
2864                         except IOError:
2865                                 sys.exit(u'ERROR: batch file could not be read')
2866                 all_urls = batchurls + args
2867
2868                 # Conflicting, missing and erroneous options
2869                 if opts.usenetrc and (opts.username is not None or opts.password is not None):
2870                         parser.error(u'using .netrc conflicts with giving username/password')
2871                 if opts.password is not None and opts.username is None:
2872                         parser.error(u'account username missing')
2873                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
2874                         parser.error(u'using output template conflicts with using title, literal title or auto number')
2875                 if opts.usetitle and opts.useliteral:
2876                         parser.error(u'using title conflicts with using literal title')
2877                 if opts.username is not None and opts.password is None:
2878                         opts.password = getpass.getpass(u'Type account password and press return:')
2879                 if opts.ratelimit is not None:
2880                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
2881                         if numeric_limit is None:
2882                                 parser.error(u'invalid rate limit specified')
2883                         opts.ratelimit = numeric_limit
2884                 if opts.retries is not None:
2885                         try:
2886                                 opts.retries = long(opts.retries)
2887                         except (TypeError, ValueError), err:
2888                                 parser.error(u'invalid retry count specified')
2889                 try:
2890                         opts.playliststart = long(opts.playliststart)
2891                         if opts.playliststart <= 0:
2892                                 raise ValueError
2893                 except (TypeError, ValueError), err:
2894                         parser.error(u'invalid playlist start number specified')
2895                 try:
2896                         opts.playlistend = long(opts.playlistend)
2897                         if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
2898                                 raise ValueError
2899                 except (TypeError, ValueError), err:
2900                         parser.error(u'invalid playlist end number specified')
2901                 if opts.extractaudio:
2902                         if opts.audioformat not in ['best', 'aac', 'mp3']:
2903                                 parser.error(u'invalid audio format specified')
2904
2905                 # Information extractors
2906                 youtube_ie = YoutubeIE()
2907                 metacafe_ie = MetacafeIE(youtube_ie)
2908                 dailymotion_ie = DailymotionIE()
2909                 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
2910                 youtube_user_ie = YoutubeUserIE(youtube_ie)
2911                 youtube_search_ie = YoutubeSearchIE(youtube_ie)
2912                 google_ie = GoogleIE()
2913                 google_search_ie = GoogleSearchIE(google_ie)
2914                 photobucket_ie = PhotobucketIE()
2915                 yahoo_ie = YahooIE()
2916                 yahoo_search_ie = YahooSearchIE(yahoo_ie)
2917                 deposit_files_ie = DepositFilesIE()
2918                 facebook_ie = FacebookIE()
2919                 generic_ie = GenericIE()
2920
2921                 # File downloader
2922                 fd = FileDownloader({
2923                         'usenetrc': opts.usenetrc,
2924                         'username': opts.username,
2925                         'password': opts.password,
2926                         'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
2927                         'forceurl': opts.geturl,
2928                         'forcetitle': opts.gettitle,
2929                         'forcethumbnail': opts.getthumbnail,
2930                         'forcedescription': opts.getdescription,
2931                         'forcefilename': opts.getfilename,
2932                         'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
2933                         'format': opts.format,
2934                         'format_limit': opts.format_limit,
2935                         'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
2936                                 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
2937                                 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
2938                                 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
2939                                 or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
2940                                 or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
2941                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
2942                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
2943                                 or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
2944                                 or u'%(id)s.%(ext)s'),
2945                         'ignoreerrors': opts.ignoreerrors,
2946                         'ratelimit': opts.ratelimit,
2947                         'nooverwrites': opts.nooverwrites,
2948                         'retries': opts.retries,
2949                         'continuedl': opts.continue_dl,
2950                         'noprogress': opts.noprogress,
2951                         'playliststart': opts.playliststart,
2952                         'playlistend': opts.playlistend,
2953                         'logtostderr': opts.outtmpl == '-',
2954                         'consoletitle': opts.consoletitle,
2955                         'nopart': opts.nopart,
2956                         'updatetime': opts.updatetime,
2957                         })
2958                 fd.add_info_extractor(youtube_search_ie)
2959                 fd.add_info_extractor(youtube_pl_ie)
2960                 fd.add_info_extractor(youtube_user_ie)
2961                 fd.add_info_extractor(metacafe_ie)
2962                 fd.add_info_extractor(dailymotion_ie)
2963                 fd.add_info_extractor(youtube_ie)
2964                 fd.add_info_extractor(google_ie)
2965                 fd.add_info_extractor(google_search_ie)
2966                 fd.add_info_extractor(photobucket_ie)
2967                 fd.add_info_extractor(yahoo_ie)
2968                 fd.add_info_extractor(yahoo_search_ie)
2969                 fd.add_info_extractor(deposit_files_ie)
2970                 fd.add_info_extractor(facebook_ie)
2971
2972                 # This must come last since it's the
2973                 # fallback if none of the others work
2974                 fd.add_info_extractor(generic_ie)
2975
2976                 # PostProcessors
2977                 if opts.extractaudio:
2978                         fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat))
2979
2980                 # Update version
2981                 if opts.update_self:
2982                         update_self(fd, sys.argv[0])
2983
2984                 # No URLs given: report a usage error, unless this run was only a self-update, in which case just exit
2985                 if len(all_urls) < 1:
2986                         if not opts.update_self:
2987                                 parser.error(u'you must provide at least one URL')
2988                         else:
2989                                 sys.exit()
2990                 retcode = fd.download(all_urls)
2991
2992                 # Dump cookie jar if requested
2993                 if opts.cookiefile is not None:
2994                         try:
2995                                 jar.save()
2996                         except (IOError, OSError), err:
2997                                 sys.exit(u'ERROR: unable to save cookie jar')
2998
2999                 sys.exit(retcode)
3000
3001         except DownloadError:
3002                 sys.exit(1)
3003         except SameFileError:
3004                 sys.exit(u'ERROR: fixed output name but more than one file to download')
3005         except KeyboardInterrupt:
3006                 sys.exit(u'\nERROR: Interrupted by user')