fixed PEP8 whitespace issues
[youtube-dl.git] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # Author: Vasyl' Vavrychuk
7 # Author: Witold Baryluk
# Author: Paweł Paprota
9 # Author: Gergely Imreh
10 # License: Public domain code
11 import cookielib
12 import ctypes
13 import datetime
14 import email.utils
15 import gzip
16 import htmlentitydefs
17 import httplib
18 import locale
19 import math
20 import netrc
21 import os
22 import os.path
23 import re
24 import socket
25 import string
26 import StringIO
27 import subprocess
28 import sys
29 import time
30 import urllib
31 import urllib2
32 import zlib
33
34 # parse_qs was moved from the cgi module to the urlparse module recently.
35 try:
36         from urlparse import parse_qs
37 except ImportError:
38         from cgi import parse_qs
39
# HTTP headers sent with every request, mimicking a desktop Firefox browser.
# YoutubeDLHandler.http_request() forces these onto each outgoing request.
std_headers = {
	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
	'Accept-Encoding': 'gzip, deflate',
	'Accept-Language': 'en-us,en;q=0.5',
}
47
# Characters kept when building a "simple" title: ASCII letters and digits,
# decoded to unicode (Python 2 str.decode) so they compare against unicode titles.
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
49
50
def preferredencoding():
	"""Get preferred encoding.

	Returns the best encoding scheme for the system, based on
	locale.getpreferredencoding() and some further tweaks.

	Falls back to 'UTF-8' when the locale reports an unknown or
	unusable encoding.
	"""
	# The original implementation wrapped this in a generator and called
	# .next() on it, but a fresh generator was created on every call, so
	# it cached nothing; a plain try/except is equivalent and clearer.
	try:
		pref = locale.getpreferredencoding()
		# Verify the reported encoding actually works before trusting it.
		u'TEST'.encode(pref)
	except Exception:
		# Narrowed from a bare "except:" so SystemExit/KeyboardInterrupt
		# are no longer swallowed.
		pref = 'UTF-8'
	return pref
66
67
68 def htmlentity_transform(matchobj):
69         """Transforms an HTML entity to a Unicode character.
70
71         This function receives a match object and is intended to be used with
72         the re.sub() function.
73         """
74         entity = matchobj.group(1)
75
76         # Known non-numeric HTML entity
77         if entity in htmlentitydefs.name2codepoint:
78                 return unichr(htmlentitydefs.name2codepoint[entity])
79
80         # Unicode character
81         mobj = re.match(ur'(?u)#(x?\d+)', entity)
82         if mobj is not None:
83                 numstr = mobj.group(1)
84                 if numstr.startswith(u'x'):
85                         base = 16
86                         numstr = u'0%s' % numstr
87                 else:
88                         base = 10
89                 return unichr(long(numstr, base))
90
91         # Unknown entity in name, return its literal representation
92         return (u'&%s;' % entity)
93
94
95 def sanitize_title(utitle):
96         """Sanitizes a video title so it could be used as part of a filename."""
97         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
98         return utitle.replace(unicode(os.sep), u'%')
99
100
def sanitize_open(filename, open_mode):
	"""Try to open the given filename, and slightly tweak it if this fails.

	Attempts to open the given filename. If this fails, it tries to change
	the filename slightly, step by step, until it's either able to open it
	or it fails and raises a final exception, like the standard open()
	function.

	It returns the tuple (stream, definitive_file_name).
	"""
	try:
		if filename == u'-':
			# '-' means "write to standard output"; on Windows, switch
			# stdout to binary mode so data is not mangled by CRLF
			# translation.
			if sys.platform == 'win32':
				import msvcrt
				msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
			return (sys.stdout, filename)
		stream = open(filename, open_mode)
		return (stream, filename)
	except (IOError, OSError), err:
		# In case of error, try to remove win32 forbidden chars
		filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)

		# An exception here should be caught in the caller
		stream = open(filename, open_mode)
		return (stream, filename)
126
127
def timeconvert(timestr):
	"""Convert RFC 2822 defined time string into system timestamp"""
	# Returns None when the string cannot be parsed as an RFC 2822 date.
	timetuple = email.utils.parsedate_tz(timestr)
	if timetuple is None:
		return None
	return email.utils.mktime_tz(timetuple)
135
136
class DownloadError(Exception):
	"""Download Error exception.

	This exception may be thrown by FileDownloader objects if they are not
	configured to continue on errors. They will contain the appropriate
	error message.

	Raised by FileDownloader.trouble() when 'ignoreerrors' is not set.
	"""
	pass
145
146
class SameFileError(Exception):
	"""Same File exception.

	This exception will be thrown by FileDownloader objects if they detect
	multiple files would have to be downloaded to the same file on disk.

	Raised by FileDownloader.download() when several URLs are given with
	a fixed (substitution-free) output template.
	"""
	pass
154
155
class PostProcessingError(Exception):
	"""Post Processing exception.

	This exception may be raised by PostProcessor's .run() method to
	indicate an error in the postprocessing task. It is caught and
	reported by FileDownloader.process_info().
	"""
	pass
163
164
class UnavailableVideoError(Exception):
	"""Unavailable Format exception.

	This exception will be thrown when a video is requested
	in a format that is not available for that video.

	Raised by FileDownloader.process_info() when downloading the video
	data fails with an OS or I/O error.
	"""
	pass
172
173
class ContentTooShortError(Exception):
	"""Content Too Short exception.

	This exception may be raised by FileDownloader objects when a file they
	download is too small for what the server announced first, indicating
	the connection was probably interrupted.
	"""
	# Both in bytes
	downloaded = None
	expected = None

	def __init__(self, downloaded, expected):
		# Pass a message to the base class so str(err) is informative
		# (previously it was empty); the byte counts stay available as
		# attributes for callers that format their own message.
		Exception.__init__(self, 'Downloaded %s bytes, expected %s bytes' % (downloaded, expected))
		self.downloaded = downloaded
		self.expected = expected
188
189
class YoutubeDLHandler(urllib2.HTTPHandler):
	"""Handler for HTTP requests and responses.

	This class, when installed with an OpenerDirector, automatically adds
	the standard headers to every HTTP request and handles gzipped and
	deflated responses from web servers. If compression is to be avoided in
	a particular request, the original request in the program code only has
	to include the HTTP header "Youtubedl-No-Compression", which will be
	removed before making the real request.

	Part of this code was copied from:

	http://techknack.net/python-urllib2-handlers/

	Andrew Rowls, the author of that code, agreed to release it to the
	public domain.
	"""

	@staticmethod
	def deflate(data):
		"""Decompress a "deflate" response body.

		Tries raw deflate first (negative wbits, no zlib header — some
		servers send it that way) and falls back to standard zlib.
		"""
		try:
			return zlib.decompress(data, -zlib.MAX_WBITS)
		except zlib.error:
			return zlib.decompress(data)

	@staticmethod
	def addinfourl_wrapper(stream, headers, url, code):
		"""Build an addinfourl carrying the response code, working around
		older Python versions whose addinfourl lacks the code argument."""
		if hasattr(urllib2.addinfourl, 'getcode'):
			return urllib2.addinfourl(stream, headers, url, code)
		ret = urllib2.addinfourl(stream, headers, url)
		ret.code = code
		return ret

	def http_request(self, req):
		"""Force std_headers onto the outgoing request and strip the
		internal "Youtubedl-no-compression" marker header.

		Note: any caller-supplied header whose name collides with one in
		std_headers is replaced by the std_headers value.
		"""
		for h in std_headers:
			if h in req.headers:
				del req.headers[h]
			req.add_header(h, std_headers[h])
		if 'Youtubedl-no-compression' in req.headers:
			if 'Accept-encoding' in req.headers:
				del req.headers['Accept-encoding']
			del req.headers['Youtubedl-no-compression']
		return req

	def http_response(self, req, resp):
		"""Transparently decompress gzip- or deflate-encoded responses,
		preserving the original headers, URL, code and msg."""
		old_resp = resp
		# gzip
		if resp.headers.get('Content-encoding', '') == 'gzip':
			gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
			resp.msg = old_resp.msg
		# deflate
		if resp.headers.get('Content-encoding', '') == 'deflate':
			gz = StringIO.StringIO(self.deflate(resp.read()))
			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
			resp.msg = old_resp.msg
		return resp
247
248
249 class FileDownloader(object):
250         """File Downloader class.
251
252         File downloader objects are the ones responsible of downloading the
253         actual video file and writing it to disk if the user has requested
254         it, among some other tasks. In most cases there should be one per
255         program. As, given a video URL, the downloader doesn't know how to
256         extract all the needed information, task that InfoExtractors do, it
257         has to pass the URL to one of them.
258
259         For this, file downloader objects have a method that allows
260         InfoExtractors to be registered in a given order. When it is passed
261         a URL, the file downloader handles it to the first InfoExtractor it
262         finds that reports being able to handle it. The InfoExtractor extracts
263         all the information about the video or videos the URL refers to, and
264         asks the FileDownloader to process the video information, possibly
265         downloading the video.
266
267         File downloaders accept a lot of parameters. In order not to saturate
268         the object constructor with arguments, it receives a dictionary of
269         options instead. These options are available through the params
270         attribute for the InfoExtractors to use. The FileDownloader also
271         registers itself as the downloader in charge for the InfoExtractors
272         that are added to it, so this is a "mutual registration".
273
274         Available options:
275
276         username:         Username for authentication purposes.
277         password:         Password for authentication purposes.
278         usenetrc:         Use netrc for authentication instead.
279         quiet:            Do not print messages to stdout.
280         forceurl:         Force printing final URL.
281         forcetitle:       Force printing title.
282         forcethumbnail:   Force printing thumbnail URL.
283         forcedescription: Force printing description.
284         forcefilename:    Force printing final filename.
285         simulate:         Do not download the video files.
286         format:           Video format code.
287         format_limit:     Highest quality format to try.
288         outtmpl:          Template for output names.
289         ignoreerrors:     Do not stop on download errors.
290         ratelimit:        Download speed limit, in bytes/sec.
291         nooverwrites:     Prevent overwriting files.
292         retries:          Number of times to retry for HTTP error 5xx
293         continuedl:       Try to continue downloads if possible.
294         noprogress:       Do not print the progress bar.
295         playliststart:    Playlist item to start at.
296         playlistend:      Playlist item to end at.
297         logtostderr:      Log messages to stderr instead of stdout.
298         consoletitle:     Display progress in console window's titlebar.
299         nopart:           Do not use temporary .part files.
300         updatetime:       Use the Last-modified header to set output file timestamps.
301         """
302
	# All of these are (re)initialized per instance in __init__; the class
	# level values only document which attributes exist.
	params = None  # Options dictionary passed to the constructor
	_ies = []  # Registered InfoExtractor objects
	_pps = []  # Registered PostProcessor objects
	_download_retcode = None  # Process return code (0 ok, 1 after errors)
	_num_downloads = None  # Ordinal of the current download (for %(autonumber)s)
	_screen_file = None  # Stream used by to_screen() (stdout or stderr)
309
	def __init__(self, params):
		"""Create a FileDownloader object with the given options."""
		self._ies = []
		self._pps = []
		self._download_retcode = 0
		self._num_downloads = 0
		# Honor 'logtostderr' by choosing the stream all screen output uses.
		self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
		self.params = params
318
319         @staticmethod
320         def pmkdir(filename):
321                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
322                 components = filename.split(os.sep)
323                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
324                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
325                 for dir in aggregate:
326                         if not os.path.exists(dir):
327                                 os.mkdir(dir)
328
	@staticmethod
	def format_bytes(bytes):
		"""Format a byte count as a human-readable string, e.g. '1.00k'.

		Accepts None (returns 'N/A') and numeric strings.
		"""
		if bytes is None:
			return 'N/A'
		if type(bytes) is str:
			bytes = float(bytes)
		if bytes == 0.0:
			exponent = 0
		else:
			# Largest power of 1024 not exceeding the value; indexes the
			# suffix table below.
			exponent = long(math.log(bytes, 1024.0))
		suffix = 'bkMGTPEZY'[exponent]
		converted = float(bytes) / float(1024 ** exponent)
		return '%.2f%s' % (converted, suffix)
342
	@staticmethod
	def calc_percent(byte_counter, data_len):
		"""Return download progress as a fixed-width percent string, or
		'---.-%' when the total size is unknown."""
		if data_len is None:
			return '---.-%'
		return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
348
	@staticmethod
	def calc_eta(start, now, total, current):
		"""Estimate remaining download time as an 'MM:SS' string.

		Returns '--:--' when the total size is unknown, nothing has been
		transferred yet, or the estimate exceeds 99 minutes.
		"""
		if total is None:
			return '--:--'
		dif = now - start
		if current == 0 or dif < 0.001: # One millisecond
			return '--:--'
		rate = float(current) / dif
		eta = long((float(total) - float(current)) / rate)
		(eta_mins, eta_secs) = divmod(eta, 60)
		if eta_mins > 99:
			return '--:--'
		return '%02d:%02d' % (eta_mins, eta_secs)
362
	@staticmethod
	def calc_speed(start, now, bytes):
		"""Return the average download speed as a fixed-width string, or
		'---b/s' when it cannot be computed yet."""
		dif = now - start
		if bytes == 0 or dif < 0.001: # One millisecond
			return '%10s' % '---b/s'
		return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
369
	@staticmethod
	def best_block_size(elapsed_time, bytes):
		"""Choose the next read size from the previous chunk's throughput.

		Returns the measured byte rate clamped between half and twice the
		previous block size, never exceeding 4 MB.
		"""
		new_min = max(bytes / 2.0, 1.0)
		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
		if elapsed_time < 0.001:
			return long(new_max)
		rate = bytes / elapsed_time
		if rate > new_max:
			return long(new_max)
		if rate < new_min:
			return long(new_min)
		return long(rate)
382
	@staticmethod
	def parse_bytes(bytestr):
		"""Parse a string indicating a byte quantity into a long integer.

		Accepts an optional one-letter multiplier suffix (k, M, G, ...);
		returns None if the string cannot be parsed.
		"""
		matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
		if matchobj is None:
			return None
		number = float(matchobj.group(1))
		# A missing suffix yields index 0 in the table, i.e. multiplier 1.
		multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
		return long(round(number * multiplier))
392
	def add_info_extractor(self, ie):
		"""Add an InfoExtractor object to the end of the list."""
		self._ies.append(ie)
		# Mutual registration: the extractor also learns its downloader.
		ie.set_downloader(self)
397
	def add_post_processor(self, pp):
		"""Add a PostProcessor object to the end of the chain."""
		self._pps.append(pp)
		# Mutual registration: the processor also learns its downloader.
		pp.set_downloader(self)
402
	def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
		"""Print message to stdout if not in quiet mode.

		skip_eol suppresses the trailing newline so progress lines can be
		rewritten in place with '\\r'; ignore_encoding_errors silently
		drops messages the console encoding cannot represent.
		"""
		try:
			if not self.params.get('quiet', False):
				terminator = [u'\n', u''][skip_eol]
				print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
			self._screen_file.flush()
		except (UnicodeEncodeError), err:
			if not ignore_encoding_errors:
				raise
413
	def to_stderr(self, message):
		"""Print message to stderr."""
		# Encode with the locale's preferred encoding, like to_screen().
		print >>sys.stderr, message.encode(preferredencoding())
417
	def to_cons_title(self, message):
		"""Set console/terminal window title to message.

		No-op unless the 'consoletitle' option is set.
		"""
		if not self.params.get('consoletitle', False):
			return
		if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
			# c_wchar_p() might not be necessary if `message` is
			# already of type unicode()
			ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
		elif 'TERM' in os.environ:
			# xterm-style escape sequence for setting the window title.
			sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
428
	def fixed_template(self):
		"""Checks if the output template is fixed, i.e. contains no
		%(...)s substitutions and would name every download identically."""
		return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
432
	def trouble(self, message=None):
		"""Determine action to take when a download problem appears.

		Depending on if the downloader has been configured to ignore
		download errors or not, this method may throw an exception or
		not when errors are found, after printing the message.
		"""
		if message is not None:
			self.to_stderr(message)
		if not self.params.get('ignoreerrors', False):
			raise DownloadError(message)
		# Remembered and eventually returned by download().
		self._download_retcode = 1
445
	def slow_down(self, start_time, byte_counter):
		"""Sleep if the download speed is over the rate limit."""
		rate_limit = self.params.get('ratelimit', None)
		if rate_limit is None or byte_counter == 0:
			return
		now = time.time()
		elapsed = now - start_time
		if elapsed <= 0.0:
			return
		speed = float(byte_counter) / elapsed
		if speed > rate_limit:
			# Sleep just long enough for the average speed since
			# start_time to drop back to the rate limit.
			time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
458
	def temp_name(self, filename):
		"""Returns a temporary filename for the given filename."""
		# Write straight to the target when .part files are disabled, when
		# writing to stdout ('-'), or when the target exists but is not a
		# regular file (e.g. a named pipe or special device).
		if self.params.get('nopart', False) or filename == u'-' or \
				(os.path.exists(filename) and not os.path.isfile(filename)):
			return filename
		return filename + u'.part'
465
466         def undo_temp_name(self, filename):
467                 if filename.endswith(u'.part'):
468                         return filename[:-len(u'.part')]
469                 return filename
470
	def try_rename(self, old_filename, new_filename):
		"""Rename the temporary file to its final name, reporting any
		failure through trouble()."""
		try:
			# Nothing to do when no .part file was used.
			if old_filename == new_filename:
				return
			os.rename(old_filename, new_filename)
		except (IOError, OSError), err:
			self.trouble(u'ERROR: unable to rename file')
478
479         def try_utime(self, filename, last_modified_hdr):
480                 """Try to set the last-modified time of the given file."""
481                 if last_modified_hdr is None:
482                         return
483                 if not os.path.isfile(filename):
484                         return
485                 timestr = last_modified_hdr
486                 if timestr is None:
487                         return
488                 filetime = timeconvert(timestr)
489                 if filetime is None:
490                         return
491                 try:
492                         os.utime(filename, (time.time(), filetime))
493                 except:
494                         pass
495
	def report_destination(self, filename):
		"""Report destination filename."""
		# Encoding errors are ignored: a filename the console cannot show
		# should not abort the download.
		self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
499
	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
		"""Report download progress."""
		if self.params.get('noprogress', False):
			return
		# The leading '\r' plus skip_eol rewrites the console line in place.
		self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
				(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
		self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
				(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
508
	def report_resuming_byte(self, resume_len):
		"""Report attempt to resume at given byte."""
		self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
512
	def report_retry(self, count, retries):
		"""Report retry in case of HTTP error 5xx"""
		self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
516
	def report_file_already_downloaded(self, file_name):
		"""Report file has already been fully downloaded."""
		try:
			self.to_screen(u'[download] %s has already been downloaded' % file_name)
		except (UnicodeEncodeError), err:
			# Fall back to a generic message when the filename cannot be
			# encoded for the console.
			self.to_screen(u'[download] The file has already been downloaded')
523
	def report_unable_to_resume(self):
		"""Report it was impossible to resume download."""
		self.to_screen(u'[download] Unable to resume')
527
	def report_finish(self):
		"""Report download finished."""
		if self.params.get('noprogress', False):
			self.to_screen(u'[download] Download completed')
		else:
			# Just terminate the in-place progress line with a newline.
			self.to_screen(u'')
534
	def increment_downloads(self):
		"""Increment the ordinal that assigns a number to each file.

		The counter feeds the %(autonumber)s field in prepare_filename().
		"""
		self._num_downloads += 1
538
	def prepare_filename(self, info_dict):
		"""Generate the output filename.

		Expands the 'outtmpl' template with the video information plus the
		special fields 'epoch' (current Unix time) and 'autonumber'
		(zero-padded download ordinal). Returns None, after calling
		trouble(), if the template cannot be expanded.
		"""
		try:
			template_dict = dict(info_dict)
			template_dict['epoch'] = unicode(long(time.time()))
			template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
			filename = self.params['outtmpl'] % template_dict
			return filename
		except (ValueError, KeyError), err:
			self.trouble(u'ERROR: invalid system charset or erroneous output template')
			return None
550
	def process_info(self, info_dict):
		"""Process a single dictionary returned by an InfoExtractor.

		Honors the simulate/force* options, skips existing files when
		'nooverwrites' is set, downloads the video and finally runs the
		postprocessing chain on the downloaded file.
		"""
		filename = self.prepare_filename(info_dict)
		# Do nothing else if in simulate mode
		if self.params.get('simulate', False):
			# Forced printings
			if self.params.get('forcetitle', False):
				print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forceurl', False):
				print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
				print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcedescription', False) and 'description' in info_dict:
				print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcefilename', False) and filename is not None:
				print filename.encode(preferredencoding(), 'xmlcharrefreplace')

			return

		if filename is None:
			return
		if self.params.get('nooverwrites', False) and os.path.exists(filename):
			self.to_stderr(u'WARNING: file exists and will be skipped')
			return

		try:
			self.pmkdir(filename)
		except (OSError, IOError), err:
			self.trouble(u'ERROR: unable to create directories: %s' % str(err))
			return

		try:
			success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
		except (OSError, IOError), err:
			# NOTE(review): OS/IO errors during the download are re-raised
			# as UnavailableVideoError — presumably so callers treat them
			# as an unavailable format; confirm against call sites.
			raise UnavailableVideoError
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self.trouble(u'ERROR: unable to download video data: %s' % str(err))
			return
		except (ContentTooShortError, ), err:
			self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
			return

		if success:
			try:
				self.post_process(filename, info_dict)
			except (PostProcessingError), err:
				self.trouble(u'ERROR: postprocessing: %s' % str(err))
				return
599
	def download(self, url_list):
		"""Download a given list of URLs.

		Each URL is handed to the first registered InfoExtractor that
		declares itself suitable. Returns the accumulated return code
		(0 on success, 1 if trouble() recorded any error).
		"""
		if len(url_list) > 1 and self.fixed_template():
			# A fixed template would make every URL overwrite the same file.
			raise SameFileError(self.params['outtmpl'])

		for url in url_list:
			suitable_found = False
			for ie in self._ies:
				# Go to next InfoExtractor if not suitable
				if not ie.suitable(url):
					continue

				# Suitable InfoExtractor found
				suitable_found = True

				# Extract information from URL and process it
				ie.extract(url)

				# Suitable InfoExtractor had been found; go to next URL
				break

			if not suitable_found:
				self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)

		return self._download_retcode
625
626         def post_process(self, filename, ie_info):
627                 """Run the postprocessing chain on the given file."""
628                 info = dict(ie_info)
629                 info['filepath'] = filename
630                 for pp in self._pps:
631                         info = pp.run(info)
632                         if info is None:
633                                 break
634
	def _download_with_rtmpdump(self, filename, url, player_url):
		"""Download an rtmp:// URL by driving the external rtmpdump tool.

		Returns True on success, False on failure (after reporting it).
		"""
		self.report_destination(filename)
		tmpfilename = self.temp_name(filename)

		# Check for rtmpdump first
		try:
			subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
		except (OSError, IOError):
			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
			return False

		# Download using rtmpdump. rtmpdump returns exit code 2 when
		# the connection was interrupted and resuming appears to be
		# possible. This is part of rtmpdump's normal usage, AFAIK.
		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
		retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
		while retval == 2 or retval == 1:
			prevsize = os.path.getsize(tmpfilename)
			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
			time.sleep(5.0) # This seems to be needed
			# Retry with resume (-e); NOTE(review): '-k 1' is added only
			# when the previous attempt exited with code 1 — presumably a
			# server workaround; confirm against rtmpdump documentation.
			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
			cursize = os.path.getsize(tmpfilename)
			if prevsize == cursize and retval == 1:
				# No progress was made; give up instead of looping forever.
				break
		if retval == 0:
			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
			self.try_rename(tmpfilename, filename)
			return True
		else:
			self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
			return False
666
	def _do_download(self, filename, url, player_url):
		"""Download url into filename over HTTP (rtmp URLs go to rtmpdump).

		Writes into a temporary name (see temp_name) and renames it to
		filename once the transfer completes.  Honors the 'continuedl',
		'nopart', 'retries' and 'updatetime' params.  Returns True on
		success, False after reporting trouble; raises
		ContentTooShortError when fewer bytes than advertised arrive.
		"""
		# Check file already present
		if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
			self.report_file_already_downloaded(filename)
			return True

		# Attempt to download using rtmpdump
		if url.startswith('rtmp'):
			return self._download_with_rtmpdump(filename, url, player_url)

		tmpfilename = self.temp_name(filename)
		stream = None
		open_mode = 'wb'

		# Do not include the Accept-Encoding header
		headers = {'Youtubedl-no-compression': 'True'}
		# basic_request stays Range-free: it is the fallback probe used in
		# the HTTP 416 branch below to learn the full Content-Length.
		basic_request = urllib2.Request(url, None, headers)
		request = urllib2.Request(url, None, headers)

		# Establish possible resume length
		if os.path.isfile(tmpfilename):
			resume_len = os.path.getsize(tmpfilename)
		else:
			resume_len = 0

		# Request parameters in case of being able to resume
		if self.params.get('continuedl', False) and resume_len != 0:
			self.report_resuming_byte(resume_len)
			request.add_header('Range', 'bytes=%d-' % resume_len)
			open_mode = 'ab'

		count = 0
		retries = self.params.get('retries', 0)
		while count <= retries:
			# Establish connection
			try:
				data = urllib2.urlopen(request)
				break
			except (urllib2.HTTPError, ), err:
				if (err.code < 500 or err.code >= 600) and err.code != 416:
					# Unexpected HTTP error
					raise
				elif err.code == 416:
					# Unable to resume (requested range not satisfiable)
					try:
						# Open the connection again without the range header
						data = urllib2.urlopen(basic_request)
						content_length = data.info()['Content-Length']
					except (urllib2.HTTPError, ), err:
						if err.code < 500 or err.code >= 600:
							raise
					else:
						# No exception from the Range-free request:
						# examine the reported length
						if (content_length is not None and
								(resume_len - 100 < long(content_length) < resume_len + 100)):
							# The file had already been fully downloaded.
							# Explanation to the above condition: in issue #175 it was revealed that
							# YouTube sometimes adds or removes a few bytes from the end of the file,
							# changing the file size slightly and causing problems for some users. So
							# I decided to implement a suggested change and consider the file
							# completely downloaded if the file size differs less than 100 bytes from
							# the one in the hard drive.
							self.report_file_already_downloaded(filename)
							self.try_rename(tmpfilename, filename)
							return True
						else:
							# The length does not match, we start the download over
							self.report_unable_to_resume()
							open_mode = 'wb'
							break
			# Retry
			count += 1
			if count <= retries:
				self.report_retry(count, retries)

		if count > retries:
			self.trouble(u'ERROR: giving up after %s retries' % retries)
			return False

		# Note: resume_len is added back so the progress/ETA math below
		# reflects the whole file, not just the remaining range.
		data_len = data.info().get('Content-length', None)
		if data_len is not None:
			data_len = long(data_len) + resume_len
		data_len_str = self.format_bytes(data_len)
		byte_counter = 0 + resume_len
		block_size = 1024
		start = time.time()
		while True:
			# Download and write
			before = time.time()
			data_block = data.read(block_size)
			after = time.time()
			if len(data_block) == 0:
				break
			byte_counter += len(data_block)

			# Open file just in time
			if stream is None:
				try:
					(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
					filename = self.undo_temp_name(tmpfilename)
					self.report_destination(filename)
				except (OSError, IOError), err:
					self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
					return False
			try:
				stream.write(data_block)
			except (IOError, OSError), err:
				self.trouble(u'\nERROR: unable to write data: %s' % str(err))
				return False
			# Adapt the read size to the observed throughput
			block_size = self.best_block_size(after - before, len(data_block))

			# Progress message
			percent_str = self.calc_percent(byte_counter, data_len)
			eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
			speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
			self.report_progress(percent_str, data_len_str, speed_str, eta_str)

			# Apply rate limit
			self.slow_down(start, byte_counter - resume_len)

		stream.close()
		self.report_finish()
		if data_len is not None and byte_counter != data_len:
			raise ContentTooShortError(byte_counter, long(data_len))
		self.try_rename(tmpfilename, filename)

		# Update file modification time
		if self.params.get('updatetime', True):
			self.try_utime(filename, data.info().get('last-modified', None))

		return True
798
799
class InfoExtractor(object):
	"""Base class for all information extractors.

	An information extractor takes a URL and produces, for each video it
	refers to, a dictionary of metadata which is handed to the
	FileDownloader (which may then download the video, among other
	outcomes).  The dictionaries must include the following fields:

	id:             Video identifier.
	url:            Final video URL.
	uploader:       Nickname of the video uploader.
	title:          Literal title.
	stitle:         Simplified title.
	ext:            Video filename extension.
	format:         Video format.
	player_url:     SWF Player URL (may be None).

	Optional fields, used only when the respective forced-printing
	functions are called (e.g. when youtube-dl backs a video search
	frontend such as youtube2mp3):

	thumbnail:      Full URL to a video thumbnail image.
	description:    One-line video description.

	Subclasses should override _real_initialize() and _real_extract(),
	as well as the suitable() static method, and normally get
	instantiated and registered with the main downloader.
	"""

	# Class-level defaults; __init__ re-assigns both per instance.
	_ready = False
	_downloader = None

	def __init__(self, downloader=None):
		"""Create the extractor and attach the optional downloader."""
		self._ready = False
		self.set_downloader(downloader)

	@staticmethod
	def suitable(url):
		"""Return True when this IE can handle the given URL."""
		return False

	def initialize(self):
		"""Run one-time setup (authentication, etc.), at most once."""
		if self._ready:
			return
		self._real_initialize()
		self._ready = True

	def extract(self, url):
		"""Initialize if needed, then extract information for url."""
		self.initialize()
		return self._real_extract(url)

	def set_downloader(self, downloader):
		"""Attach the downloader this IE reports to."""
		self._downloader = downloader

	def _real_initialize(self):
		"""Actual initialization; subclasses override this."""
		pass

	def _real_extract(self, url):
		"""Actual extraction; subclasses override this."""
		pass
870
871
class YoutubeIE(InfoExtractor):
	"""Information extractor for youtube.com."""

	# Group 1 captures the scheme/host/path prefix, group 2 the video id;
	# the trailing (?(1).+)? permits extra URL characters only when a
	# prefix actually matched.
	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	_NETRC_MACHINE = 'youtube'
	# Listed in order of quality
	_available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
	# Maps itag -> container extension; anything missing falls back to 'flv'
	# in _real_extract.
	_video_extensions = {
		'13': '3gp',
		'17': 'mp4',
		'18': 'mp4',
		'22': 'mp4',
		'37': 'mp4',
		'38': 'video', # You actually don't know if this will be MOV, AVI or whatever
		'43': 'webm',
		'45': 'webm',
	}

	@staticmethod
	def suitable(url):
		"""Return True if url matches _VALID_URL."""
		return (re.match(YoutubeIE._VALID_URL, url) is not None)

	def report_lang(self):
		"""Report attempt to set language."""
		self._downloader.to_screen(u'[youtube] Setting language')

	def report_login(self):
		"""Report attempt to log in."""
		self._downloader.to_screen(u'[youtube] Logging in')

	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[youtube] Confirming age')

	def report_video_webpage_download(self, video_id):
		"""Report attempt to download video webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)

	def report_video_info_webpage_download(self, video_id):
		"""Report attempt to download video info webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)

	def report_information_extraction(self, video_id):
		"""Report attempt to extract video information."""
		self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)

	def report_unavailable_format(self, video_id, format):
		"""Report extracted video URL."""
		self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))

	def report_rtmp_download(self):
		"""Indicate the download will use the RTMP protocol."""
		self._downloader.to_screen(u'[youtube] RTMP download detected')

	def _real_initialize(self):
		"""Set the site language to English and, when credentials are
		available (params or ~/.netrc), log in and confirm age."""
		if self._downloader is None:
			return

		username = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					username = info[0]
					password = info[2]
				else:
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		# Set language
		request = urllib2.Request(self._LANG_URL)
		try:
			self.report_lang()
			urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
			return

		# No authentication to be performed
		if username is None:
			return

		# Log in
		login_form = {
				'current_form': 'loginForm',
				'next':		'/',
				'action_login':	'Log In',
				'username':	username,
				'password':	password,
				}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			# If the login form is still present, authentication failed.
			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return

		# Confirm age
		age_form = {
				'next_url':		'/',
				'action_confirm':	'Confirm',
				}
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
		try:
			self.report_age_confirmation()
			age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return

	def _real_extract(self, url):
		"""Extract metadata for the video behind url and hand one
		process_info() dict per requested format to the downloader."""
		# Extract video id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return
		video_id = mobj.group(2)

		# Get video webpage
		# NOTE(review): the literal '&amp;' below looks like an HTML-escaped
		# '&' that leaked into the URL -- verify it is intentional.
		self.report_video_webpage_download(video_id)
		request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id)
		try:
			video_webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
			return

		# Attempt to extract SWF player URL
		mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
		if mobj is not None:
			# Undo the JavaScript backslash-escaping in the matched URL
			player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
		else:
			player_url = None

		# Get video info: try each 'el' variant until one yields a token
		self.report_video_info_webpage_download(video_id)
		for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
			video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
					% (video_id, el_type))
			request = urllib2.Request(video_info_url)
			try:
				video_info_webpage = urllib2.urlopen(request).read()
				video_info = parse_qs(video_info_webpage)
				if 'token' in video_info:
					break
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
				return
		if 'token' not in video_info:
			if 'reason' in video_info:
				self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
			else:
				self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
			return

		# Start extracting information
		self.report_information_extraction(video_id)

		# uploader
		if 'author' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = urllib.unquote_plus(video_info['author'][0])

		# title
		if 'title' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract video title')
			return
		video_title = urllib.unquote_plus(video_info['title'][0])
		video_title = video_title.decode('utf-8')
		video_title = sanitize_title(video_title)

		# simplified title
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
		simple_title = simple_title.strip(ur'_')

		# thumbnail image
		if 'thumbnail_url' not in video_info:
			self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
			video_thumbnail = ''
		else:	# don't panic if we can't find it
			video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

		# upload date
		upload_date = u'NA'
		mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
		if mobj is not None:
			upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
			format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
			for expression in format_expressions:
				# NOTE(review): the bare except silently ignores parse
				# errors, and the loop keeps trying the remaining formats
				# even after a successful parse (no break) -- that only
				# works because re-parsing the already-normalized
				# '%Y%m%d' string fails and falls through to pass.
				try:
					upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
				except:
					pass

		# description
		video_description = 'No description available.'
		if self._downloader.params.get('forcedescription', False):
			mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
			if mobj is not None:
				video_description = mobj.group(1)

		# token
		video_token = urllib.unquote_plus(video_info['token'][0])

		# Decide which formats to download
		req_format = self._downloader.params.get('format', None)

		if 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
			# Each comma-separated entry is an &-separated list of
			# key=value pairs describing one available format.
			url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
			url_data = [dict(pairStr.split('=') for pairStr in uds.split('&')) for uds in url_data_strs]
			url_map = dict((ud['itag'], urllib.unquote(ud['url'])) for ud in url_data)
			format_limit = self._downloader.params.get('format_limit', None)
			if format_limit is not None and format_limit in self._available_formats:
				format_list = self._available_formats[self._available_formats.index(format_limit):]
			else:
				format_list = self._available_formats
			existing_formats = [x for x in format_list if x in url_map]
			if len(existing_formats) == 0:
				self._downloader.trouble(u'ERROR: no known formats available for video')
				return
			if req_format is None:
				video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
			elif req_format == '-1':
				video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
			else:
				# Specific format
				if req_format not in url_map:
					self._downloader.trouble(u'ERROR: requested format not available')
					return
				video_url_list = [(req_format, url_map[req_format])] # Specific format

		elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
			self.report_rtmp_download()
			video_url_list = [(None, video_info['conn'][0])]

		else:
			self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
			return

		for format_param, video_real_url in video_url_list:
			# At this point we have a new video
			self._downloader.increment_downloads()

			# Extension
			video_extension = self._video_extensions.get(format_param, 'flv')

			# Find the video URL in fmt_url_map or conn paramters
			try:
				# Process video information
				self._downloader.process_info({
					'id':		video_id.decode('utf-8'),
					'url':		video_real_url.decode('utf-8'),
					'uploader':	video_uploader.decode('utf-8'),
					'upload_date':	upload_date,
					'title':	video_title,
					'stitle':	simple_title,
					'ext':		video_extension.decode('utf-8'),
					'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),
					'thumbnail':	video_thumbnail.decode('utf-8'),
					'description':	video_description.decode('utf-8'),
					'player_url':	player_url,
				})
			except UnavailableVideoError, err:
				self._downloader.trouble(u'\nERROR: unable to download video')
1153
1154
1155 class MetacafeIE(InfoExtractor):
1156         """Information Extractor for metacafe.com."""
1157
1158         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
1159         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
1160         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
1161         _youtube_ie = None
1162
1163         def __init__(self, youtube_ie, downloader=None):
1164                 InfoExtractor.__init__(self, downloader)
1165                 self._youtube_ie = youtube_ie
1166
1167         @staticmethod
1168         def suitable(url):
1169                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
1170
	def report_disclaimer(self):
		"""Report that the family-filter disclaimer page is being fetched."""
		self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
1174
	def report_age_confirmation(self):
		"""Report that the over-18 confirmation form is being submitted."""
		self._downloader.to_screen(u'[metacafe] Confirming age')
1178
	def report_download_webpage(self, video_id):
		"""Report that the watch page for video_id is being downloaded."""
		self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
1182
	def report_extraction(self, video_id):
		"""Report that information extraction for video_id has started."""
		self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
1186
	def _real_initialize(self):
		"""Fetch the family-filter disclaimer page and POST past it.

		Metacafe gates some videos behind an over-18 confirmation; this
		fetches the disclaimer page (presumably so the session picks up
		the right cookies -- TODO confirm a cookie-handling opener is
		installed) and then submits the filter form so later page
		fetches are not blocked.
		"""
		# Retrieve disclaimer
		request = urllib2.Request(self._DISCLAIMER)
		try:
			self.report_disclaimer()
			# NOTE(review): the response body is read but never used.
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
			return

		# Confirm age
		disclaimer_form = {
			'filters': '0',
			'submit': "Continue - I'm over 18",
			}
		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
		try:
			self.report_age_confirmation()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return
1209
	def _real_extract(self, url):
		"""Extract and download a Metacafe video.

		Errors are reported through self._downloader.trouble(); the
		method always returns None.
		"""
		# Extract id and simplified title from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		video_id = mobj.group(1)

		# Check if video comes from YouTube
		mobj2 = re.match(r'^yt-(.*)$', video_id)
		if mobj2 is not None:
			# Metacafe mirrors YouTube videos under "yt-<id>" ids;
			# delegate those to the YouTube extractor.
			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
			return

		# At this point we have a new video
		self._downloader.increment_downloads()

		simple_title = mobj.group(2).decode('utf-8')

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
		if mobj is not None:
			# Page variant 1: media URL appears directly in the markup.
			mediaURL = urllib.unquote(mobj.group(1))
			# assumes the extension is exactly 3 chars (flv/mp4) -- TODO confirm
			video_extension = mediaURL[-3:]

			# Extract gdaKey if available
			mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
			if mobj is None:
				video_url = mediaURL
			else:
				gdaKey = mobj.group(1)
				video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
		else:
			# Page variant 2: media data is embedded in the "flashvars" value.
			mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			vardict = parse_qs(mobj.group(1))
			if 'mediaData' not in vardict:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			# Un-escape the JSON-escaped slashes before using the URL.
			mediaURL = mobj.group(1).replace('\\/', '/')
			video_extension = mediaURL[-3:]
			video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))

		mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)

		mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = mobj.group(1)

		try:
			# Process video information
			self._downloader.process_info({
				'id':		video_id.decode('utf-8'),
				'url':		video_url.decode('utf-8'),
				'uploader':	video_uploader.decode('utf-8'),
				'upload_date':	u'NA',
				'title':	video_title,
				'stitle':	simple_title,
				'ext':		video_extension.decode('utf-8'),
				'format':	u'NA',
				'player_url':	None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'\nERROR: unable to download video')
1298
1299
class DailymotionIE(InfoExtractor):
	"""Information Extractor for Dailymotion"""

	_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'

	def __init__(self, downloader=None):
		InfoExtractor.__init__(self, downloader)

	@staticmethod
	def suitable(url):
		# True when the URL matches the Dailymotion /video/<id>_<title> form.
		return (re.match(DailymotionIE._VALID_URL, url) is not None)

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)

	def _real_initialize(self):
		# No login or setup is needed for Dailymotion.
		return

	def _real_extract(self, url):
		"""Extract and download a Dailymotion video.

		Errors are reported through self._downloader.trouble(); the
		method always returns None.
		"""
		# Extract id and simplified title from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		# At this point we have a new video
		self._downloader.increment_downloads()
		video_id = mobj.group(1)

		simple_title = mobj.group(2).decode('utf-8')
		video_extension = 'flv'

		# Retrieve video webpage to extract further information
		request = urllib2.Request(url)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract media URL')
			return
		mediaURL = urllib.unquote(mobj.group(1))

		# TODO: if needed, prepend http://www.dailymotion.com/ when mediaURL is relative

		video_url = mediaURL

		# '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
		mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)

		# Uploader name comes from the embedded "owner" attribute in the page.
		mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = mobj.group(1)

		try:
			# Process video information
			self._downloader.process_info({
				'id':		video_id.decode('utf-8'),
				'url':		video_url.decode('utf-8'),
				'uploader':	video_uploader.decode('utf-8'),
				'upload_date':	u'NA',
				'title':	video_title,
				'stitle':	simple_title,
				'ext':		video_extension.decode('utf-8'),
				'format':	u'NA',
				'player_url':	None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'\nERROR: unable to download video')
1387
1388
class GoogleIE(InfoExtractor):
	"""Information extractor for video.google.com."""

	_VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'

	def __init__(self, downloader=None):
		InfoExtractor.__init__(self, downloader)

	@staticmethod
	def suitable(url):
		# True for videoplay?docid=... URLs on any Google Video country domain.
		return (re.match(GoogleIE._VALID_URL, url) is not None)

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)

	def _real_initialize(self):
		# No login or setup is needed for Google Video.
		return

	def _real_extract(self, url):
		"""Extract and download a Google Video clip.

		Errors are reported through self._downloader.trouble(); the
		method always returns None.
		"""
		# Extract id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
			return

		# At this point we have a new video
		self._downloader.increment_downloads()
		video_id = mobj.group(1)

		video_extension = 'mp4'

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader, and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r"download_url:'([^']+)'", webpage)
		if mobj is None:
			# No direct download URL: fall back to the embedded flv stream.
			video_extension = 'flv'
			mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract media URL')
			return
		mediaURL = urllib.unquote(mobj.group(1))
		# Decode the JS-escaped '=' and '&' characters in the URL.
		mediaURL = mediaURL.replace('\\x3d', '\x3d')
		mediaURL = mediaURL.replace('\\x26', '\x26')

		video_url = mediaURL

		mobj = re.search(r'<title>(.*)</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

		# Extract video description
		mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract video description')
			return
		video_description = mobj.group(1).decode('utf-8')
		if not video_description:
			video_description = 'No description available.'

		# Extract video thumbnail
		if self._downloader.params.get('forcethumbnail', False):
			request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
			try:
				webpage = urllib2.urlopen(request).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
				return
			mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
				return
			video_thumbnail = mobj.group(1)
		else:	# we need something to pass to process_info
			video_thumbnail = ''

		# NOTE(review): video_description and video_thumbnail are computed
		# above but never passed to process_info below -- possibly an
		# oversight; confirm before relying on them downstream.
		try:
			# Process video information
			self._downloader.process_info({
				'id':		video_id.decode('utf-8'),
				'url':		video_url.decode('utf-8'),
				'uploader':	u'NA',
				'upload_date':	u'NA',
				'title':	video_title,
				'stitle':	simple_title,
				'ext':		video_extension.decode('utf-8'),
				'format':	u'NA',
				'player_url':	None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'\nERROR: unable to download video')
1497
1498
class PhotobucketIE(InfoExtractor):
	"""Information extractor for photobucket.com."""

	_VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'

	def __init__(self, downloader=None):
		InfoExtractor.__init__(self, downloader)

	@staticmethod
	def suitable(url):
		# True for Photobucket URLs carrying a "current=<file>.flv" parameter.
		return (re.match(PhotobucketIE._VALID_URL, url) is not None)

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)

	def _real_initialize(self):
		# No login or setup is needed for Photobucket.
		return

	def _real_extract(self, url):
		"""Extract and download a Photobucket flv video.

		Errors are reported through self._downloader.trouble(); the
		method always returns None.
		"""
		# Extract id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
			return

		# At this point we have a new video
		self._downloader.increment_downloads()
		video_id = mobj.group(1)

		video_extension = 'flv'

		# Retrieve video webpage to extract further information
		request = urllib2.Request(url)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader, and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract media URL')
			return
		mediaURL = urllib.unquote(mobj.group(1))

		video_url = mediaURL

		# Group 1 is the title, group 2 the uploader name.
		mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

		video_uploader = mobj.group(2).decode('utf-8')

		try:
			# Process video information
			self._downloader.process_info({
				'id':		video_id.decode('utf-8'),
				'url':		video_url.decode('utf-8'),
				'uploader':	video_uploader,
				'upload_date':	u'NA',
				'title':	video_title,
				'stitle':	simple_title,
				'ext':		video_extension.decode('utf-8'),
				'format':	u'NA',
				'player_url':	None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'\nERROR: unable to download video')
1579
1580
1581 class YahooIE(InfoExtractor):
1582         """Information extractor for video.yahoo.com."""
1583
1584         # _VALID_URL matches all Yahoo! Video URLs
1585         # _VPAGE_URL matches only the extractable '/watch/' URLs
1586         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1587         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1588
1589         def __init__(self, downloader=None):
1590                 InfoExtractor.__init__(self, downloader)
1591
1592         @staticmethod
1593         def suitable(url):
1594                 return (re.match(YahooIE._VALID_URL, url) is not None)
1595
1596         def report_download_webpage(self, video_id):
1597                 """Report webpage download."""
1598                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1599
1600         def report_extraction(self, video_id):
1601                 """Report information extraction."""
1602                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1603
1604         def _real_initialize(self):
1605                 return
1606
1607         def _real_extract(self, url, new_video=True):
1608                 # Extract ID from URL
1609                 mobj = re.match(self._VALID_URL, url)
1610                 if mobj is None:
1611                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1612                         return
1613
1614                 # At this point we have a new video
1615                 self._downloader.increment_downloads()
1616                 video_id = mobj.group(2)
1617                 video_extension = 'flv'
1618
1619                 # Rewrite valid but non-extractable URLs as
1620                 # extractable English language /watch/ URLs
1621                 if re.match(self._VPAGE_URL, url) is None:
1622                         request = urllib2.Request(url)
1623                         try:
1624                                 webpage = urllib2.urlopen(request).read()
1625                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1626                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1627                                 return
1628
1629                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1630                         if mobj is None:
1631                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1632                                 return
1633                         yahoo_id = mobj.group(1)
1634
1635                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1636                         if mobj is None:
1637                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1638                                 return
1639                         yahoo_vid = mobj.group(1)
1640
1641                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1642                         return self._real_extract(url, new_video=False)
1643
1644                 # Retrieve video webpage to extract further information
1645                 request = urllib2.Request(url)
1646                 try:
1647                         self.report_download_webpage(video_id)
1648                         webpage = urllib2.urlopen(request).read()
1649                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1650                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1651                         return
1652
1653                 # Extract uploader and title from webpage
1654                 self.report_extraction(video_id)
1655                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1656                 if mobj is None:
1657                         self._downloader.trouble(u'ERROR: unable to extract video title')
1658                         return
1659                 video_title = mobj.group(1).decode('utf-8')
1660                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1661
1662                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1663                 if mobj is None:
1664                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1665                         return
1666                 video_uploader = mobj.group(1).decode('utf-8')
1667
1668                 # Extract video thumbnail
1669                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1670                 if mobj is None:
1671                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1672                         return
1673                 video_thumbnail = mobj.group(1).decode('utf-8')
1674
1675                 # Extract video description
1676                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1677                 if mobj is None:
1678                         self._downloader.trouble(u'ERROR: unable to extract video description')
1679                         return
1680                 video_description = mobj.group(1).decode('utf-8')
1681                 if not video_description:
1682                         video_description = 'No description available.'
1683
1684                 # Extract video height and width
1685                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1686                 if mobj is None:
1687                         self._downloader.trouble(u'ERROR: unable to extract video height')
1688                         return
1689                 yv_video_height = mobj.group(1)
1690
1691                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1692                 if mobj is None:
1693                         self._downloader.trouble(u'ERROR: unable to extract video width')
1694                         return
1695                 yv_video_width = mobj.group(1)
1696
1697                 # Retrieve video playlist to extract media URL
1698                 # I'm not completely sure what all these options are, but we
1699                 # seem to need most of them, otherwise the server sends a 401.
1700                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1701                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1702                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1703                                 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1704                                 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1705                 try:
1706                         self.report_download_webpage(video_id)
1707                         webpage = urllib2.urlopen(request).read()
1708                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1709                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1710                         return
1711
1712                 # Extract media URL from playlist XML
1713                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1714                 if mobj is None:
1715                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1716                         return
1717                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1718                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1719
1720                 try:
1721                         # Process video information
1722                         self._downloader.process_info({
1723                                 'id':           video_id.decode('utf-8'),
1724                                 'url':          video_url,
1725                                 'uploader':     video_uploader,
1726                                 'upload_date':  u'NA',
1727                                 'title':        video_title,
1728                                 'stitle':       simple_title,
1729                                 'ext':          video_extension.decode('utf-8'),
1730                                 'thumbnail':    video_thumbnail.decode('utf-8'),
1731                                 'description':  video_description,
1732                                 'thumbnail':    video_thumbnail,
1733                                 'description':  video_description,
1734                                 'player_url':   None,
1735                         })
1736                 except UnavailableVideoError:
1737                         self._downloader.trouble(u'\nERROR: unable to download video')
1738
1739
1740 class GenericIE(InfoExtractor):
1741         """Generic last-resort information extractor."""
1742
1743         def __init__(self, downloader=None):
1744                 InfoExtractor.__init__(self, downloader)
1745
1746         @staticmethod
1747         def suitable(url):
1748                 return True
1749
1750         def report_download_webpage(self, video_id):
1751                 """Report webpage download."""
1752                 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1753                 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1754
1755         def report_extraction(self, video_id):
1756                 """Report information extraction."""
1757                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1758
1759         def _real_initialize(self):
1760                 return
1761
1762         def _real_extract(self, url):
1763                 # At this point we have a new video
1764                 self._downloader.increment_downloads()
1765
1766                 video_id = url.split('/')[-1]
1767                 request = urllib2.Request(url)
1768                 try:
1769                         self.report_download_webpage(video_id)
1770                         webpage = urllib2.urlopen(request).read()
1771                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1772                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1773                         return
1774                 except ValueError, err:
1775                         # since this is the last-resort InfoExtractor, if
1776                         # this error is thrown, it'll be thrown here
1777                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1778                         return
1779
1780                 self.report_extraction(video_id)
1781                 # Start with something easy: JW Player in SWFObject
1782                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1783                 if mobj is None:
1784                         # Broaden the search a little bit
1785                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1786                 if mobj is None:
1787                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1788                         return
1789
1790                 # It's possible that one of the regexes
1791                 # matched, but returned an empty group:
1792                 if mobj.group(1) is None:
1793                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1794                         return
1795
1796                 video_url = urllib.unquote(mobj.group(1))
1797                 video_id = os.path.basename(video_url)
1798
1799                 # here's a fun little line of code for you:
1800                 video_extension = os.path.splitext(video_id)[1][1:]
1801                 video_id = os.path.splitext(video_id)[0]
1802
1803                 # it's tempting to parse this further, but you would
1804                 # have to take into account all the variations like
1805                 #   Video Title - Site Name
1806                 #   Site Name | Video Title
1807                 #   Video Title - Tagline | Site Name
1808                 # and so on and so forth; it's just not practical
1809                 mobj = re.search(r'<title>(.*)</title>', webpage)
1810                 if mobj is None:
1811                         self._downloader.trouble(u'ERROR: unable to extract title')
1812                         return
1813                 video_title = mobj.group(1).decode('utf-8')
1814                 video_title = sanitize_title(video_title)
1815                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1816
1817                 # video uploader is domain name
1818                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1819                 if mobj is None:
1820                         self._downloader.trouble(u'ERROR: unable to extract title')
1821                         return
1822                 video_uploader = mobj.group(1).decode('utf-8')
1823
1824                 try:
1825                         # Process video information
1826                         self._downloader.process_info({
1827                                 'id':           video_id.decode('utf-8'),
1828                                 'url':          video_url.decode('utf-8'),
1829                                 'uploader':     video_uploader,
1830                                 'upload_date':  u'NA',
1831                                 'title':        video_title,
1832                                 'stitle':       simple_title,
1833                                 'ext':          video_extension.decode('utf-8'),
1834                                 'format':       u'NA',
1835                                 'player_url':   None,
1836                         })
1837                 except UnavailableVideoError, err:
1838                         self._downloader.trouble(u'\nERROR: unable to download video')
1839
1840
1841 class YoutubeSearchIE(InfoExtractor):
1842         """Information Extractor for YouTube search queries."""
1843         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1844         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1845         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1846         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1847         _youtube_ie = None
1848         _max_youtube_results = 1000
1849
1850         def __init__(self, youtube_ie, downloader=None):
1851                 InfoExtractor.__init__(self, downloader)
1852                 self._youtube_ie = youtube_ie
1853
1854         @staticmethod
1855         def suitable(url):
1856                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1857
1858         def report_download_page(self, query, pagenum):
1859                 """Report attempt to download playlist page with given number."""
1860                 query = query.decode(preferredencoding())
1861                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1862
1863         def _real_initialize(self):
1864                 self._youtube_ie.initialize()
1865
1866         def _real_extract(self, query):
1867                 mobj = re.match(self._VALID_QUERY, query)
1868                 if mobj is None:
1869                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1870                         return
1871
1872                 prefix, query = query.split(':')
1873                 prefix = prefix[8:]
1874                 query = query.encode('utf-8')
1875                 if prefix == '':
1876                         self._download_n_results(query, 1)
1877                         return
1878                 elif prefix == 'all':
1879                         self._download_n_results(query, self._max_youtube_results)
1880                         return
1881                 else:
1882                         try:
1883                                 n = long(prefix)
1884                                 if n <= 0:
1885                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1886                                         return
1887                                 elif n > self._max_youtube_results:
1888                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
1889                                         n = self._max_youtube_results
1890                                 self._download_n_results(query, n)
1891                                 return
1892                         except ValueError: # parsing prefix as integer fails
1893                                 self._download_n_results(query, 1)
1894                                 return
1895
1896         def _download_n_results(self, query, n):
1897                 """Downloads a specified number of results for a query"""
1898
1899                 video_ids = []
1900                 already_seen = set()
1901                 pagenum = 1
1902
1903                 while True:
1904                         self.report_download_page(query, pagenum)
1905                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1906                         request = urllib2.Request(result_url)
1907                         try:
1908                                 page = urllib2.urlopen(request).read()
1909                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1910                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1911                                 return
1912
1913                         # Extract video identifiers
1914                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1915                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1916                                 if video_id not in already_seen:
1917                                         video_ids.append(video_id)
1918                                         already_seen.add(video_id)
1919                                         if len(video_ids) == n:
1920                                                 # Specified n videos reached
1921                                                 for id in video_ids:
1922                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1923                                                 return
1924
1925                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1926                                 for id in video_ids:
1927                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1928                                 return
1929
1930                         pagenum = pagenum + 1
1931
1932
1933 class GoogleSearchIE(InfoExtractor):
1934         """Information Extractor for Google Video search queries."""
1935         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1936         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1937         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1938         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1939         _google_ie = None
1940         _max_google_results = 1000
1941
1942         def __init__(self, google_ie, downloader=None):
1943                 InfoExtractor.__init__(self, downloader)
1944                 self._google_ie = google_ie
1945
1946         @staticmethod
1947         def suitable(url):
1948                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1949
1950         def report_download_page(self, query, pagenum):
1951                 """Report attempt to download playlist page with given number."""
1952                 query = query.decode(preferredencoding())
1953                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1954
1955         def _real_initialize(self):
1956                 self._google_ie.initialize()
1957
1958         def _real_extract(self, query):
1959                 mobj = re.match(self._VALID_QUERY, query)
1960                 if mobj is None:
1961                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1962                         return
1963
1964                 prefix, query = query.split(':')
1965                 prefix = prefix[8:]
1966                 query = query.encode('utf-8')
1967                 if prefix == '':
1968                         self._download_n_results(query, 1)
1969                         return
1970                 elif prefix == 'all':
1971                         self._download_n_results(query, self._max_google_results)
1972                         return
1973                 else:
1974                         try:
1975                                 n = long(prefix)
1976                                 if n <= 0:
1977                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1978                                         return
1979                                 elif n > self._max_google_results:
1980                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
1981                                         n = self._max_google_results
1982                                 self._download_n_results(query, n)
1983                                 return
1984                         except ValueError: # parsing prefix as integer fails
1985                                 self._download_n_results(query, 1)
1986                                 return
1987
1988         def _download_n_results(self, query, n):
1989                 """Downloads a specified number of results for a query"""
1990
1991                 video_ids = []
1992                 already_seen = set()
1993                 pagenum = 1
1994
1995                 while True:
1996                         self.report_download_page(query, pagenum)
1997                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1998                         request = urllib2.Request(result_url)
1999                         try:
2000                                 page = urllib2.urlopen(request).read()
2001                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2002                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2003                                 return
2004
2005                         # Extract video identifiers
2006                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2007                                 video_id = mobj.group(1)
2008                                 if video_id not in already_seen:
2009                                         video_ids.append(video_id)
2010                                         already_seen.add(video_id)
2011                                         if len(video_ids) == n:
2012                                                 # Specified n videos reached
2013                                                 for id in video_ids:
2014                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2015                                                 return
2016
2017                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2018                                 for id in video_ids:
2019                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2020                                 return
2021
2022                         pagenum = pagenum + 1
2023
2024
2025 class YahooSearchIE(InfoExtractor):
2026         """Information Extractor for Yahoo! Video search queries."""
2027         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
2028         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
2029         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
2030         _MORE_PAGES_INDICATOR = r'\s*Next'
2031         _yahoo_ie = None
2032         _max_yahoo_results = 1000
2033
2034         def __init__(self, yahoo_ie, downloader=None):
2035                 InfoExtractor.__init__(self, downloader)
2036                 self._yahoo_ie = yahoo_ie
2037
2038         @staticmethod
2039         def suitable(url):
2040                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
2041
2042         def report_download_page(self, query, pagenum):
2043                 """Report attempt to download playlist page with given number."""
2044                 query = query.decode(preferredencoding())
2045                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
2046
2047         def _real_initialize(self):
2048                 self._yahoo_ie.initialize()
2049
2050         def _real_extract(self, query):
2051                 mobj = re.match(self._VALID_QUERY, query)
2052                 if mobj is None:
2053                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2054                         return
2055
2056                 prefix, query = query.split(':')
2057                 prefix = prefix[8:]
2058                 query = query.encode('utf-8')
2059                 if prefix == '':
2060                         self._download_n_results(query, 1)
2061                         return
2062                 elif prefix == 'all':
2063                         self._download_n_results(query, self._max_yahoo_results)
2064                         return
2065                 else:
2066                         try:
2067                                 n = long(prefix)
2068                                 if n <= 0:
2069                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2070                                         return
2071                                 elif n > self._max_yahoo_results:
2072                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
2073                                         n = self._max_yahoo_results
2074                                 self._download_n_results(query, n)
2075                                 return
2076                         except ValueError: # parsing prefix as integer fails
2077                                 self._download_n_results(query, 1)
2078                                 return
2079
2080         def _download_n_results(self, query, n):
2081                 """Downloads a specified number of results for a query"""
2082
2083                 video_ids = []
2084                 already_seen = set()
2085                 pagenum = 1
2086
2087                 while True:
2088                         self.report_download_page(query, pagenum)
2089                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2090                         request = urllib2.Request(result_url)
2091                         try:
2092                                 page = urllib2.urlopen(request).read()
2093                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2094                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2095                                 return
2096
2097                         # Extract video identifiers
2098                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2099                                 video_id = mobj.group(1)
2100                                 if video_id not in already_seen:
2101                                         video_ids.append(video_id)
2102                                         already_seen.add(video_id)
2103                                         if len(video_ids) == n:
2104                                                 # Specified n videos reached
2105                                                 for id in video_ids:
2106                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2107                                                 return
2108
2109                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2110                                 for id in video_ids:
2111                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2112                                 return
2113
2114                         pagenum = pagenum + 1
2115
2116
2117 class YoutubePlaylistIE(InfoExtractor):
2118         """Information Extractor for YouTube playlists."""
2119
2120         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
2121         _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
2122         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2123         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2124         _youtube_ie = None
2125
2126         def __init__(self, youtube_ie, downloader=None):
2127                 InfoExtractor.__init__(self, downloader)
2128                 self._youtube_ie = youtube_ie
2129
2130         @staticmethod
2131         def suitable(url):
2132                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
2133
2134         def report_download_page(self, playlist_id, pagenum):
2135                 """Report attempt to download playlist page with given number."""
2136                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2137
2138         def _real_initialize(self):
2139                 self._youtube_ie.initialize()
2140
2141         def _real_extract(self, url):
2142                 # Extract playlist id
2143                 mobj = re.match(self._VALID_URL, url)
2144                 if mobj is None:
2145                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2146                         return
2147
2148                 # Single video case
2149                 if mobj.group(3) is not None:
2150                         self._youtube_ie.extract(mobj.group(3))
2151                         return
2152
2153                 # Download playlist pages
2154                 # prefix is 'p' as default for playlists but there are other types that need extra care
2155                 playlist_prefix = mobj.group(1)
2156                 if playlist_prefix == 'a':
2157                         playlist_access = 'artist'
2158                 else:
2159                         playlist_prefix = 'p'
2160                         playlist_access = 'view_play_list'
2161                 playlist_id = mobj.group(2)
2162                 video_ids = []
2163                 pagenum = 1
2164
2165                 while True:
2166                         self.report_download_page(playlist_id, pagenum)
2167                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
2168                         try:
2169                                 page = urllib2.urlopen(request).read()
2170                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2171                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2172                                 return
2173
2174                         # Extract video identifiers
2175                         ids_in_page = []
2176                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2177                                 if mobj.group(1) not in ids_in_page:
2178                                         ids_in_page.append(mobj.group(1))
2179                         video_ids.extend(ids_in_page)
2180
2181                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2182                                 break
2183                         pagenum = pagenum + 1
2184
2185                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2186                 playlistend = self._downloader.params.get('playlistend', -1)
2187                 video_ids = video_ids[playliststart:playlistend]
2188
2189                 for id in video_ids:
2190                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2191                 return
2192
2193
2194 class YoutubeUserIE(InfoExtractor):
2195         """Information Extractor for YouTube users."""
2196
2197         _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
2198         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2199         _GDATA_PAGE_SIZE = 50
2200         _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2201         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2202         _youtube_ie = None
2203
2204         def __init__(self, youtube_ie, downloader=None):
2205                 InfoExtractor.__init__(self, downloader)
2206                 self._youtube_ie = youtube_ie
2207
2208         @staticmethod
2209         def suitable(url):
2210                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2211
2212         def report_download_page(self, username, start_index):
2213                 """Report attempt to download user page."""
2214                 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
2215                                 (username, start_index, start_index + self._GDATA_PAGE_SIZE))
2216
2217         def _real_initialize(self):
2218                 self._youtube_ie.initialize()
2219
2220         def _real_extract(self, url):
2221                 # Extract username
2222                 mobj = re.match(self._VALID_URL, url)
2223                 if mobj is None:
2224                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2225                         return
2226
2227                 username = mobj.group(1)
2228
2229                 # Download video ids using YouTube Data API. Result size per
2230                 # query is limited (currently to 50 videos) so we need to query
2231                 # page by page until there are no video ids - it means we got
2232                 # all of them.
2233
2234                 video_ids = []
2235                 pagenum = 0
2236
2237                 while True:
2238                         start_index = pagenum * self._GDATA_PAGE_SIZE + 1
2239                         self.report_download_page(username, start_index)
2240
2241                         request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
2242
2243                         try:
2244                                 page = urllib2.urlopen(request).read()
2245                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2246                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2247                                 return
2248
2249                         # Extract video identifiers
2250                         ids_in_page = []
2251
2252                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2253                                 if mobj.group(1) not in ids_in_page:
2254                                         ids_in_page.append(mobj.group(1))
2255
2256                         video_ids.extend(ids_in_page)
2257
2258                         # A little optimization - if current page is not
2259                         # "full", ie. does not contain PAGE_SIZE video ids then
2260                         # we can assume that this page is the last one - there
2261                         # are no more ids on further pages - no need to query
2262                         # again.
2263
2264                         if len(ids_in_page) < self._GDATA_PAGE_SIZE:
2265                                 break
2266
2267                         pagenum += 1
2268
2269                 all_ids_count = len(video_ids)
2270                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2271                 playlistend = self._downloader.params.get('playlistend', -1)
2272
2273                 if playlistend == -1:
2274                         video_ids = video_ids[playliststart:]
2275                 else:
2276                         video_ids = video_ids[playliststart:playlistend]
2277
2278                 self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
2279                                 (username, all_ids_count, len(video_ids)))
2280
2281                 for video_id in video_ids:
2282                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2283
2284
2285 class DepositFilesIE(InfoExtractor):
2286         """Information extractor for depositfiles.com"""
2287
2288         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
2289
2290         def __init__(self, downloader=None):
2291                 InfoExtractor.__init__(self, downloader)
2292
2293         @staticmethod
2294         def suitable(url):
2295                 return (re.match(DepositFilesIE._VALID_URL, url) is not None)
2296
2297         def report_download_webpage(self, file_id):
2298                 """Report webpage download."""
2299                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2300
2301         def report_extraction(self, file_id):
2302                 """Report information extraction."""
2303                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2304
2305         def _real_initialize(self):
2306                 return
2307
2308         def _real_extract(self, url):
2309                 # At this point we have a new file
2310                 self._downloader.increment_downloads()
2311
2312                 file_id = url.split('/')[-1]
2313                 # Rebuild url in english locale
2314                 url = 'http://depositfiles.com/en/files/' + file_id
2315
2316                 # Retrieve file webpage with 'Free download' button pressed
2317                 free_download_indication = { 'gateway_result' : '1' }
2318                 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
2319                 try:
2320                         self.report_download_webpage(file_id)
2321                         webpage = urllib2.urlopen(request).read()
2322                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2323                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2324                         return
2325
2326                 # Search for the real file URL
2327                 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2328                 if (mobj is None) or (mobj.group(1) is None):
2329                         # Try to figure out reason of the error.
2330                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2331                         if (mobj is not None) and (mobj.group(1) is not None):
2332                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2333                                 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2334                         else:
2335                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2336                         return
2337
2338                 file_url = mobj.group(1)
2339                 file_extension = os.path.splitext(file_url)[1][1:]
2340
2341                 # Search for file title
2342                 mobj = re.search(r'<b title="(.*?)">', webpage)
2343                 if mobj is None:
2344                         self._downloader.trouble(u'ERROR: unable to extract title')
2345                         return
2346                 file_title = mobj.group(1).decode('utf-8')
2347
2348                 try:
2349                         # Process file information
2350                         self._downloader.process_info({
2351                                 'id':           file_id.decode('utf-8'),
2352                                 'url':          file_url.decode('utf-8'),
2353                                 'uploader':     u'NA',
2354                                 'upload_date':  u'NA',
2355                                 'title':        file_title,
2356                                 'stitle':       file_title,
2357                                 'ext':          file_extension.decode('utf-8'),
2358                                 'format':       u'NA',
2359                                 'player_url':   None,
2360                         })
2361                 except UnavailableVideoError, err:
2362                         self._downloader.trouble(u'ERROR: unable to download file')
2363
2364
2365 class FacebookIE(InfoExtractor):
2366         """Information Extractor for Facebook"""
2367
2368         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2369         _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2370         _NETRC_MACHINE = 'facebook'
2371         _available_formats = ['highqual', 'lowqual']
2372         _video_extensions = {
2373                 'highqual': 'mp4',
2374                 'lowqual': 'mp4',
2375         }
2376
	def __init__(self, downloader=None):
		# Base class handles registration with the file downloader.
		InfoExtractor.__init__(self, downloader)
2379
2380         @staticmethod
2381         def suitable(url):
2382                 return (re.match(FacebookIE._VALID_URL, url) is not None)
2383
2384         def _reporter(self, message):
2385                 """Add header and report message."""
2386                 self._downloader.to_screen(u'[facebook] %s' % message)
2387
	def report_login(self):
		"""Report attempt to log in."""
		self._reporter(u'Logging in')
2391
2392         def report_video_webpage_download(self, video_id):
2393                 """Report attempt to download video webpage."""
2394                 self._reporter(u'%s: Downloading video webpage' % video_id)
2395
2396         def report_information_extraction(self, video_id):
2397                 """Report attempt to extract video information."""
2398                 self._reporter(u'%s: Extracting video information' % video_id)
2399
2400         def _parse_page(self, video_webpage):
2401                 """Extract video information from page"""
2402                 # General data
2403                 data = {'title': r'class="video_title datawrap">(.*?)</',
2404                         'description': r'<div class="datawrap">(.*?)</div>',
2405                         'owner': r'\("video_owner_name", "(.*?)"\)',
2406                         'upload_date': r'data-date="(.*?)"',
2407                         'thumbnail':  r'\("thumb_url", "(?P<THUMB>.*?)"\)',
2408                         }
2409                 video_info = {}
2410                 for piece in data.keys():
2411                         mobj = re.search(data[piece], video_webpage)
2412                         if mobj is not None:
2413                                 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2414
2415                 # Video urls
2416                 video_urls = {}
2417                 for fmt in self._available_formats:
2418                         mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
2419                         if mobj is not None:
2420                                 # URL is in a Javascript segment inside an escaped Unicode format within
2421                                 # the generally utf-8 page
2422                                 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2423                 video_info['video_urls'] = video_urls
2424
2425                 return video_info
2426
	def _real_initialize(self):
		"""Log in to Facebook if credentials are available.

		Credentials come from the --username/--password options or, with
		--netrc, from the 'facebook' machine entry in ~/.netrc. Without
		credentials (or on any login problem) this is a no-op: failures
		are reported as warnings, never as hard errors.
		"""
		# Nothing to do when running without a downloader (e.g. tests).
		if self._downloader is None:
			return

		useremail = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			useremail = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					useremail = info[0]
					password = info[2]
				else:
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				# A broken or missing .netrc only downgrades to anonymous access.
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		# No credentials found anywhere: proceed without logging in.
		if useremail is None:
			return

		# Log in via the mobile login form.
		login_form = {
			'email': useremail,
			'pass': password,
			'login': 'Log+In'
			}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			# If the response still contains the login form, authentication failed.
			if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return
2470
2471         def _real_extract(self, url):
2472                 mobj = re.match(self._VALID_URL, url)
2473                 if mobj is None:
2474                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2475                         return
2476                 video_id = mobj.group('ID')
2477
2478                 # Get video webpage
2479                 self.report_video_webpage_download(video_id)
2480                 request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
2481                 try:
2482                         page = urllib2.urlopen(request)
2483                         video_webpage = page.read()
2484                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2485                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
2486                         return
2487
2488                 # Start extracting information
2489                 self.report_information_extraction(video_id)
2490
2491                 # Extract information
2492                 video_info = self._parse_page(video_webpage)
2493
2494                 # uploader
2495                 if 'owner' not in video_info:
2496                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
2497                         return
2498                 video_uploader = video_info['owner']
2499
2500                 # title
2501                 if 'title' not in video_info:
2502                         self._downloader.trouble(u'ERROR: unable to extract video title')
2503                         return
2504                 video_title = video_info['title']
2505                 video_title = video_title.decode('utf-8')
2506                 video_title = sanitize_title(video_title)
2507
2508                 # simplified title
2509                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2510                 simple_title = simple_title.strip(ur'_')
2511
2512                 # thumbnail image
2513                 if 'thumbnail' not in video_info:
2514                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
2515                         video_thumbnail = ''
2516                 else:
2517                         video_thumbnail = video_info['thumbnail']
2518
2519                 # upload date
2520                 upload_date = u'NA'
2521                 if 'upload_date' in video_info:
2522                         upload_time = video_info['upload_date']
2523                         timetuple = email.utils.parsedate_tz(upload_time)
2524                         if timetuple is not None:
2525                                 try:
2526                                         upload_date = time.strftime('%Y%m%d', timetuple[0:9])
2527                                 except:
2528                                         pass
2529
2530                 # description
2531                 video_description = 'No description available.'
2532                 if (self._downloader.params.get('forcedescription', False) and
2533                         'description' in video_info):
2534                         video_description = video_info['description']
2535
2536                 url_map = video_info['video_urls']
2537                 if len(url_map.keys()) > 0:
2538                         # Decide which formats to download
2539                         req_format = self._downloader.params.get('format', None)
2540                         format_limit = self._downloader.params.get('format_limit', None)
2541
2542                         if format_limit is not None and format_limit in self._available_formats:
2543                                 format_list = self._available_formats[self._available_formats.index(format_limit):]
2544                         else:
2545                                 format_list = self._available_formats
2546                         existing_formats = [x for x in format_list if x in url_map]
2547                         if len(existing_formats) == 0:
2548                                 self._downloader.trouble(u'ERROR: no known formats available for video')
2549                                 return
2550                         if req_format is None:
2551                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
2552                         elif req_format == '-1':
2553                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
2554                         else:
2555                                 # Specific format
2556                                 if req_format not in url_map:
2557                                         self._downloader.trouble(u'ERROR: requested format not available')
2558                                         return
2559                                 video_url_list = [(req_format, url_map[req_format])] # Specific format
2560
2561                 for format_param, video_real_url in video_url_list:
2562
2563                         # At this point we have a new video
2564                         self._downloader.increment_downloads()
2565
2566                         # Extension
2567                         video_extension = self._video_extensions.get(format_param, 'mp4')
2568
2569                         # Find the video URL in fmt_url_map or conn paramters
2570                         try:
2571                                 # Process video information
2572                                 self._downloader.process_info({
2573                                         'id':           video_id.decode('utf-8'),
2574                                         'url':          video_real_url.decode('utf-8'),
2575                                         'uploader':     video_uploader.decode('utf-8'),
2576                                         'upload_date':  upload_date,
2577                                         'title':        video_title,
2578                                         'stitle':       simple_title,
2579                                         'ext':          video_extension.decode('utf-8'),
2580                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
2581                                         'thumbnail':    video_thumbnail.decode('utf-8'),
2582                                         'description':  video_description.decode('utf-8'),
2583                                         'player_url':   None,
2584                                 })
2585                         except UnavailableVideoError, err:
2586                                 self._downloader.trouble(u'\nERROR: unable to download video')
2587
2588
class PostProcessor(object):
	"""Base class for post-processing steps.

	A PostProcessor is attached to a FileDownloader through the
	downloader's add_post_processor() method.  After each successful
	download the downloader walks its chain of post processors,
	feeding each one's run() return value to the next one.  The chain
	stops as soon as a run() returns None or the last processor has
	executed.

	Like InfoExtractor objects, post processors and downloaders
	register with each other mutually.
	"""

	# FileDownloader instance this post processor is attached to.
	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Attach this post processor to *downloader*."""
		self._downloader = downloader

	def run(self, information):
		"""Process one downloaded file.

		*information* is an InfoExtractor-style dictionary carrying an
		extra 'filepath' key that names the downloaded file.  The
		return value (possibly a dictionary with some fields changed)
		is handed to the next post processor in the chain; returning
		None stops the chain.  Implementations may raise
		PostProcessingError to report a failure to the downloader.
		"""
		# Default implementation: pass the data through unchanged.
		return information
2634
2635
class FFmpegExtractAudioPP(PostProcessor):
	"""Post processor that converts a downloaded video into an
	audio-only file using the external ffmpeg and ffprobe tools.
	"""

	def __init__(self, downloader=None, preferredcodec=None):
		PostProcessor.__init__(self, downloader)
		# 'best' means: keep the source codec losslessly when possible.
		self._preferredcodec = 'best' if preferredcodec is None else preferredcodec

	@staticmethod
	def get_audio_codec(path):
		"""Return the audio codec name of the file at *path* (via
		ffprobe), or None when it cannot be determined."""
		try:
			probe = subprocess.Popen(
				['ffprobe', '-show_streams', '--', path],
				stderr=open(os.path.devnull, 'w'), stdout=subprocess.PIPE)
			probe_output = probe.communicate()[0]
			if probe.wait() != 0:
				return None
		except (IOError, OSError):
			return None
		# ffprobe prints codec_name=... before codec_type=... for each
		# stream; remember the last codec seen and report it once an
		# audio stream is identified.
		last_codec = None
		for probe_line in probe_output.split('\n'):
			if probe_line.startswith('codec_name='):
				last_codec = probe_line.split('=')[1].strip()
			elif probe_line.strip() == 'codec_type=audio' and last_codec is not None:
				return last_codec
		return None

	@staticmethod
	def run_ffmpeg(path, out_path, codec, more_opts):
		"""Transcode *path* to *out_path* with the given audio codec and
		extra ffmpeg options; return True on success."""
		try:
			cmd = (['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec]
				+ more_opts + ['--', out_path])
			ret = subprocess.call(cmd, stdout=open(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
			return ret == 0
		except (IOError, OSError):
			return False

	def run(self, information):
		path = information['filepath']

		source_codec = self.get_audio_codec(path)
		if source_codec is None:
			self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
			return None

		extra_args = []
		if self._preferredcodec in ('best', source_codec):
			if source_codec in ('aac', 'mp3'):
				# Lossless: copy the stream instead of re-encoding.
				acodec = 'copy'
				extension = source_codec
				if source_codec == 'aac':
					extra_args = ['-f', 'adts']
			else:
				# Anything else gets re-encoded to MP3.
				acodec = 'libmp3lame'
				extension = 'mp3'
				extra_args = ['-ab', '128k']
		else:
			# Lossy conversion to the explicitly requested codec.
			acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
			extension = self._preferredcodec
			extra_args = ['-ab', '128k']
			if self._preferredcodec == 'aac':
				extra_args += ['-f', 'adts']

		prefix = os.path.splitext(path)[0]
		new_path = prefix + '.' + extension
		self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
		if not self.run_ffmpeg(path, new_path, acodec, extra_args):
			self._downloader.to_stderr(u'WARNING: error running ffmpeg')
			return None

		# Drop the source video once the audio file exists.
		try:
			os.remove(path)
		except (IOError, OSError):
			self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
			return None

		information['filepath'] = new_path
		return information
2717
2718 ### MAIN PROGRAM ###
2719 if __name__ == '__main__':
2720         try:
2721                 # Modules needed only when running the main program
2722                 import getpass
2723                 import optparse
2724
2725                 # Function to update the program file with the latest version from the repository.
		def update_self(downloader, filename):
			"""Overwrite *filename* with the newest stable youtube-dl.

			Fetches the latest version tag from the repository, then the
			matching script, and writes it over the running program.
			Exits the process with an error message on any failure.
			"""
			# Note: downloader only used for options
			if not os.access(filename, os.W_OK):
				sys.exit('ERROR: no write permissions on %s' % filename)

			downloader.to_screen('Updating to latest stable version...')
			try:
				# LATEST_VERSION holds the tag name of the newest release.
				latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
				latest_version = urllib.urlopen(latest_url).read().strip()
				prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
				newcontent = urllib.urlopen(prog_url).read()
			except (IOError, OSError), err:
				sys.exit('ERROR: unable to download latest version')
			try:
				stream = open(filename, 'w')
				stream.write(newcontent)
				stream.close()
			except (IOError, OSError), err:
				sys.exit('ERROR: unable to overwrite current version')
			downloader.to_screen('Updated to version %s' % latest_version)
2746
2747                 # Parse command line
2748                 parser = optparse.OptionParser(
2749                         usage='Usage: %prog [options] url...',
2750                         version='2011.08.04',
2751                         conflict_handler='resolve',
2752                 )
2753
2754                 parser.add_option('-h', '--help',
2755                                 action='help', help='print this help text and exit')
2756                 parser.add_option('-v', '--version',
2757                                 action='version', help='print program version and exit')
2758                 parser.add_option('-U', '--update',
2759                                 action='store_true', dest='update_self', help='update this program to latest stable version')
2760                 parser.add_option('-i', '--ignore-errors',
2761                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
2762                 parser.add_option('-r', '--rate-limit',
2763                                 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
2764                 parser.add_option('-R', '--retries',
2765                                 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
2766                 parser.add_option('--playlist-start',
2767                                 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
2768                 parser.add_option('--playlist-end',
2769                                 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
2770                 parser.add_option('--dump-user-agent',
2771                                 action='store_true', dest='dump_user_agent',
2772                                 help='display the current browser identification', default=False)
2773
2774                 authentication = optparse.OptionGroup(parser, 'Authentication Options')
2775                 authentication.add_option('-u', '--username',
2776                                 dest='username', metavar='USERNAME', help='account username')
2777                 authentication.add_option('-p', '--password',
2778                                 dest='password', metavar='PASSWORD', help='account password')
2779                 authentication.add_option('-n', '--netrc',
2780                                 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
2781                 parser.add_option_group(authentication)
2782
2783                 video_format = optparse.OptionGroup(parser, 'Video Format Options')
2784                 video_format.add_option('-f', '--format',
2785                                 action='store', dest='format', metavar='FORMAT', help='video format code')
2786                 video_format.add_option('--all-formats',
2787                                 action='store_const', dest='format', help='download all available video formats', const='-1')
2788                 video_format.add_option('--max-quality',
2789                                 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
2790                 parser.add_option_group(video_format)
2791
2792                 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
2793                 verbosity.add_option('-q', '--quiet',
2794                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
2795                 verbosity.add_option('-s', '--simulate',
2796                                 action='store_true', dest='simulate', help='do not download video', default=False)
2797                 verbosity.add_option('-g', '--get-url',
2798                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
2799                 verbosity.add_option('-e', '--get-title',
2800                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
2801                 verbosity.add_option('--get-thumbnail',
2802                                 action='store_true', dest='getthumbnail',
2803                                 help='simulate, quiet but print thumbnail URL', default=False)
2804                 verbosity.add_option('--get-description',
2805                                 action='store_true', dest='getdescription',
2806                                 help='simulate, quiet but print video description', default=False)
2807                 verbosity.add_option('--get-filename',
2808                                 action='store_true', dest='getfilename',
2809                                 help='simulate, quiet but print output filename', default=False)
2810                 verbosity.add_option('--no-progress',
2811                                 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
2812                 verbosity.add_option('--console-title',
2813                                 action='store_true', dest='consoletitle',
2814                                 help='display progress in console titlebar', default=False)
2815                 parser.add_option_group(verbosity)
2816
2817                 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
2818                 filesystem.add_option('-t', '--title',
2819                                 action='store_true', dest='usetitle', help='use title in file name', default=False)
2820                 filesystem.add_option('-l', '--literal',
2821                                 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
2822                 filesystem.add_option('-A', '--auto-number',
2823                                 action='store_true', dest='autonumber',
2824                                 help='number downloaded files starting from 00000', default=False)
2825                 filesystem.add_option('-o', '--output',
2826                                 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
2827                 filesystem.add_option('-a', '--batch-file',
2828                                 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
2829                 filesystem.add_option('-w', '--no-overwrites',
2830                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
2831                 filesystem.add_option('-c', '--continue',
2832                                 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
2833                 filesystem.add_option('--cookies',
2834                                 dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
2835                 filesystem.add_option('--no-part',
2836                                 action='store_true', dest='nopart', help='do not use .part files', default=False)
2837                 filesystem.add_option('--no-mtime',
2838                                 action='store_false', dest='updatetime',
2839                                 help='do not use the Last-modified header to set the file modification time', default=True)
2840                 parser.add_option_group(filesystem)
2841
2842                 postproc = optparse.OptionGroup(parser, 'Post-processing Options')
2843                 postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
2844                                 help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
2845                 postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
2846                                 help='"best", "aac" or "mp3"; best by default')
2847                 parser.add_option_group(postproc)
2848
2849                 (opts, args) = parser.parse_args()
2850
2851                 # Open appropriate CookieJar
2852                 if opts.cookiefile is None:
2853                         jar = cookielib.CookieJar()
2854                 else:
2855                         try:
2856                                 jar = cookielib.MozillaCookieJar(opts.cookiefile)
2857                                 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
2858                                         jar.load()
2859                         except (IOError, OSError), err:
2860                                 sys.exit(u'ERROR: unable to open cookie file')
2861
2862                 # Dump user agent
2863                 if opts.dump_user_agent:
2864                         print std_headers['User-Agent']
2865                         sys.exit(0)
2866
2867                 # General configuration
2868                 cookie_processor = urllib2.HTTPCookieProcessor(jar)
2869                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()))
2870                 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
2871
2872                 # Batch file verification
2873                 batchurls = []
2874                 if opts.batchfile is not None:
2875                         try:
2876                                 if opts.batchfile == '-':
2877                                         batchfd = sys.stdin
2878                                 else:
2879                                         batchfd = open(opts.batchfile, 'r')
2880                                 batchurls = batchfd.readlines()
2881                                 batchurls = [x.strip() for x in batchurls]
2882                                 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
2883                         except IOError:
2884                                 sys.exit(u'ERROR: batch file could not be read')
2885                 all_urls = batchurls + args
2886
2887                 # Conflicting, missing and erroneous options
2888                 if opts.usenetrc and (opts.username is not None or opts.password is not None):
2889                         parser.error(u'using .netrc conflicts with giving username/password')
2890                 if opts.password is not None and opts.username is None:
2891                         parser.error(u'account username missing')
2892                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
2893                         parser.error(u'using output template conflicts with using title, literal title or auto number')
2894                 if opts.usetitle and opts.useliteral:
2895                         parser.error(u'using title conflicts with using literal title')
2896                 if opts.username is not None and opts.password is None:
2897                         opts.password = getpass.getpass(u'Type account password and press return:')
2898                 if opts.ratelimit is not None:
2899                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
2900                         if numeric_limit is None:
2901                                 parser.error(u'invalid rate limit specified')
2902                         opts.ratelimit = numeric_limit
2903                 if opts.retries is not None:
2904                         try:
2905                                 opts.retries = long(opts.retries)
2906                         except (TypeError, ValueError), err:
2907                                 parser.error(u'invalid retry count specified')
2908                 try:
2909                         opts.playliststart = long(opts.playliststart)
2910                         if opts.playliststart <= 0:
2911                                 raise ValueError
2912                 except (TypeError, ValueError), err:
2913                         parser.error(u'invalid playlist start number specified')
2914                 try:
2915                         opts.playlistend = long(opts.playlistend)
2916                         if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
2917                                 raise ValueError
2918                 except (TypeError, ValueError), err:
2919                         parser.error(u'invalid playlist end number specified')
2920                 if opts.extractaudio:
2921                         if opts.audioformat not in ['best', 'aac', 'mp3']:
2922                                 parser.error(u'invalid audio format specified')
2923
2924                 # Information extractors
2925                 youtube_ie = YoutubeIE()
2926                 metacafe_ie = MetacafeIE(youtube_ie)
2927                 dailymotion_ie = DailymotionIE()
2928                 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
2929                 youtube_user_ie = YoutubeUserIE(youtube_ie)
2930                 youtube_search_ie = YoutubeSearchIE(youtube_ie)
2931                 google_ie = GoogleIE()
2932                 google_search_ie = GoogleSearchIE(google_ie)
		# Instantiate the remaining info extractors.  (The YouTube/Metacafe/
		# Google/... extractors referenced below are constructed just above
		# this point, outside the visible window.)
		photobucket_ie = PhotobucketIE()
		yahoo_ie = YahooIE()
		yahoo_search_ie = YahooSearchIE(yahoo_ie)  # search IE delegates to the plain Yahoo IE
		deposit_files_ie = DepositFilesIE()
		facebook_ie = FacebookIE()
		generic_ie = GenericIE()

		# File downloader
		# Build the FileDownloader from the parsed command-line options.
		# Note: any of the --get-* options implies both 'quiet' and
		# 'simulate' — the requested field is printed instead of the video
		# being downloaded.
		fd = FileDownloader({
			'usenetrc': opts.usenetrc,
			'username': opts.username,
			'password': opts.password,
			'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
			'forceurl': opts.geturl,
			'forcetitle': opts.gettitle,
			'forcethumbnail': opts.getthumbnail,
			'forcedescription': opts.getdescription,
			'forcefilename': opts.getfilename,
			'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
			'format': opts.format,
			'format_limit': opts.format_limit,
			# Output template selection: the first truthy expression in this
			# or-chain wins.  An explicit -o template (decoded from the
			# locale's preferred encoding) takes precedence; otherwise a
			# template is derived from the -f/--title/--literal/--auto-number
			# flags; the final fallback is just '<id>.<ext>'.
			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
				or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
				or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
				or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
				or u'%(id)s.%(ext)s'),
			'ignoreerrors': opts.ignoreerrors,
			'ratelimit': opts.ratelimit,
			'nooverwrites': opts.nooverwrites,
			'retries': opts.retries,
			'continuedl': opts.continue_dl,
			'noprogress': opts.noprogress,
			'playliststart': opts.playliststart,
			'playlistend': opts.playlistend,
			# '-o -' streams the video to stdout, so all log/progress output
			# must be redirected to stderr.
			'logtostderr': opts.outtmpl == '-',
			'consoletitle': opts.consoletitle,
			'nopart': opts.nopart,
			'updatetime': opts.updatetime,
			})
		# Register the info extractors.  Order matters: more specific
		# extractors (searches, playlists, users) are tried before the
		# plain site extractors they wrap.
		fd.add_info_extractor(youtube_search_ie)
		fd.add_info_extractor(youtube_pl_ie)
		fd.add_info_extractor(youtube_user_ie)
		fd.add_info_extractor(metacafe_ie)
		fd.add_info_extractor(dailymotion_ie)
		fd.add_info_extractor(youtube_ie)
		fd.add_info_extractor(google_ie)
		fd.add_info_extractor(google_search_ie)
		fd.add_info_extractor(photobucket_ie)
		fd.add_info_extractor(yahoo_ie)
		fd.add_info_extractor(yahoo_search_ie)
		fd.add_info_extractor(deposit_files_ie)
		fd.add_info_extractor(facebook_ie)

		# This must come last since it's the
		# fallback if none of the others work
		fd.add_info_extractor(generic_ie)

		# PostProcessors
		# -x/--extract-audio: run FFmpeg on the downloaded file afterwards.
		if opts.extractaudio:
			fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat))

		# Update version
		# -U/--update: overwrite this script (sys.argv[0]) with the latest
		# release before (optionally) downloading anything.
		if opts.update_self:
			update_self(fd, sys.argv[0])

		# Maybe do nothing
		# No URLs is only acceptable when the user just asked for a
		# self-update; otherwise it's a usage error.
		if len(all_urls) < 1:
			if not opts.update_self:
				parser.error(u'you must provide at least one URL')
			else:
				sys.exit()
		# Download everything; retcode is the process exit status
		# (non-zero if any download failed).
		retcode = fd.download(all_urls)

		# Dump cookie jar if requested
		# 'jar' is the cookielib jar set up earlier when --cookies was given;
		# failure to persist it is treated as a fatal error.
		if opts.cookiefile is not None:
			try:
				jar.save()
			except (IOError, OSError), err:  # Python 2 except syntax; err unused
				sys.exit(u'ERROR: unable to save cookie jar')

		sys.exit(retcode)

	# Top-level error handling for the whole run.
	except DownloadError:
		# The downloader already reported the error; just signal failure.
		sys.exit(1)
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		# Ctrl-C: exit with a message rather than a traceback.
		sys.exit(u'\nERROR: Interrupted by user')