Merge https://github.com/rg3/youtube-dl into vimeo
[youtube-dl.git] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # Author: Vasyl' Vavrychuk
7 # Author: Witold Baryluk
# Author: Paweł Paprota
9 # Author: Gergely Imreh
10 # License: Public domain code
11 import cookielib
12 import ctypes
13 import datetime
14 import email.utils
15 import gzip
16 import htmlentitydefs
17 import httplib
18 import locale
19 import math
20 import netrc
21 import os
22 import os.path
23 import re
24 import socket
25 import string
26 import StringIO
27 import subprocess
28 import sys
29 import time
30 import urllib
31 import urllib2
32 import zlib
33
# parse_qs was moved from the cgi module to the urlparse module recently.
try:
	from urlparse import parse_qs
except ImportError:
	# Older Python versions only ship parse_qs in the cgi module.
	from cgi import parse_qs
39
# Default HTTP headers added to every request (see YoutubeDLHandler);
# the User-Agent mimics a desktop Firefox browser.
std_headers = {
	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
	'Accept-Encoding': 'gzip, deflate',
	'Accept-Language': 'en-us,en;q=0.5',
}
47
# Characters considered safe for "simple" titles: ASCII letters and digits,
# as a unicode string (str.decode on a byte string is Python 2 only).
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
49
50 def preferredencoding():
51         """Get preferred encoding.
52
53         Returns the best encoding scheme for the system, based on
54         locale.getpreferredencoding() and some further tweaks.
55         """
56         def yield_preferredencoding():
57                 try:
58                         pref = locale.getpreferredencoding()
59                         u'TEST'.encode(pref)
60                 except:
61                         pref = 'UTF-8'
62                 while True:
63                         yield pref
64         return yield_preferredencoding().next()
65
66 def htmlentity_transform(matchobj):
67         """Transforms an HTML entity to a Unicode character.
68
69         This function receives a match object and is intended to be used with
70         the re.sub() function.
71         """
72         entity = matchobj.group(1)
73
74         # Known non-numeric HTML entity
75         if entity in htmlentitydefs.name2codepoint:
76                 return unichr(htmlentitydefs.name2codepoint[entity])
77
78         # Unicode character
79         mobj = re.match(ur'(?u)#(x?\d+)', entity)
80         if mobj is not None:
81                 numstr = mobj.group(1)
82                 if numstr.startswith(u'x'):
83                         base = 16
84                         numstr = u'0%s' % numstr
85                 else:
86                         base = 10
87                 return unichr(long(numstr, base))
88
89         # Unknown entity in name, return its literal representation
90         return (u'&%s;' % entity)
91
def sanitize_title(utitle):
	"""Sanitizes a video title so it could be used as part of a filename."""
	# Decode HTML entities (&amp;, &#39;, ...) into the real characters.
	utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
	# The path separator cannot appear inside a file name; replace it.
	return utitle.replace(unicode(os.sep), u'%')
96
def sanitize_open(filename, open_mode):
	"""Try to open the given filename, and slightly tweak it if this fails.

	Attempts to open the given filename. If this fails, it tries to change
	the filename slightly, step by step, until it's either able to open it
	or it fails and raises a final exception, like the standard open()
	function.

	It returns the tuple (stream, definitive_file_name).
	"""
	try:
		if filename == u'-':
			if sys.platform == 'win32':
				import msvcrt
				# Switch stdout to binary mode so video data is not
				# mangled by Windows newline translation.
				msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
			return (sys.stdout, filename)
		stream = open(filename, open_mode)
		return (stream, filename)
	except (IOError, OSError), err:
		# In case of error, try to remove win32 forbidden chars
		filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)

		# An exception here should be caught in the caller
		stream = open(filename, open_mode)
		return (stream, filename)
122
def timeconvert(timestr):
	"""Convert RFC 2822 defined time string into system timestamp.

	Returns None when the string cannot be parsed as an RFC 2822 date
	(e.g. a malformed or missing Last-modified header value).
	"""
	# CONSISTENCY FIX: this function was indented with spaces while the
	# rest of the file uses tabs; mixing the two is fragile in Python.
	timestamp = None
	timetuple = email.utils.parsedate_tz(timestr)
	if timetuple is not None:
		timestamp = email.utils.mktime_tz(timetuple)
	return timestamp
130
class DownloadError(Exception):
	"""Download Error exception.

	This exception may be thrown by FileDownloader objects if they are not
	configured to continue on errors. They will contain the appropriate
	error message. Raised by FileDownloader.trouble() unless the
	'ignoreerrors' option is set.
	"""
	pass
139
class SameFileError(Exception):
	"""Same File exception.

	This exception will be thrown by FileDownloader objects if they detect
	multiple files would have to be downloaded to the same file on disk,
	i.e. several URLs were given together with a fixed output template.
	"""
	pass
147
class PostProcessingError(Exception):
	"""Post Processing exception.

	This exception may be raised by PostProcessor's .run() method to
	indicate an error in the postprocessing task. It is caught and
	reported by FileDownloader.process_info().
	"""
	pass
155
class UnavailableVideoError(Exception):
	"""Unavailable Format exception.

	This exception will be thrown when a video is requested
	in a format that is not available for that video.
	"""
	pass
163
class ContentTooShortError(Exception):
	"""Content Too Short exception.

	This exception may be raised by FileDownloader objects when a file they
	download is too small for what the server announced first, indicating
	the connection was probably interrupted.
	"""
	# Both in bytes
	downloaded = None   # bytes actually received
	expected = None     # bytes announced by the server

	def __init__(self, downloaded, expected):
		self.downloaded = downloaded
		self.expected = expected
178
class YoutubeDLHandler(urllib2.HTTPHandler):
	"""Handler for HTTP requests and responses.

	This class, when installed with an OpenerDirector, automatically adds
	the standard headers to every HTTP request and handles gzipped and
	deflated responses from web servers. If compression is to be avoided in
	a particular request, the original request in the program code only has
	to include the HTTP header "Youtubedl-No-Compression", which will be
	removed before making the real request.

	Part of this code was copied from:

	  http://techknack.net/python-urllib2-handlers/

	Andrew Rowls, the author of that code, agreed to release it to the
	public domain.
	"""

	@staticmethod
	def deflate(data):
		# Some servers send a raw deflate stream (no zlib header): try
		# that first, then fall back to a standard zlib stream.
		try:
			return zlib.decompress(data, -zlib.MAX_WBITS)
		except zlib.error:
			return zlib.decompress(data)

	@staticmethod
	def addinfourl_wrapper(stream, headers, url, code):
		# Newer addinfourl accepts the response code directly; older
		# versions need it assigned after construction.
		if hasattr(urllib2.addinfourl, 'getcode'):
			return urllib2.addinfourl(stream, headers, url, code)
		ret = urllib2.addinfourl(stream, headers, url)
		ret.code = code
		return ret

	def http_request(self, req):
		# Delete any pre-existing copy of each standard header first so
		# the value from std_headers always wins.
		for h in std_headers:
			if h in req.headers:
				del req.headers[h]
			req.add_header(h, std_headers[h])
		# The marker header only disables compression for this request;
		# it must not be sent to the server.
		if 'Youtubedl-no-compression' in req.headers:
			if 'Accept-encoding' in req.headers:
				del req.headers['Accept-encoding']
			del req.headers['Youtubedl-no-compression']
		return req

	def http_response(self, req, resp):
		old_resp = resp
		# gzip: wrap the raw body in a decompressing file object.
		if resp.headers.get('Content-encoding', '') == 'gzip':
			gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
			resp.msg = old_resp.msg
		# deflate: decompress eagerly and serve from memory.
		if resp.headers.get('Content-encoding', '') == 'deflate':
			gz = StringIO.StringIO(self.deflate(resp.read()))
			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
			resp.msg = old_resp.msg
		return resp
236
class FileDownloader(object):
	"""File Downloader class.

	File downloader objects are the ones responsible of downloading the
	actual video file and writing it to disk if the user has requested
	it, among some other tasks. In most cases there should be one per
	program. As, given a video URL, the downloader doesn't know how to
	extract all the needed information (a task that InfoExtractors do),
	it has to pass the URL to one of them.

	For this, file downloader objects have a method that allows
	InfoExtractors to be registered in a given order. When it is passed
	a URL, the file downloader hands it to the first InfoExtractor it
	finds that reports being able to handle it. The InfoExtractor extracts
	all the information about the video or videos the URL refers to, and
	asks the FileDownloader to process the video information, possibly
	downloading the video.

	File downloaders accept a lot of parameters. In order not to saturate
	the object constructor with arguments, it receives a dictionary of
	options instead. These options are available through the params
	attribute for the InfoExtractors to use. The FileDownloader also
	registers itself as the downloader in charge for the InfoExtractors
	that are added to it, so this is a "mutual registration".

	Available options:

	username:         Username for authentication purposes.
	password:         Password for authentication purposes.
	usenetrc:         Use netrc for authentication instead.
	quiet:            Do not print messages to stdout.
	forceurl:         Force printing final URL.
	forcetitle:       Force printing title.
	forcethumbnail:   Force printing thumbnail URL.
	forcedescription: Force printing description.
	forcefilename:    Force printing final filename.
	simulate:         Do not download the video files.
	format:           Video format code.
	format_limit:     Highest quality format to try.
	outtmpl:          Template for output names.
	ignoreerrors:     Do not stop on download errors.
	ratelimit:        Download speed limit, in bytes/sec.
	nooverwrites:     Prevent overwriting files.
	retries:          Number of times to retry for HTTP error 5xx
	continuedl:       Try to continue downloads if possible.
	noprogress:       Do not print the progress bar.
	playliststart:    Playlist item to start at.
	playlistend:      Playlist item to end at.
	logtostderr:      Log messages to stderr instead of stdout.
	consoletitle:     Display progress in console window's titlebar.
	nopart:           Do not use temporary .part files.
	updatetime:       Use the Last-modified header to set output file timestamps.
	"""
290
	params = None                 # Options dictionary passed to __init__
	_ies = []                     # Registered InfoExtractors, in priority order
	_pps = []                     # Registered PostProcessors (run as a chain)
	_download_retcode = None      # Process return code: 0 OK, 1 after errors
	_num_downloads = None         # Ordinal of the current download (autonumber)
	_screen_file = None           # Output stream: stdout, or stderr with logtostderr
297
	def __init__(self, params):
		"""Create a FileDownloader object with the given options."""
		self._ies = []
		self._pps = []
		self._download_retcode = 0
		self._num_downloads = 0
		# With logtostderr, screen messages go to stderr so stdout stays
		# clean for the forced printings in process_info().
		self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
		self.params = params
306
307         @staticmethod
308         def pmkdir(filename):
309                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
310                 components = filename.split(os.sep)
311                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
312                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
313                 for dir in aggregate:
314                         if not os.path.exists(dir):
315                                 os.mkdir(dir)
316
317         @staticmethod
318         def format_bytes(bytes):
319                 if bytes is None:
320                         return 'N/A'
321                 if type(bytes) is str:
322                         bytes = float(bytes)
323                 if bytes == 0.0:
324                         exponent = 0
325                 else:
326                         exponent = long(math.log(bytes, 1024.0))
327                 suffix = 'bkMGTPEZY'[exponent]
328                 converted = float(bytes) / float(1024**exponent)
329                 return '%.2f%s' % (converted, suffix)
330
331         @staticmethod
332         def calc_percent(byte_counter, data_len):
333                 if data_len is None:
334                         return '---.-%'
335                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
336
337         @staticmethod
338         def calc_eta(start, now, total, current):
339                 if total is None:
340                         return '--:--'
341                 dif = now - start
342                 if current == 0 or dif < 0.001: # One millisecond
343                         return '--:--'
344                 rate = float(current) / dif
345                 eta = long((float(total) - float(current)) / rate)
346                 (eta_mins, eta_secs) = divmod(eta, 60)
347                 if eta_mins > 99:
348                         return '--:--'
349                 return '%02d:%02d' % (eta_mins, eta_secs)
350
351         @staticmethod
352         def calc_speed(start, now, bytes):
353                 dif = now - start
354                 if bytes == 0 or dif < 0.001: # One millisecond
355                         return '%10s' % '---b/s'
356                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
357
358         @staticmethod
359         def best_block_size(elapsed_time, bytes):
360                 new_min = max(bytes / 2.0, 1.0)
361                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
362                 if elapsed_time < 0.001:
363                         return long(new_max)
364                 rate = bytes / elapsed_time
365                 if rate > new_max:
366                         return long(new_max)
367                 if rate < new_min:
368                         return long(new_min)
369                 return long(rate)
370
371         @staticmethod
372         def parse_bytes(bytestr):
373                 """Parse a string indicating a byte quantity into a long integer."""
374                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
375                 if matchobj is None:
376                         return None
377                 number = float(matchobj.group(1))
378                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
379                 return long(round(number * multiplier))
380
	def add_info_extractor(self, ie):
		"""Add an InfoExtractor object to the end of the list."""
		self._ies.append(ie)
		# Mutual registration: the extractor gets a reference back to us.
		ie.set_downloader(self)
385
	def add_post_processor(self, pp):
		"""Add a PostProcessor object to the end of the chain."""
		self._pps.append(pp)
		# Mutual registration: the postprocessor gets a reference back to us.
		pp.set_downloader(self)
390
	def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
		"""Print message to stdout if not in quiet mode.

		skip_eol suppresses the trailing newline (used by the progress
		line); ignore_encoding_errors swallows UnicodeEncodeError so an
		unprintable message never aborts a download.
		"""
		try:
			if not self.params.get('quiet', False):
				terminator = [u'\n', u''][skip_eol]
				# The trailing comma stops print from adding its own
				# newline; the terminator above controls it instead.
				print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
			self._screen_file.flush()
		except (UnicodeEncodeError), err:
			if not ignore_encoding_errors:
				raise
401
	def to_stderr(self, message):
		"""Print message to stderr, encoded for the current locale."""
		print >>sys.stderr, message.encode(preferredencoding())
405
	def to_cons_title(self, message):
		"""Set console/terminal window title to message."""
		if not self.params.get('consoletitle', False):
			return
		if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
			# c_wchar_p() might not be necessary if `message` is
			# already of type unicode()
			ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
		elif 'TERM' in os.environ:
			# xterm-compatible title escape sequence: OSC 0 ; title BEL.
			sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
416
	def fixed_template(self):
		"""Checks if the output template is fixed.

		A template is fixed when it contains no %(field)s substitutions,
		meaning every download would be written to the same file.
		"""
		return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
420
421         def trouble(self, message=None):
422                 """Determine action to take when a download problem appears.
423
424                 Depending on if the downloader has been configured to ignore
425                 download errors or not, this method may throw an exception or
426                 not when errors are found, after printing the message.
427                 """
428                 if message is not None:
429                         self.to_stderr(message)
430                 if not self.params.get('ignoreerrors', False):
431                         raise DownloadError(message)
432                 self._download_retcode = 1
433
434         def slow_down(self, start_time, byte_counter):
435                 """Sleep if the download speed is over the rate limit."""
436                 rate_limit = self.params.get('ratelimit', None)
437                 if rate_limit is None or byte_counter == 0:
438                         return
439                 now = time.time()
440                 elapsed = now - start_time
441                 if elapsed <= 0.0:
442                         return
443                 speed = float(byte_counter) / elapsed
444                 if speed > rate_limit:
445                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
446
447         def temp_name(self, filename):
448                 """Returns a temporary filename for the given filename."""
449                 if self.params.get('nopart', False) or filename == u'-' or \
450                                 (os.path.exists(filename) and not os.path.isfile(filename)):
451                         return filename
452                 return filename + u'.part'
453
454         def undo_temp_name(self, filename):
455                 if filename.endswith(u'.part'):
456                         return filename[:-len(u'.part')]
457                 return filename
458
	def try_rename(self, old_filename, new_filename):
		"""Rename the temporary .part file to its final name.

		Failures are routed through trouble() instead of propagating the
		raw OS error.
		"""
		try:
			if old_filename == new_filename:
				return
			os.rename(old_filename, new_filename)
		except (IOError, OSError), err:
			self.trouble(u'ERROR: unable to rename file')
466         
467         def try_utime(self, filename, last_modified_hdr):
468                 """Try to set the last-modified time of the given file."""
469                 if last_modified_hdr is None:
470                         return
471                 if not os.path.isfile(filename):
472                         return
473                 timestr = last_modified_hdr
474                 if timestr is None:
475                         return
476                 filetime = timeconvert(timestr)
477                 if filetime is None:
478                         return
479                 try:
480                         os.utime(filename,(time.time(), filetime))
481                 except:
482                         pass
483
	def report_destination(self, filename):
		"""Report destination filename."""
		# Encoding errors are ignored: a console charset that cannot show
		# the name must not abort the download.
		self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
487
	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
		"""Report download progress."""
		if self.params.get('noprogress', False):
			return
		# \r rewrites the current line in place instead of scrolling.
		self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
				(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
		self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
				(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
496
	def report_resuming_byte(self, resume_len):
		"""Report attempt to resume at given byte offset."""
		self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
500
	def report_retry(self, count, retries):
		"""Report retry in case of HTTP error 5xx."""
		self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
504
	def report_file_already_downloaded(self, file_name):
		"""Report file has already been fully downloaded."""
		try:
			self.to_screen(u'[download] %s has already been downloaded' % file_name)
		except (UnicodeEncodeError), err:
			# The file name may not be representable in the console
			# encoding; fall back to a generic message.
			self.to_screen(u'[download] The file has already been downloaded')
511
	def report_unable_to_resume(self):
		"""Report it was impossible to resume download."""
		self.to_screen(u'[download] Unable to resume')
515
	def report_finish(self):
		"""Report download finished."""
		if self.params.get('noprogress', False):
			self.to_screen(u'[download] Download completed')
		else:
			# The progress line is still on screen; just terminate it.
			self.to_screen(u'')
522
	def increment_downloads(self):
		"""Increment the ordinal that assigns a number to each file.

		This feeds the '%(autonumber)s' field expanded in
		prepare_filename().
		"""
		self._num_downloads += 1
526
	def prepare_filename(self, info_dict):
		"""Generate the output filename.

		Expands the 'outtmpl' template with the video info plus two
		special fields: 'epoch' (current Unix time) and 'autonumber'
		(zero-padded ordinal of this download). Returns None, after
		reporting through trouble(), when expansion fails.
		"""
		try:
			template_dict = dict(info_dict)
			template_dict['epoch'] = unicode(long(time.time()))
			template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
			filename = self.params['outtmpl'] % template_dict
			return filename
		except (ValueError, KeyError), err:
			self.trouble(u'ERROR: invalid system charset or erroneous output template')
			return None
538
	def process_info(self, info_dict):
		"""Process a single dictionary returned by an InfoExtractor.

		In simulate mode only the forced printings are emitted; otherwise
		the video data is downloaded and the postprocessing chain is run.
		"""
		filename = self.prepare_filename(info_dict)
		# Do nothing else if in simulate mode
		if self.params.get('simulate', False):
			# Forced printings go straight to stdout (not to_screen) so
			# they can be consumed by scripts.
			if self.params.get('forcetitle', False):
				print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forceurl', False):
				print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
				print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcedescription', False) and 'description' in info_dict:
				print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcefilename', False) and filename is not None:
				print filename.encode(preferredencoding(), 'xmlcharrefreplace')

			return

		if filename is None:
			return
		if self.params.get('nooverwrites', False) and os.path.exists(filename):
			self.to_stderr(u'WARNING: file exists and will be skipped')
			return

		# Make sure the destination directory exists before downloading.
		try:
			self.pmkdir(filename)
		except (OSError, IOError), err:
			self.trouble(u'ERROR: unable to create directories: %s' % str(err))
			return

		try:
			success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
		except (OSError, IOError), err:
			# Local I/O failure: treat the video as unavailable.
			raise UnavailableVideoError
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self.trouble(u'ERROR: unable to download video data: %s' % str(err))
			return
		except (ContentTooShortError, ), err:
			self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
			return

		if success:
			try:
				self.post_process(filename, info_dict)
			except (PostProcessingError), err:
				self.trouble(u'ERROR: postprocessing: %s' % str(err))
				return
587
588         def download(self, url_list):
589                 """Download a given list of URLs."""
590                 if len(url_list) > 1 and self.fixed_template():
591                         raise SameFileError(self.params['outtmpl'])
592
593                 for url in url_list:
594                         suitable_found = False
595                         for ie in self._ies:
596                                 # Go to next InfoExtractor if not suitable
597                                 if not ie.suitable(url):
598                                         continue
599
600                                 # Suitable InfoExtractor found
601                                 suitable_found = True
602
603                                 # Extract information from URL and process it
604                                 ie.extract(url)
605
606                                 # Suitable InfoExtractor had been found; go to next URL
607                                 break
608
609                         if not suitable_found:
610                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
611
612                 return self._download_retcode
613
614         def post_process(self, filename, ie_info):
615                 """Run the postprocessing chain on the given file."""
616                 info = dict(ie_info)
617                 info['filepath'] = filename
618                 for pp in self._pps:
619                         info = pp.run(info)
620                         if info is None:
621                                 break
622
	def _download_with_rtmpdump(self, filename, url, player_url):
		"""Download an rtmp:// URL by driving the external rtmpdump tool.

		Returns True on success, False on failure (after reporting via
		trouble()).
		"""
		self.report_destination(filename)
		tmpfilename = self.temp_name(filename)

		# Check for rtmpdump first
		try:
			# NOTE(review): file() is Python-2-only and the devnull
			# handle is never explicitly closed.
			subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
		except (OSError, IOError):
			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
			return False

		# Download using rtmpdump. rtmpdump returns exit code 2 when
		# the connection was interrupted and resuming appears to be
		# possible. This is part of rtmpdump's normal usage, AFAIK.
		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
		retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
		while retval == 2 or retval == 1:
			prevsize = os.path.getsize(tmpfilename)
			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
			time.sleep(5.0) # This seems to be needed
			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
			cursize = os.path.getsize(tmpfilename)
			# No progress since the last attempt: stop retrying.
			if prevsize == cursize and retval == 1:
				break
		if retval == 0:
			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
			self.try_rename(tmpfilename, filename)
			return True
		else:
			self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
			return False
654
	def _do_download(self, filename, url, player_url):
		"""Download *url* into *filename*, resuming and retrying as needed.

		Returns True on success (including "file already downloaded") and
		False after a reported failure. rtmp:// URLs are delegated to
		rtmpdump. With 'continuedl' set, a pre-existing temporary file is
		resumed via an HTTP Range request; 5xx errors are retried up to
		self.params['retries'] times.
		"""
		# Check file already present
		if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
			self.report_file_already_downloaded(filename)
			return True

		# Attempt to download using rtmpdump
		if url.startswith('rtmp'):
			return self._download_with_rtmpdump(filename, url, player_url)

		tmpfilename = self.temp_name(filename)
		stream = None
		open_mode = 'wb'

		# Do not include the Accept-Encoding header
		headers = {'Youtubedl-no-compression': 'True'}
		# basic_request stays Range-free so it can be used below to re-probe
		# the real content length after a 416 (range not satisfiable) error.
		basic_request = urllib2.Request(url, None, headers)
		request = urllib2.Request(url, None, headers)

		# Establish possible resume length
		if os.path.isfile(tmpfilename):
			resume_len = os.path.getsize(tmpfilename)
		else:
			resume_len = 0

		# Request parameters in case of being able to resume
		if self.params.get('continuedl', False) and resume_len != 0:
			self.report_resuming_byte(resume_len)
			request.add_header('Range','bytes=%d-' % resume_len)
			open_mode = 'ab'

		count = 0
		retries = self.params.get('retries', 0)
		while count <= retries:
			# Establish connection
			try:
				data = urllib2.urlopen(request)
				break
			except (urllib2.HTTPError, ), err:
				if (err.code < 500 or err.code >= 600) and err.code != 416:
					# Unexpected HTTP error
					raise
				elif err.code == 416:
					# Unable to resume (requested range not satisfiable)
					try:
						# Open the connection again without the range header
						data = urllib2.urlopen(basic_request)
						content_length = data.info()['Content-Length']
					except (urllib2.HTTPError, ), err:
						if err.code < 500 or err.code >= 600:
							raise
					else:
						# Examine the reported length
						if (content_length is not None and
						    (resume_len - 100 < long(content_length) < resume_len + 100)):
							# The file had already been fully downloaded.
							# Explanation to the above condition: in issue #175 it was revealed that
							# YouTube sometimes adds or removes a few bytes from the end of the file,
							# changing the file size slightly and causing problems for some users. So
							# I decided to implement a suggested change and consider the file
							# completely downloaded if the file size differs less than 100 bytes from
							# the one in the hard drive.
							self.report_file_already_downloaded(filename)
							self.try_rename(tmpfilename, filename)
							return True
						else:
							# The length does not match, we start the download over
							self.report_unable_to_resume()
							open_mode = 'wb'
							break
			# Retry
			count += 1
			if count <= retries:
				self.report_retry(count, retries)

		if count > retries:
			self.trouble(u'ERROR: giving up after %s retries' % retries)
			return False

		# Content-length of this response covers only the remaining bytes
		# when a Range request succeeded, hence the resume_len offset.
		data_len = data.info().get('Content-length', None)
		if data_len is not None:
			data_len = long(data_len) + resume_len
		data_len_str = self.format_bytes(data_len)
		byte_counter = 0 + resume_len
		block_size = 1024
		start = time.time()
		while True:
			# Download and write
			before = time.time()
			data_block = data.read(block_size)
			after = time.time()
			if len(data_block) == 0:
				break
			byte_counter += len(data_block)

			# Open file just in time
			if stream is None:
				try:
					(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
					filename = self.undo_temp_name(tmpfilename)
					self.report_destination(filename)
				except (OSError, IOError), err:
					self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
					return False
			try:
				stream.write(data_block)
			except (IOError, OSError), err:
				self.trouble(u'\nERROR: unable to write data: %s' % str(err))
				return False
			# Adapt block size to the measured timing of the last read
			# (see best_block_size for the exact policy).
			block_size = self.best_block_size(after - before, len(data_block))

			# Progress message
			percent_str = self.calc_percent(byte_counter, data_len)
			eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
			speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
			self.report_progress(percent_str, data_len_str, speed_str, eta_str)

			# Apply rate limit
			self.slow_down(start, byte_counter - resume_len)

		stream.close()
		self.report_finish()
		if data_len is not None and byte_counter != data_len:
			raise ContentTooShortError(byte_counter, long(data_len))
		self.try_rename(tmpfilename, filename)

		# Update file modification time
		if self.params.get('updatetime', True):
			self.try_utime(filename, data.info().get('last-modified', None))

		return True
786
class InfoExtractor(object):
	"""Base class for all information extractors.

	An information extractor takes a URL and produces, for each video the
	URL refers to, a dictionary describing that video: the real media URL,
	the title, the uploader and so on. Those dictionaries are handed to the
	FileDownloader, which takes care of actually fetching the media (among
	other possible outcomes). Every dictionary must carry these keys:

	id:             Video identifier.
	url:            Final video URL.
	uploader:       Nickname of the video uploader.
	title:          Literal title.
	stitle:         Simplified title.
	ext:            Video filename extension.
	format:         Video format.
	player_url:     SWF Player URL (may be None).

	The following keys are optional; they exist mainly so youtube-dl can
	serve as the backend for a video search function (such as the one in
	youtube2mp3) and are only read by the forced printing functions:

	thumbnail:      Full URL to a video thumbnail image.
	description:    One-line video description.

	Concrete extractors should override _real_initialize(), _real_extract()
	and the static suitable() method, and will usually be instantiated and
	registered with the main downloader.
	"""

	# Flipped to True by initialize() the first time it runs.
	_ready = False
	# FileDownloader this extractor reports to; set via set_downloader().
	_downloader = None

	def __init__(self, downloader=None):
		"""Create the extractor, optionally attaching a downloader."""
		self._ready = False
		self.set_downloader(downloader)

	@staticmethod
	def suitable(url):
		"""Return True if this extractor can handle the given URL."""
		return False

	def set_downloader(self, downloader):
		"""Attach the FileDownloader this extractor should report to."""
		self._downloader = downloader

	def initialize(self):
		"""Run one-time setup (authentication, etc) if not done already."""
		if self._ready:
			return
		self._real_initialize()
		self._ready = True

	def extract(self, url):
		"""Extract URL information and return it in a list of dicts."""
		self.initialize()
		return self._real_extract(url)

	def _real_initialize(self):
		"""Actual initialization work; subclasses redefine this."""
		pass

	def _real_extract(self, url):
		"""Actual extraction work; subclasses redefine this."""
		pass
857
858 class YoutubeIE(InfoExtractor):
859         """Information extractor for youtube.com."""
860
861         _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
862         _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
863         _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
864         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
865         _NETRC_MACHINE = 'youtube'
866         # Listed in order of quality
867         _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
868         _video_extensions = {
869                 '13': '3gp',
870                 '17': 'mp4',
871                 '18': 'mp4',
872                 '22': 'mp4',
873                 '37': 'mp4',
874                 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
875                 '43': 'webm',
876                 '45': 'webm',
877         }
878
879         @staticmethod
880         def suitable(url):
881                 return (re.match(YoutubeIE._VALID_URL, url) is not None)
882
883         def report_lang(self):
884                 """Report attempt to set language."""
885                 self._downloader.to_screen(u'[youtube] Setting language')
886
887         def report_login(self):
888                 """Report attempt to log in."""
889                 self._downloader.to_screen(u'[youtube] Logging in')
890
891         def report_age_confirmation(self):
892                 """Report attempt to confirm age."""
893                 self._downloader.to_screen(u'[youtube] Confirming age')
894
895         def report_video_webpage_download(self, video_id):
896                 """Report attempt to download video webpage."""
897                 self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
898
899         def report_video_info_webpage_download(self, video_id):
900                 """Report attempt to download video info webpage."""
901                 self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
902
903         def report_information_extraction(self, video_id):
904                 """Report attempt to extract video information."""
905                 self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
906
907         def report_unavailable_format(self, video_id, format):
908                 """Report extracted video URL."""
909                 self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
910
911         def report_rtmp_download(self):
912                 """Indicate the download will use the RTMP protocol."""
913                 self._downloader.to_screen(u'[youtube] RTMP download detected')
914
915         def _real_initialize(self):
916                 if self._downloader is None:
917                         return
918
919                 username = None
920                 password = None
921                 downloader_params = self._downloader.params
922
923                 # Attempt to use provided username and password or .netrc data
924                 if downloader_params.get('username', None) is not None:
925                         username = downloader_params['username']
926                         password = downloader_params['password']
927                 elif downloader_params.get('usenetrc', False):
928                         try:
929                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
930                                 if info is not None:
931                                         username = info[0]
932                                         password = info[2]
933                                 else:
934                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
935                         except (IOError, netrc.NetrcParseError), err:
936                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
937                                 return
938
939                 # Set language
940                 request = urllib2.Request(self._LANG_URL)
941                 try:
942                         self.report_lang()
943                         urllib2.urlopen(request).read()
944                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
945                         self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
946                         return
947
948                 # No authentication to be performed
949                 if username is None:
950                         return
951
952                 # Log in
953                 login_form = {
954                                 'current_form': 'loginForm',
955                                 'next':         '/',
956                                 'action_login': 'Log In',
957                                 'username':     username,
958                                 'password':     password,
959                                 }
960                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
961                 try:
962                         self.report_login()
963                         login_results = urllib2.urlopen(request).read()
964                         if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
965                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
966                                 return
967                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
968                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
969                         return
970
971                 # Confirm age
972                 age_form = {
973                                 'next_url':             '/',
974                                 'action_confirm':       'Confirm',
975                                 }
976                 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
977                 try:
978                         self.report_age_confirmation()
979                         age_results = urllib2.urlopen(request).read()
980                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
981                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
982                         return
983
984         def _real_extract(self, url):
985                 # Extract video id from URL
986                 mobj = re.match(self._VALID_URL, url)
987                 if mobj is None:
988                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
989                         return
990                 video_id = mobj.group(2)
991
992                 # Get video webpage
993                 self.report_video_webpage_download(video_id)
994                 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id)
995                 try:
996                         video_webpage = urllib2.urlopen(request).read()
997                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
998                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
999                         return
1000
1001                 # Attempt to extract SWF player URL
1002                 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1003                 if mobj is not None:
1004                         player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1005                 else:
1006                         player_url = None
1007
1008                 # Get video info
1009                 self.report_video_info_webpage_download(video_id)
1010                 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1011                         video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1012                                            % (video_id, el_type))
1013                         request = urllib2.Request(video_info_url)
1014                         try:
1015                                 video_info_webpage = urllib2.urlopen(request).read()
1016                                 video_info = parse_qs(video_info_webpage)
1017                                 if 'token' in video_info:
1018                                         break
1019                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1020                                 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
1021                                 return
1022                 if 'token' not in video_info:
1023                         if 'reason' in video_info:
1024                                 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
1025                         else:
1026                                 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
1027                         return
1028
1029                 # Start extracting information
1030                 self.report_information_extraction(video_id)
1031
1032                 # uploader
1033                 if 'author' not in video_info:
1034                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1035                         return
1036                 video_uploader = urllib.unquote_plus(video_info['author'][0])
1037
1038                 # title
1039                 if 'title' not in video_info:
1040                         self._downloader.trouble(u'ERROR: unable to extract video title')
1041                         return
1042                 video_title = urllib.unquote_plus(video_info['title'][0])
1043                 video_title = video_title.decode('utf-8')
1044                 video_title = sanitize_title(video_title)
1045
1046                 # simplified title
1047                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1048                 simple_title = simple_title.strip(ur'_')
1049
1050                 # thumbnail image
1051                 if 'thumbnail_url' not in video_info:
1052                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
1053                         video_thumbnail = ''
1054                 else:   # don't panic if we can't find it
1055                         video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
1056
1057                 # upload date
1058                 upload_date = u'NA'
1059                 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1060                 if mobj is not None:
1061                         upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1062                         format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
1063                         for expression in format_expressions:
1064                                 try:
1065                                         upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
1066                                 except:
1067                                         pass
1068
1069                 # description
1070                 video_description = 'No description available.'
1071                 if self._downloader.params.get('forcedescription', False):
1072                         mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
1073                         if mobj is not None:
1074                                 video_description = mobj.group(1)
1075
1076                 # token
1077                 video_token = urllib.unquote_plus(video_info['token'][0])
1078
1079                 # Decide which formats to download
1080                 req_format = self._downloader.params.get('format', None)
1081
1082                 if 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1083                         url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
1084                         url_data = [dict(pairStr.split('=') for pairStr in uds.split('&')) for uds in url_data_strs]
1085                         url_map = dict((ud['itag'], urllib.unquote(ud['url'])) for ud in url_data)
1086                         format_limit = self._downloader.params.get('format_limit', None)
1087                         if format_limit is not None and format_limit in self._available_formats:
1088                                 format_list = self._available_formats[self._available_formats.index(format_limit):]
1089                         else:
1090                                 format_list = self._available_formats
1091                         existing_formats = [x for x in format_list if x in url_map]
1092                         if len(existing_formats) == 0:
1093                                 self._downloader.trouble(u'ERROR: no known formats available for video')
1094                                 return
1095                         if req_format is None:
1096                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1097                         elif req_format == '-1':
1098                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1099                         else:
1100                                 # Specific format
1101                                 if req_format not in url_map:
1102                                         self._downloader.trouble(u'ERROR: requested format not available')
1103                                         return
1104                                 video_url_list = [(req_format, url_map[req_format])] # Specific format
1105
1106                 elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1107                         self.report_rtmp_download()
1108                         video_url_list = [(None, video_info['conn'][0])]
1109
1110                 else:
1111                         self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
1112                         return
1113
1114                 for format_param, video_real_url in video_url_list:
1115                         # At this point we have a new video
1116                         self._downloader.increment_downloads()
1117
1118                         # Extension
1119                         video_extension = self._video_extensions.get(format_param, 'flv')
1120
1121                         # Find the video URL in fmt_url_map or conn paramters
1122                         try:
1123                                 # Process video information
1124                                 self._downloader.process_info({
1125                                         'id':           video_id.decode('utf-8'),
1126                                         'url':          video_real_url.decode('utf-8'),
1127                                         'uploader':     video_uploader.decode('utf-8'),
1128                                         'upload_date':  upload_date,
1129                                         'title':        video_title,
1130                                         'stitle':       simple_title,
1131                                         'ext':          video_extension.decode('utf-8'),
1132                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
1133                                         'thumbnail':    video_thumbnail.decode('utf-8'),
1134                                         'description':  video_description.decode('utf-8'),
1135                                         'player_url':   player_url,
1136                                 })
1137                         except UnavailableVideoError, err:
1138                                 self._downloader.trouble(u'\nERROR: unable to download video')
1139
1140
1141 class MetacafeIE(InfoExtractor):
1142         """Information Extractor for metacafe.com."""
1143
1144         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
1145         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
1146         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
1147         _youtube_ie = None
1148
1149         def __init__(self, youtube_ie, downloader=None):
1150                 InfoExtractor.__init__(self, downloader)
1151                 self._youtube_ie = youtube_ie
1152
1153         @staticmethod
1154         def suitable(url):
1155                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
1156
1157         def report_disclaimer(self):
1158                 """Report disclaimer retrieval."""
1159                 self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
1160
1161         def report_age_confirmation(self):
1162                 """Report attempt to confirm age."""
1163                 self._downloader.to_screen(u'[metacafe] Confirming age')
1164
1165         def report_download_webpage(self, video_id):
1166                 """Report webpage download."""
1167                 self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
1168
1169         def report_extraction(self, video_id):
1170                 """Report information extraction."""
1171                 self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
1172
	def _real_initialize(self):
		"""Fetch the disclaimer page and POST the family-filter opt-out form.

		NOTE(review): this presumably relies on the server response setting
		session cookies that disable the family filter for later requests —
		confirm that a cookie-handling opener is installed elsewhere in the
		file. Failures are reported via the downloader and swallowed.
		"""
		# Retrieve disclaimer
		request = urllib2.Request(self._DISCLAIMER)
		try:
			self.report_disclaimer()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
			return

		# Confirm age
		disclaimer_form = {
			'filters': '0',
			'submit': "Continue - I'm over 18",
			}
		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
		try:
			self.report_age_confirmation()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return
1195
	def _real_extract(self, url):
		"""Extract the media URL and metadata from a Metacafe watch page.

		Video ids of the form 'yt-<id>' are YouTube-hosted and are
		delegated to the YouTube extractor; everything else is scraped
		from the watch page and handed to self._downloader.process_info().
		"""
		# Extract id and simplified title from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		video_id = mobj.group(1)

		# Check if video comes from YouTube
		mobj2 = re.match(r'^yt-(.*)$', video_id)
		if mobj2 is not None:
			# Delegate the whole extraction to the YouTube IE and stop here.
			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
			return

		# At this point we have a new video
		self._downloader.increment_downloads()

		simple_title = mobj.group(2).decode('utf-8')

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
		if mobj is not None:
			# Simple case: the page exposes the media URL directly.
			mediaURL = urllib.unquote(mobj.group(1))
			# NOTE(review): assumes a three-character extension (flv/mp4) — confirm
			video_extension = mediaURL[-3:]

			# Extract gdaKey if available
			mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
			if mobj is None:
				video_url = mediaURL
			else:
				# Append the access key as a __gda__ query parameter.
				gdaKey = mobj.group(1)
				video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
		else:
			# Fallback: the media info is embedded as JSON inside the
			# player's "flashvars" parameter.
			mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			vardict = parse_qs(mobj.group(1))
			if 'mediaData' not in vardict:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			# Undo the JSON escaping of forward slashes.
			mediaURL = mobj.group(1).replace('\\/', '/')
			video_extension = mediaURL[-3:]
			video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))

		mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)

		mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = mobj.group(1)

		try:
			# Process video information
			self._downloader.process_info({
				'id':           video_id.decode('utf-8'),
				'url':          video_url.decode('utf-8'),
				'uploader':     video_uploader.decode('utf-8'),
				'upload_date':  u'NA',
				'title':        video_title,
				'stitle':       simple_title,
				'ext':          video_extension.decode('utf-8'),
				'format':       u'NA',
				'player_url':   None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'\nERROR: unable to download video')
1284
1285
1286 class DailymotionIE(InfoExtractor):
1287         """Information Extractor for Dailymotion"""
1288
1289         _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1290
1291         def __init__(self, downloader=None):
1292                 InfoExtractor.__init__(self, downloader)
1293
1294         @staticmethod
1295         def suitable(url):
1296                 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1297
1298         def report_download_webpage(self, video_id):
1299                 """Report webpage download."""
1300                 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
1301
1302         def report_extraction(self, video_id):
1303                 """Report information extraction."""
1304                 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
1305
1306         def _real_initialize(self):
1307                 return
1308
1309         def _real_extract(self, url):
1310                 # Extract id and simplified title from URL
1311                 mobj = re.match(self._VALID_URL, url)
1312                 if mobj is None:
1313                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1314                         return
1315
1316                 # At this point we have a new video
1317                 self._downloader.increment_downloads()
1318                 video_id = mobj.group(1)
1319
1320                 simple_title = mobj.group(2).decode('utf-8')
1321                 video_extension = 'flv'
1322
1323                 # Retrieve video webpage to extract further information
1324                 request = urllib2.Request(url)
1325                 try:
1326                         self.report_download_webpage(video_id)
1327                         webpage = urllib2.urlopen(request).read()
1328                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1329                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1330                         return
1331
1332                 # Extract URL, uploader and title from webpage
1333                 self.report_extraction(video_id)
1334                 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1335                 if mobj is None:
1336                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1337                         return
1338                 mediaURL = urllib.unquote(mobj.group(1))
1339
1340                 # if needed add http://www.dailymotion.com/ if relative URL
1341
1342                 video_url = mediaURL
1343
1344                 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1345                 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1346                 if mobj is None:
1347                         self._downloader.trouble(u'ERROR: unable to extract title')
1348                         return
1349                 video_title = mobj.group(1).decode('utf-8')
1350                 video_title = sanitize_title(video_title)
1351
1352                 mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
1353                 if mobj is None:
1354                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1355                         return
1356                 video_uploader = mobj.group(1)
1357
1358                 try:
1359                         # Process video information
1360                         self._downloader.process_info({
1361                                 'id':           video_id.decode('utf-8'),
1362                                 'url':          video_url.decode('utf-8'),
1363                                 'uploader':     video_uploader.decode('utf-8'),
1364                                 'upload_date':  u'NA',
1365                                 'title':        video_title,
1366                                 'stitle':       simple_title,
1367                                 'ext':          video_extension.decode('utf-8'),
1368                                 'format':       u'NA',
1369                                 'player_url':   None,
1370                         })
1371                 except UnavailableVideoError:
1372                         self._downloader.trouble(u'\nERROR: unable to download video')
1373
class GoogleIE(InfoExtractor):
	"""Information extractor for video.google.com."""

	_VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'

	def __init__(self, downloader=None):
		InfoExtractor.__init__(self, downloader)

	@staticmethod
	def suitable(url):
		# True when the URL is a Google Video /videoplay page.
		return (re.match(GoogleIE._VALID_URL, url) is not None)

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)

	def _real_initialize(self):
		# Google Video needs no login or other preparation.
		return

	def _real_extract(self, url):
		"""Scrape the media URL and metadata from a Google Video page.

		Prefers the MP4 download link; falls back to the escaped FLV
		stream URL when no download link is present.
		"""
		# Extract id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
			return

		# At this point we have a new video
		self._downloader.increment_downloads()
		video_id = mobj.group(1)

		video_extension = 'mp4'

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader, and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r"download_url:'([^']+)'", webpage)
		if mobj is None:
			# No MP4 download link; fall back to the FLV stream URL.
			video_extension = 'flv'
			mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract media URL')
			return
		mediaURL = urllib.unquote(mobj.group(1))
		# Undo JavaScript hex escapes: \x3d is '=' and \x26 is '&'.
		mediaURL = mediaURL.replace('\\x3d', '\x3d')
		mediaURL = mediaURL.replace('\\x26', '\x26')

		video_url = mediaURL

		mobj = re.search(r'<title>(.*)</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)
		# Collapse any character outside the "simple" set into underscores.
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

		# Extract video description
		mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract video description')
			return
		video_description = mobj.group(1).decode('utf-8')
		if not video_description:
			video_description = 'No description available.'

		# Extract video thumbnail
		if self._downloader.params.get('forcethumbnail', False):
			# The thumbnail is only available on the search results page,
			# so a second request is made when it is explicitly requested.
			request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
			try:
				webpage = urllib2.urlopen(request).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
				return
			mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
				return
			video_thumbnail = mobj.group(1)
		else:	# we need something to pass to process_info
			video_thumbnail = ''


		try:
			# Process video information
			self._downloader.process_info({
				'id':           video_id.decode('utf-8'),
				'url':          video_url.decode('utf-8'),
				'uploader':     u'NA',
				'upload_date':  u'NA',
				'title':        video_title,
				'stitle':       simple_title,
				'ext':          video_extension.decode('utf-8'),
				'format':       u'NA',
				'player_url':   None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'\nERROR: unable to download video')
1483
1484
1485 class PhotobucketIE(InfoExtractor):
1486         """Information extractor for photobucket.com."""
1487
1488         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1489
1490         def __init__(self, downloader=None):
1491                 InfoExtractor.__init__(self, downloader)
1492
1493         @staticmethod
1494         def suitable(url):
1495                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1496
1497         def report_download_webpage(self, video_id):
1498                 """Report webpage download."""
1499                 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1500
1501         def report_extraction(self, video_id):
1502                 """Report information extraction."""
1503                 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1504
1505         def _real_initialize(self):
1506                 return
1507
1508         def _real_extract(self, url):
1509                 # Extract id from URL
1510                 mobj = re.match(self._VALID_URL, url)
1511                 if mobj is None:
1512                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1513                         return
1514
1515                 # At this point we have a new video
1516                 self._downloader.increment_downloads()
1517                 video_id = mobj.group(1)
1518
1519                 video_extension = 'flv'
1520
1521                 # Retrieve video webpage to extract further information
1522                 request = urllib2.Request(url)
1523                 try:
1524                         self.report_download_webpage(video_id)
1525                         webpage = urllib2.urlopen(request).read()
1526                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1527                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1528                         return
1529
1530                 # Extract URL, uploader, and title from webpage
1531                 self.report_extraction(video_id)
1532                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1533                 if mobj is None:
1534                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1535                         return
1536                 mediaURL = urllib.unquote(mobj.group(1))
1537
1538                 video_url = mediaURL
1539
1540                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1541                 if mobj is None:
1542                         self._downloader.trouble(u'ERROR: unable to extract title')
1543                         return
1544                 video_title = mobj.group(1).decode('utf-8')
1545                 video_title = sanitize_title(video_title)
1546                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1547
1548                 video_uploader = mobj.group(2).decode('utf-8')
1549
1550                 try:
1551                         # Process video information
1552                         self._downloader.process_info({
1553                                 'id':           video_id.decode('utf-8'),
1554                                 'url':          video_url.decode('utf-8'),
1555                                 'uploader':     video_uploader,
1556                                 'upload_date':  u'NA',
1557                                 'title':        video_title,
1558                                 'stitle':       simple_title,
1559                                 'ext':          video_extension.decode('utf-8'),
1560                                 'format':       u'NA',
1561                                 'player_url':   None,
1562                         })
1563                 except UnavailableVideoError:
1564                         self._downloader.trouble(u'\nERROR: unable to download video')
1565
1566
1567 class YahooIE(InfoExtractor):
1568         """Information extractor for video.yahoo.com."""
1569
1570         # _VALID_URL matches all Yahoo! Video URLs
1571         # _VPAGE_URL matches only the extractable '/watch/' URLs
1572         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1573         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1574
1575         def __init__(self, downloader=None):
1576                 InfoExtractor.__init__(self, downloader)
1577
1578         @staticmethod
1579         def suitable(url):
1580                 return (re.match(YahooIE._VALID_URL, url) is not None)
1581
1582         def report_download_webpage(self, video_id):
1583                 """Report webpage download."""
1584                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1585
1586         def report_extraction(self, video_id):
1587                 """Report information extraction."""
1588                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1589
1590         def _real_initialize(self):
1591                 return
1592
1593         def _real_extract(self, url, new_video=True):
1594                 # Extract ID from URL
1595                 mobj = re.match(self._VALID_URL, url)
1596                 if mobj is None:
1597                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1598                         return
1599
1600                 # At this point we have a new video
1601                 self._downloader.increment_downloads()
1602                 video_id = mobj.group(2)
1603                 video_extension = 'flv'
1604
1605                 # Rewrite valid but non-extractable URLs as
1606                 # extractable English language /watch/ URLs
1607                 if re.match(self._VPAGE_URL, url) is None:
1608                         request = urllib2.Request(url)
1609                         try:
1610                                 webpage = urllib2.urlopen(request).read()
1611                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1612                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1613                                 return
1614
1615                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1616                         if mobj is None:
1617                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1618                                 return
1619                         yahoo_id = mobj.group(1)
1620
1621                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1622                         if mobj is None:
1623                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1624                                 return
1625                         yahoo_vid = mobj.group(1)
1626
1627                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1628                         return self._real_extract(url, new_video=False)
1629
1630                 # Retrieve video webpage to extract further information
1631                 request = urllib2.Request(url)
1632                 try:
1633                         self.report_download_webpage(video_id)
1634                         webpage = urllib2.urlopen(request).read()
1635                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1636                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1637                         return
1638
1639                 # Extract uploader and title from webpage
1640                 self.report_extraction(video_id)
1641                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1642                 if mobj is None:
1643                         self._downloader.trouble(u'ERROR: unable to extract video title')
1644                         return
1645                 video_title = mobj.group(1).decode('utf-8')
1646                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1647
1648                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1649                 if mobj is None:
1650                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1651                         return
1652                 video_uploader = mobj.group(1).decode('utf-8')
1653
1654                 # Extract video thumbnail
1655                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1656                 if mobj is None:
1657                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1658                         return
1659                 video_thumbnail = mobj.group(1).decode('utf-8')
1660
1661                 # Extract video description
1662                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1663                 if mobj is None:
1664                         self._downloader.trouble(u'ERROR: unable to extract video description')
1665                         return
1666                 video_description = mobj.group(1).decode('utf-8')
1667                 if not video_description: video_description = 'No description available.'
1668
1669                 # Extract video height and width
1670                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1671                 if mobj is None:
1672                         self._downloader.trouble(u'ERROR: unable to extract video height')
1673                         return
1674                 yv_video_height = mobj.group(1)
1675
1676                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1677                 if mobj is None:
1678                         self._downloader.trouble(u'ERROR: unable to extract video width')
1679                         return
1680                 yv_video_width = mobj.group(1)
1681
1682                 # Retrieve video playlist to extract media URL
1683                 # I'm not completely sure what all these options are, but we
1684                 # seem to need most of them, otherwise the server sends a 401.
1685                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1686                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1687                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1688                                           '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1689                                           '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1690                 try:
1691                         self.report_download_webpage(video_id)
1692                         webpage = urllib2.urlopen(request).read()
1693                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1694                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1695                         return
1696
1697                 # Extract media URL from playlist XML
1698                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1699                 if mobj is None:
1700                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1701                         return
1702                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1703                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1704
1705                 try:
1706                         # Process video information
1707                         self._downloader.process_info({
1708                                 'id':           video_id.decode('utf-8'),
1709                                 'url':          video_url,
1710                                 'uploader':     video_uploader,
1711                                 'upload_date':  u'NA',
1712                                 'title':        video_title,
1713                                 'stitle':       simple_title,
1714                                 'ext':          video_extension.decode('utf-8'),
1715                                 'thumbnail':    video_thumbnail.decode('utf-8'),
1716                                 'description':  video_description,
1717                                 'thumbnail':    video_thumbnail,
1718                                 'description':  video_description,
1719                                 'player_url':   None,
1720                         })
1721                 except UnavailableVideoError:
1722                         self._downloader.trouble(u'\nERROR: unable to download video')
1723
1724
1725 class VimeoIE(InfoExtractor):
1726         """Information extractor for vimeo.com."""
1727
1728         # _VALID_URL matches Vimeo URLs
1729         _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
1730
1731         def __init__(self, downloader=None):
1732                 InfoExtractor.__init__(self, downloader)
1733
1734         @staticmethod
1735         def suitable(url):
1736                 return (re.match(VimeoIE._VALID_URL, url) is not None)
1737
1738         def report_download_webpage(self, video_id):
1739                 """Report webpage download."""
1740                 self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
1741
1742         def report_extraction(self, video_id):
1743                 """Report information extraction."""
1744                 self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
1745
1746         def _real_initialize(self):
1747                 return
1748
1749         def _real_extract(self, url, new_video=True):
1750                 # Extract ID from URL
1751                 mobj = re.match(self._VALID_URL, url)
1752                 if mobj is None:
1753                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1754                         return
1755
1756                 # At this point we have a new video
1757                 self._downloader.increment_downloads()
1758                 video_id = mobj.group(1)
1759
1760                 # Retrieve video webpage to extract further information
1761                 request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
1762                 try:
1763                         self.report_download_webpage(video_id)
1764                         webpage = urllib2.urlopen(request).read()
1765                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1766                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1767                         return
1768
1769                 # Now we begin extracting as much information as we can from what we
1770                 # retrieved. First we extract the information common to all extractors,
1771                 # and latter we extract those that are Vimeo specific.
1772                 self.report_extraction(video_id)
1773
1774                 # Extract title
1775                 mobj = re.search(r'<caption>(.*?)</caption>', webpage)
1776                 if mobj is None:
1777                         self._downloader.trouble(u'ERROR: unable to extract video title')
1778                         return
1779                 video_title = mobj.group(1).decode('utf-8')
1780                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1781
1782                 # Extract uploader
1783                 mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
1784                 if mobj is None:
1785                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1786                         return
1787                 video_uploader = mobj.group(1).decode('utf-8')
1788
1789                 # Extract video thumbnail
1790                 mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
1791                 if mobj is None:
1792                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1793                         return
1794                 video_thumbnail = mobj.group(1).decode('utf-8')
1795
1796                 # # Extract video description
1797                 # mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
1798                 # if mobj is None:
1799                 #       self._downloader.trouble(u'ERROR: unable to extract video description')
1800                 #       return
1801                 # video_description = mobj.group(1).decode('utf-8')
1802                 # if not video_description: video_description = 'No description available.'
1803                 video_description = 'Foo.'
1804
1805                 # Vimeo specific: extract request signature
1806                 mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
1807                 if mobj is None:
1808                         self._downloader.trouble(u'ERROR: unable to extract request signature')
1809                         return
1810                 sig = mobj.group(1).decode('utf-8')
1811
1812                 # Vimeo specific: Extract request signature expiration
1813                 mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
1814                 if mobj is None:
1815                         self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
1816                         return
1817                 sig_exp = mobj.group(1).decode('utf-8')
1818
1819                 video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id, sig, sig_exp)
1820
1821                 try:
1822                         # Process video information
1823                         self._downloader.process_info({
1824                                 'id':           video_id.decode('utf-8'),
1825                                 'url':          video_url,
1826                                 'uploader':     video_uploader,
1827                                 'upload_date':  u'NA',
1828                                 'title':        video_title,
1829                                 'stitle':       simple_title,
1830                                 'ext':          u'mp4',
1831                                 'thumbnail':    video_thumbnail.decode('utf-8'),
1832                                 'description':  video_description,
1833                                 'thumbnail':    video_thumbnail,
1834                                 'description':  video_description,
1835                                 'player_url':   None,
1836                         })
1837                 except UnavailableVideoError:
1838                         self._downloader.trouble(u'ERROR: unable to download video')
1839
1840
1841 class GenericIE(InfoExtractor):
1842         """Generic last-resort information extractor."""
1843
1844         def __init__(self, downloader=None):
1845                 InfoExtractor.__init__(self, downloader)
1846
1847         @staticmethod
1848         def suitable(url):
1849                 return True
1850
1851         def report_download_webpage(self, video_id):
1852                 """Report webpage download."""
1853                 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1854                 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1855
1856         def report_extraction(self, video_id):
1857                 """Report information extraction."""
1858                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1859
1860         def _real_initialize(self):
1861                 return
1862
1863         def _real_extract(self, url):
1864                 # At this point we have a new video
1865                 self._downloader.increment_downloads()
1866
1867                 video_id = url.split('/')[-1]
1868                 request = urllib2.Request(url)
1869                 try:
1870                         self.report_download_webpage(video_id)
1871                         webpage = urllib2.urlopen(request).read()
1872                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1873                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1874                         return
1875                 except ValueError, err:
1876                         # since this is the last-resort InfoExtractor, if
1877                         # this error is thrown, it'll be thrown here
1878                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1879                         return
1880
1881                 self.report_extraction(video_id)
1882                 # Start with something easy: JW Player in SWFObject
1883                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1884                 if mobj is None:
1885                         # Broaden the search a little bit
1886                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1887                 if mobj is None:
1888                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1889                         return
1890
1891                 # It's possible that one of the regexes
1892                 # matched, but returned an empty group:
1893                 if mobj.group(1) is None:
1894                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1895                         return
1896
1897                 video_url = urllib.unquote(mobj.group(1))
1898                 video_id  = os.path.basename(video_url)
1899
1900                 # here's a fun little line of code for you:
1901                 video_extension = os.path.splitext(video_id)[1][1:]
1902                 video_id        = os.path.splitext(video_id)[0]
1903
1904                 # it's tempting to parse this further, but you would
1905                 # have to take into account all the variations like
1906                 #   Video Title - Site Name
1907                 #   Site Name | Video Title
1908                 #   Video Title - Tagline | Site Name
1909                 # and so on and so forth; it's just not practical
1910                 mobj = re.search(r'<title>(.*)</title>', webpage)
1911                 if mobj is None:
1912                         self._downloader.trouble(u'ERROR: unable to extract title')
1913                         return
1914                 video_title = mobj.group(1).decode('utf-8')
1915                 video_title = sanitize_title(video_title)
1916                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1917
1918                 # video uploader is domain name
1919                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1920                 if mobj is None:
1921                         self._downloader.trouble(u'ERROR: unable to extract title')
1922                         return
1923                 video_uploader = mobj.group(1).decode('utf-8')
1924
1925                 try:
1926                         # Process video information
1927                         self._downloader.process_info({
1928                                 'id':           video_id.decode('utf-8'),
1929                                 'url':          video_url.decode('utf-8'),
1930                                 'uploader':     video_uploader,
1931                                 'upload_date':  u'NA',
1932                                 'title':        video_title,
1933                                 'stitle':       simple_title,
1934                                 'ext':          video_extension.decode('utf-8'),
1935                                 'format':       u'NA',
1936                                 'player_url':   None,
1937                         })
1938                 except UnavailableVideoError, err:
1939                         self._downloader.trouble(u'\nERROR: unable to download video')
1940
1941
1942 class YoutubeSearchIE(InfoExtractor):
1943         """Information Extractor for YouTube search queries."""
1944         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1945         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1946         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1947         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1948         _youtube_ie = None
1949         _max_youtube_results = 1000
1950
1951         def __init__(self, youtube_ie, downloader=None):
1952                 InfoExtractor.__init__(self, downloader)
1953                 self._youtube_ie = youtube_ie
1954
1955         @staticmethod
1956         def suitable(url):
1957                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1958
1959         def report_download_page(self, query, pagenum):
1960                 """Report attempt to download playlist page with given number."""
1961                 query = query.decode(preferredencoding())
1962                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1963
1964         def _real_initialize(self):
1965                 self._youtube_ie.initialize()
1966
1967         def _real_extract(self, query):
1968                 mobj = re.match(self._VALID_QUERY, query)
1969                 if mobj is None:
1970                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1971                         return
1972
1973                 prefix, query = query.split(':')
1974                 prefix = prefix[8:]
1975                 query  = query.encode('utf-8')
1976                 if prefix == '':
1977                         self._download_n_results(query, 1)
1978                         return
1979                 elif prefix == 'all':
1980                         self._download_n_results(query, self._max_youtube_results)
1981                         return
1982                 else:
1983                         try:
1984                                 n = long(prefix)
1985                                 if n <= 0:
1986                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1987                                         return
1988                                 elif n > self._max_youtube_results:
1989                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
1990                                         n = self._max_youtube_results
1991                                 self._download_n_results(query, n)
1992                                 return
1993                         except ValueError: # parsing prefix as integer fails
1994                                 self._download_n_results(query, 1)
1995                                 return
1996
1997         def _download_n_results(self, query, n):
1998                 """Downloads a specified number of results for a query"""
1999
2000                 video_ids = []
2001                 already_seen = set()
2002                 pagenum = 1
2003
2004                 while True:
2005                         self.report_download_page(query, pagenum)
2006                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2007                         request = urllib2.Request(result_url)
2008                         try:
2009                                 page = urllib2.urlopen(request).read()
2010                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2011                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2012                                 return
2013
2014                         # Extract video identifiers
2015                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2016                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
2017                                 if video_id not in already_seen:
2018                                         video_ids.append(video_id)
2019                                         already_seen.add(video_id)
2020                                         if len(video_ids) == n:
2021                                                 # Specified n videos reached
2022                                                 for id in video_ids:
2023                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2024                                                 return
2025
2026                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2027                                 for id in video_ids:
2028                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2029                                 return
2030
2031                         pagenum = pagenum + 1
2032
2033 class GoogleSearchIE(InfoExtractor):
2034         """Information Extractor for Google Video search queries."""
2035         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
2036         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
2037         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
2038         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
2039         _google_ie = None
2040         _max_google_results = 1000
2041
2042         def __init__(self, google_ie, downloader=None):
2043                 InfoExtractor.__init__(self, downloader)
2044                 self._google_ie = google_ie
2045
2046         @staticmethod
2047         def suitable(url):
2048                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
2049
2050         def report_download_page(self, query, pagenum):
2051                 """Report attempt to download playlist page with given number."""
2052                 query = query.decode(preferredencoding())
2053                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
2054
2055         def _real_initialize(self):
2056                 self._google_ie.initialize()
2057
2058         def _real_extract(self, query):
2059                 mobj = re.match(self._VALID_QUERY, query)
2060                 if mobj is None:
2061                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2062                         return
2063
2064                 prefix, query = query.split(':')
2065                 prefix = prefix[8:]
2066                 query  = query.encode('utf-8')
2067                 if prefix == '':
2068                         self._download_n_results(query, 1)
2069                         return
2070                 elif prefix == 'all':
2071                         self._download_n_results(query, self._max_google_results)
2072                         return
2073                 else:
2074                         try:
2075                                 n = long(prefix)
2076                                 if n <= 0:
2077                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2078                                         return
2079                                 elif n > self._max_google_results:
2080                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n))
2081                                         n = self._max_google_results
2082                                 self._download_n_results(query, n)
2083                                 return
2084                         except ValueError: # parsing prefix as integer fails
2085                                 self._download_n_results(query, 1)
2086                                 return
2087
2088         def _download_n_results(self, query, n):
2089                 """Downloads a specified number of results for a query"""
2090
2091                 video_ids = []
2092                 already_seen = set()
2093                 pagenum = 1
2094
2095                 while True:
2096                         self.report_download_page(query, pagenum)
2097                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2098                         request = urllib2.Request(result_url)
2099                         try:
2100                                 page = urllib2.urlopen(request).read()
2101                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2102                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2103                                 return
2104
2105                         # Extract video identifiers
2106                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2107                                 video_id = mobj.group(1)
2108                                 if video_id not in already_seen:
2109                                         video_ids.append(video_id)
2110                                         already_seen.add(video_id)
2111                                         if len(video_ids) == n:
2112                                                 # Specified n videos reached
2113                                                 for id in video_ids:
2114                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2115                                                 return
2116
2117                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2118                                 for id in video_ids:
2119                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2120                                 return
2121
2122                         pagenum = pagenum + 1
2123
2124 class YahooSearchIE(InfoExtractor):
2125         """Information Extractor for Yahoo! Video search queries."""
2126         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
2127         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
2128         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
2129         _MORE_PAGES_INDICATOR = r'\s*Next'
2130         _yahoo_ie = None
2131         _max_yahoo_results = 1000
2132
2133         def __init__(self, yahoo_ie, downloader=None):
2134                 InfoExtractor.__init__(self, downloader)
2135                 self._yahoo_ie = yahoo_ie
2136
2137         @staticmethod
2138         def suitable(url):
2139                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
2140
2141         def report_download_page(self, query, pagenum):
2142                 """Report attempt to download playlist page with given number."""
2143                 query = query.decode(preferredencoding())
2144                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
2145
2146         def _real_initialize(self):
2147                 self._yahoo_ie.initialize()
2148
2149         def _real_extract(self, query):
2150                 mobj = re.match(self._VALID_QUERY, query)
2151                 if mobj is None:
2152                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2153                         return
2154
2155                 prefix, query = query.split(':')
2156                 prefix = prefix[8:]
2157                 query  = query.encode('utf-8')
2158                 if prefix == '':
2159                         self._download_n_results(query, 1)
2160                         return
2161                 elif prefix == 'all':
2162                         self._download_n_results(query, self._max_yahoo_results)
2163                         return
2164                 else:
2165                         try:
2166                                 n = long(prefix)
2167                                 if n <= 0:
2168                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2169                                         return
2170                                 elif n > self._max_yahoo_results:
2171                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n))
2172                                         n = self._max_yahoo_results
2173                                 self._download_n_results(query, n)
2174                                 return
2175                         except ValueError: # parsing prefix as integer fails
2176                                 self._download_n_results(query, 1)
2177                                 return
2178
2179         def _download_n_results(self, query, n):
2180                 """Downloads a specified number of results for a query"""
2181
2182                 video_ids = []
2183                 already_seen = set()
2184                 pagenum = 1
2185
2186                 while True:
2187                         self.report_download_page(query, pagenum)
2188                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2189                         request = urllib2.Request(result_url)
2190                         try:
2191                                 page = urllib2.urlopen(request).read()
2192                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2193                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2194                                 return
2195
2196                         # Extract video identifiers
2197                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2198                                 video_id = mobj.group(1)
2199                                 if video_id not in already_seen:
2200                                         video_ids.append(video_id)
2201                                         already_seen.add(video_id)
2202                                         if len(video_ids) == n:
2203                                                 # Specified n videos reached
2204                                                 for id in video_ids:
2205                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2206                                                 return
2207
2208                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2209                                 for id in video_ids:
2210                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2211                                 return
2212
2213                         pagenum = pagenum + 1
2214
2215 class YoutubePlaylistIE(InfoExtractor):
2216         """Information Extractor for YouTube playlists."""
2217
2218         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
2219         _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
2220         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2221         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2222         _youtube_ie = None
2223
2224         def __init__(self, youtube_ie, downloader=None):
2225                 InfoExtractor.__init__(self, downloader)
2226                 self._youtube_ie = youtube_ie
2227
2228         @staticmethod
2229         def suitable(url):
2230                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
2231
2232         def report_download_page(self, playlist_id, pagenum):
2233                 """Report attempt to download playlist page with given number."""
2234                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2235
2236         def _real_initialize(self):
2237                 self._youtube_ie.initialize()
2238
2239         def _real_extract(self, url):
2240                 # Extract playlist id
2241                 mobj = re.match(self._VALID_URL, url)
2242                 if mobj is None:
2243                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2244                         return
2245
2246                 # Single video case
2247                 if mobj.group(3) is not None:
2248                         self._youtube_ie.extract(mobj.group(3))
2249                         return
2250
2251                 # Download playlist pages
2252                 # prefix is 'p' as default for playlists but there are other types that need extra care
2253                 playlist_prefix = mobj.group(1)
2254                 if playlist_prefix == 'a':
2255                         playlist_access = 'artist'
2256                 else:
2257                         playlist_prefix = 'p'
2258                         playlist_access = 'view_play_list'
2259                 playlist_id = mobj.group(2)
2260                 video_ids = []
2261                 pagenum = 1
2262
2263                 while True:
2264                         self.report_download_page(playlist_id, pagenum)
2265                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
2266                         try:
2267                                 page = urllib2.urlopen(request).read()
2268                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2269                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2270                                 return
2271
2272                         # Extract video identifiers
2273                         ids_in_page = []
2274                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2275                                 if mobj.group(1) not in ids_in_page:
2276                                         ids_in_page.append(mobj.group(1))
2277                         video_ids.extend(ids_in_page)
2278
2279                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2280                                 break
2281                         pagenum = pagenum + 1
2282
2283                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2284                 playlistend = self._downloader.params.get('playlistend', -1)
2285                 video_ids = video_ids[playliststart:playlistend]
2286
2287                 for id in video_ids:
2288                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2289                 return
2290
2291 class YoutubeUserIE(InfoExtractor):
2292         """Information Extractor for YouTube users."""
2293
2294         _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
2295         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2296         _GDATA_PAGE_SIZE = 50
2297         _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2298         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2299         _youtube_ie = None
2300
2301         def __init__(self, youtube_ie, downloader=None):
2302                 InfoExtractor.__init__(self, downloader)
2303                 self._youtube_ie = youtube_ie
2304
2305         @staticmethod
2306         def suitable(url):
2307                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2308
2309         def report_download_page(self, username, start_index):
2310                 """Report attempt to download user page."""
2311                 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
2312                                            (username, start_index, start_index + self._GDATA_PAGE_SIZE))
2313
2314         def _real_initialize(self):
2315                 self._youtube_ie.initialize()
2316
2317         def _real_extract(self, url):
2318                 # Extract username
2319                 mobj = re.match(self._VALID_URL, url)
2320                 if mobj is None:
2321                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2322                         return
2323
2324                 username = mobj.group(1)
2325
2326                 # Download video ids using YouTube Data API. Result size per
2327                 # query is limited (currently to 50 videos) so we need to query
2328                 # page by page until there are no video ids - it means we got
2329                 # all of them.
2330
2331                 video_ids = []
2332                 pagenum = 0
2333
2334                 while True:
2335                         start_index = pagenum * self._GDATA_PAGE_SIZE + 1
2336                         self.report_download_page(username, start_index)
2337
2338                         request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
2339
2340                         try:
2341                                 page = urllib2.urlopen(request).read()
2342                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2343                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2344                                 return
2345
2346                         # Extract video identifiers
2347                         ids_in_page = []
2348
2349                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2350                                 if mobj.group(1) not in ids_in_page:
2351                                         ids_in_page.append(mobj.group(1))
2352
2353                         video_ids.extend(ids_in_page)
2354
2355                         # A little optimization - if current page is not
2356                         # "full", ie. does not contain PAGE_SIZE video ids then
2357                         # we can assume that this page is the last one - there
2358                         # are no more ids on further pages - no need to query
2359                         # again.
2360
2361                         if len(ids_in_page) < self._GDATA_PAGE_SIZE:
2362                                 break
2363
2364                         pagenum += 1
2365
2366                 all_ids_count = len(video_ids)
2367                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2368                 playlistend = self._downloader.params.get('playlistend', -1)
2369
2370                 if playlistend == -1:
2371                         video_ids = video_ids[playliststart:]
2372                 else:
2373                         video_ids = video_ids[playliststart:playlistend]
2374                         
2375                 self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
2376                                            (username, all_ids_count, len(video_ids)))
2377
2378                 for video_id in video_ids:
2379                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2380
2381
2382 class DepositFilesIE(InfoExtractor):
2383         """Information extractor for depositfiles.com"""
2384
2385         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
2386
	def __init__(self, downloader=None):
		# Delegate to the base class; explicit call (not super()) keeps
		# compatibility with the classic-class style used in this file.
		InfoExtractor.__init__(self, downloader)
2389
2390         @staticmethod
2391         def suitable(url):
2392                 return (re.match(DepositFilesIE._VALID_URL, url) is not None)
2393
2394         def report_download_webpage(self, file_id):
2395                 """Report webpage download."""
2396                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2397
2398         def report_extraction(self, file_id):
2399                 """Report information extraction."""
2400                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2401
	def _real_initialize(self):
		# No authentication or setup needed for DepositFiles.
		return
2404
2405         def _real_extract(self, url):
2406                 # At this point we have a new file
2407                 self._downloader.increment_downloads()
2408
2409                 file_id = url.split('/')[-1]
2410                 # Rebuild url in english locale
2411                 url = 'http://depositfiles.com/en/files/' + file_id
2412
2413                 # Retrieve file webpage with 'Free download' button pressed
2414                 free_download_indication = { 'gateway_result' : '1' }
2415                 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
2416                 try:
2417                         self.report_download_webpage(file_id)
2418                         webpage = urllib2.urlopen(request).read()
2419                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2420                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2421                         return
2422
2423                 # Search for the real file URL
2424                 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2425                 if (mobj is None) or (mobj.group(1) is None):
2426                         # Try to figure out reason of the error.
2427                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2428                         if (mobj is not None) and (mobj.group(1) is not None):
2429                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2430                                 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2431                         else:
2432                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2433                         return
2434
2435                 file_url = mobj.group(1)
2436                 file_extension = os.path.splitext(file_url)[1][1:]
2437
2438                 # Search for file title
2439                 mobj = re.search(r'<b title="(.*?)">', webpage)
2440                 if mobj is None:
2441                         self._downloader.trouble(u'ERROR: unable to extract title')
2442                         return
2443                 file_title = mobj.group(1).decode('utf-8')
2444
2445                 try:
2446                         # Process file information
2447                         self._downloader.process_info({
2448                                 'id':           file_id.decode('utf-8'),
2449                                 'url':          file_url.decode('utf-8'),
2450                                 'uploader':     u'NA',
2451                                 'upload_date':  u'NA',
2452                                 'title':        file_title,
2453                                 'stitle':       file_title,
2454                                 'ext':          file_extension.decode('utf-8'),
2455                                 'format':       u'NA',
2456                                 'player_url':   None,
2457                         })
2458                 except UnavailableVideoError, err:
2459                         self._downloader.trouble(u'ERROR: unable to download file')
2460
2461 class FacebookIE(InfoExtractor):
2462         """Information Extractor for Facebook"""
2463
2464         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2465         _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2466         _NETRC_MACHINE = 'facebook'
2467         _available_formats = ['highqual', 'lowqual']
2468         _video_extensions = {
2469                 'highqual': 'mp4',
2470                 'lowqual': 'mp4',
2471         }
2472
2473         def __init__(self, downloader=None):
2474                 InfoExtractor.__init__(self, downloader)
2475
2476         @staticmethod
2477         def suitable(url):
2478                 return (re.match(FacebookIE._VALID_URL, url) is not None)
2479
2480         def _reporter(self, message):
2481                 """Add header and report message."""
2482                 self._downloader.to_screen(u'[facebook] %s' % message)
2483
2484         def report_login(self):
2485                 """Report attempt to log in."""
2486                 self._reporter(u'Logging in')
2487
2488         def report_video_webpage_download(self, video_id):
2489                 """Report attempt to download video webpage."""
2490                 self._reporter(u'%s: Downloading video webpage' % video_id)
2491
2492         def report_information_extraction(self, video_id):
2493                 """Report attempt to extract video information."""
2494                 self._reporter(u'%s: Extracting video information' % video_id)
2495
2496         def _parse_page(self, video_webpage):
2497                 """Extract video information from page"""
2498                 # General data
2499                 data = {'title': r'class="video_title datawrap">(.*?)</',
2500                         'description': r'<div class="datawrap">(.*?)</div>',
2501                         'owner': r'\("video_owner_name", "(.*?)"\)',
2502                         'upload_date': r'data-date="(.*?)"',
2503                         'thumbnail':  r'\("thumb_url", "(?P<THUMB>.*?)"\)',
2504                         }
2505                 video_info = {}
2506                 for piece in data.keys():
2507                         mobj = re.search(data[piece], video_webpage)
2508                         if mobj is not None:
2509                                 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2510
2511                 # Video urls
2512                 video_urls = {}
2513                 for fmt in self._available_formats:
2514                         mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
2515                         if mobj is not None:
2516                                 # URL is in a Javascript segment inside an escaped Unicode format within
2517                                 # the generally utf-8 page
2518                                 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2519                 video_info['video_urls'] = video_urls
2520
2521                 return video_info
2522
2523         def _real_initialize(self):
2524                 if self._downloader is None:
2525                         return
2526
2527                 useremail = None
2528                 password = None
2529                 downloader_params = self._downloader.params
2530
2531                 # Attempt to use provided username and password or .netrc data
2532                 if downloader_params.get('username', None) is not None:
2533                         useremail = downloader_params['username']
2534                         password = downloader_params['password']
2535                 elif downloader_params.get('usenetrc', False):
2536                         try:
2537                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
2538                                 if info is not None:
2539                                         useremail = info[0]
2540                                         password = info[2]
2541                                 else:
2542                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
2543                         except (IOError, netrc.NetrcParseError), err:
2544                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
2545                                 return
2546
2547                 if useremail is None:
2548                         return
2549
2550                 # Log in
2551                 login_form = {
2552                         'email': useremail,
2553                         'pass': password,
2554                         'login': 'Log+In'
2555                         }
2556                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
2557                 try:
2558                         self.report_login()
2559                         login_results = urllib2.urlopen(request).read()
2560                         if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
2561                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
2562                                 return
2563                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2564                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
2565                         return
2566
2567         def _real_extract(self, url):
2568                 mobj = re.match(self._VALID_URL, url)
2569                 if mobj is None:
2570                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2571                         return
2572                 video_id = mobj.group('ID')
2573
2574                 # Get video webpage
2575                 self.report_video_webpage_download(video_id)
2576                 request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
2577                 try:
2578                         page = urllib2.urlopen(request)
2579                         video_webpage = page.read()
2580                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2581                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
2582                         return
2583
2584                 # Start extracting information
2585                 self.report_information_extraction(video_id)
2586
2587                 # Extract information
2588                 video_info = self._parse_page(video_webpage)
2589
2590                 # uploader
2591                 if 'owner' not in video_info:
2592                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
2593                         return
2594                 video_uploader = video_info['owner']
2595
2596                 # title
2597                 if 'title' not in video_info:
2598                         self._downloader.trouble(u'ERROR: unable to extract video title')
2599                         return
2600                 video_title = video_info['title']
2601                 video_title = video_title.decode('utf-8')
2602                 video_title = sanitize_title(video_title)
2603
2604                 # simplified title
2605                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2606                 simple_title = simple_title.strip(ur'_')
2607
2608                 # thumbnail image
2609                 if 'thumbnail' not in video_info:
2610                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
2611                         video_thumbnail = ''
2612                 else:
2613                         video_thumbnail = video_info['thumbnail']
2614
2615                 # upload date
2616                 upload_date = u'NA'
2617                 if 'upload_date' in video_info:
2618                         upload_time = video_info['upload_date']
2619                         timetuple = email.utils.parsedate_tz(upload_time)
2620                         if timetuple is not None:
2621                                 try:
2622                                         upload_date = time.strftime('%Y%m%d', timetuple[0:9])
2623                                 except:
2624                                         pass
2625
2626                 # description
2627                 video_description = 'No description available.'
2628                 if (self._downloader.params.get('forcedescription', False) and
2629                     'description' in video_info):
2630                         video_description = video_info['description']
2631
2632                 url_map = video_info['video_urls']
2633                 if len(url_map.keys()) > 0:
2634                         # Decide which formats to download
2635                         req_format = self._downloader.params.get('format', None)
2636                         format_limit = self._downloader.params.get('format_limit', None)
2637
2638                         if format_limit is not None and format_limit in self._available_formats:
2639                                 format_list = self._available_formats[self._available_formats.index(format_limit):]
2640                         else:
2641                                 format_list = self._available_formats
2642                         existing_formats = [x for x in format_list if x in url_map]
2643                         if len(existing_formats) == 0:
2644                                 self._downloader.trouble(u'ERROR: no known formats available for video')
2645                                 return
2646                         if req_format is None:
2647                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
2648                         elif req_format == '-1':
2649                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
2650                         else:
2651                                 # Specific format
2652                                 if req_format not in url_map:
2653                                         self._downloader.trouble(u'ERROR: requested format not available')
2654                                         return
2655                                 video_url_list = [(req_format, url_map[req_format])] # Specific format
2656
2657                 for format_param, video_real_url in video_url_list:
2658
2659                         # At this point we have a new video
2660                         self._downloader.increment_downloads()
2661
2662                         # Extension
2663                         video_extension = self._video_extensions.get(format_param, 'mp4')
2664
2665                         # Find the video URL in fmt_url_map or conn paramters
2666                         try:
2667                                 # Process video information
2668                                 self._downloader.process_info({
2669                                         'id':           video_id.decode('utf-8'),
2670                                         'url':          video_real_url.decode('utf-8'),
2671                                         'uploader':     video_uploader.decode('utf-8'),
2672                                         'upload_date':  upload_date,
2673                                         'title':        video_title,
2674                                         'stitle':       simple_title,
2675                                         'ext':          video_extension.decode('utf-8'),
2676                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
2677                                         'thumbnail':    video_thumbnail.decode('utf-8'),
2678                                         'description':  video_description.decode('utf-8'),
2679                                         'player_url':   None,
2680                                 })
2681                         except UnavailableVideoError, err:
2682                                 self._downloader.trouble(u'\nERROR: unable to download video')
2683
class PostProcessor(object):
	"""Base class for all post processors.

	A PostProcessor is registered on a downloader through the downloader's
	add_post_processor() method. Once a download finishes successfully, the
	downloader walks its chain of post processors, calling run() on each one
	and feeding it the value returned by the previous stage (the first stage
	receives the downloader's own information dictionary).

	A stage that returns None stops the chain; otherwise processing continues
	until the chain is exhausted.

	The downloader/post-processor relationship mirrors the "mutual
	registration" scheme used by InfoExtractor objects.
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Attach a downloader to this post processor."""
		self._downloader = downloader

	def run(self, information):
		"""Process one downloaded file.

		"information" is a dictionary shaped like those produced by
		InfoExtractors, with one extra key, "filepath", naming the
		downloaded file on disk.

		Return None to stop the postprocessing chain, or an information
		dictionary (possibly with some fields changed) to hand to the next
		post processor. May raise PostProcessingError, which the calling
		downloader takes into account.
		"""
		return information # the base class performs no processing
2729
class FFmpegExtractAudioPP(PostProcessor):
	"""Post processor that converts a downloaded video into an audio-only file.

	Requires the external "ffprobe" and "ffmpeg" programs. preferredcodec is
	'best' (default), 'aac' or 'mp3'.
	"""

	def __init__(self, downloader=None, preferredcodec=None):
		PostProcessor.__init__(self, downloader)
		if preferredcodec is None:
			preferredcodec = 'best'
		self._preferredcodec = preferredcodec

	@staticmethod
	def get_audio_codec(path):
		"""Return the audio codec name reported by ffprobe, or None on failure."""
		try:
			cmd = ['ffprobe', '-show_streams', '--', path]
			# open() instead of the py2-only file(); close the devnull handle
			# again instead of leaking it on every call.
			devnull = open(os.path.devnull, 'w')
			try:
				handle = subprocess.Popen(cmd, stderr=devnull, stdout=subprocess.PIPE)
				output = handle.communicate()[0]
				if handle.wait() != 0:
					return None
			finally:
				devnull.close()
		except (IOError, OSError):
			return None
		# ffprobe prints codec_name before codec_type within each stream block,
		# so the last codec_name seen belongs to the stream whose codec_type
		# line we are looking at.
		audio_codec = None
		for line in output.split('\n'):
			if line.startswith('codec_name='):
				audio_codec = line.split('=')[1].strip()
			elif line.strip() == 'codec_type=audio' and audio_codec is not None:
				return audio_codec
		return None

	@staticmethod
	def run_ffmpeg(path, out_path, codec, more_opts):
		"""Run ffmpeg to transcode path into out_path; return True on success."""
		try:
			cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
			devnull = open(os.path.devnull, 'w')
			try:
				ret = subprocess.call(cmd, stdout=devnull, stderr=subprocess.STDOUT)
			finally:
				devnull.close()
			return (ret == 0)
		except (IOError, OSError):
			return False

	def run(self, information):
		"""Convert information['filepath'] to audio and delete the original."""
		path = information['filepath']

		filecodec = self.get_audio_codec(path)
		if filecodec is None:
			self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
			return None

		more_opts = []
		if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
			if filecodec == 'aac' or filecodec == 'mp3':
				# Lossless if possible
				acodec = 'copy'
				extension = filecodec
				if filecodec == 'aac':
					# Raw AAC needs an ADTS container to be playable.
					more_opts = ['-f', 'adts']
			else:
				# MP3 otherwise.
				acodec = 'libmp3lame'
				extension = 'mp3'
				more_opts = ['-ab', '128k']
		else:
			# We convert the audio (lossy)
			acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
			extension = self._preferredcodec
			more_opts = ['-ab', '128k']
			if self._preferredcodec == 'aac':
				more_opts += ['-f', 'adts']

		(prefix, ext) = os.path.splitext(path)
		new_path = prefix + '.' + extension
		self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
		status = self.run_ffmpeg(path, new_path, acodec, more_opts)

		if not status:
			self._downloader.to_stderr(u'WARNING: error running ffmpeg')
			return None

		try:
			os.remove(path)
		except (IOError, OSError):
			self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
			return None

		information['filepath'] = new_path
		return information
2811
2812 ### MAIN PROGRAM ###
2813 if __name__ == '__main__':
2814         try:
2815                 # Modules needed only when running the main program
2816                 import getpass
2817                 import optparse
2818
2819                 # Function to update the program file with the latest version from the repository.
2820                 def update_self(downloader, filename):
2821                         # Note: downloader only used for options
2822                         if not os.access(filename, os.W_OK):
2823                                 sys.exit('ERROR: no write permissions on %s' % filename)
2824
2825                         downloader.to_screen('Updating to latest stable version...')
2826                         try:
2827                                 latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
2828                                 latest_version = urllib.urlopen(latest_url).read().strip()
2829                                 prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
2830                                 newcontent = urllib.urlopen(prog_url).read()
2831                         except (IOError, OSError), err:
2832                                 sys.exit('ERROR: unable to download latest version')
2833                         try:
2834                                 stream = open(filename, 'w')
2835                                 stream.write(newcontent)
2836                                 stream.close()
2837                         except (IOError, OSError), err:
2838                                 sys.exit('ERROR: unable to overwrite current version')
2839                         downloader.to_screen('Updated to version %s' % latest_version)
2840
2841                 # Parse command line
2842                 parser = optparse.OptionParser(
2843                         usage='Usage: %prog [options] url...',
2844                         version='2011.08.04',
2845                         conflict_handler='resolve',
2846                 )
2847
2848                 parser.add_option('-h', '--help',
2849                                 action='help', help='print this help text and exit')
2850                 parser.add_option('-v', '--version',
2851                                 action='version', help='print program version and exit')
2852                 parser.add_option('-U', '--update',
2853                                 action='store_true', dest='update_self', help='update this program to latest stable version')
2854                 parser.add_option('-i', '--ignore-errors',
2855                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
2856                 parser.add_option('-r', '--rate-limit',
2857                                 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
2858                 parser.add_option('-R', '--retries',
2859                                 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
2860                 parser.add_option('--playlist-start',
2861                                 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
2862                 parser.add_option('--playlist-end',
2863                                 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
2864                 parser.add_option('--dump-user-agent',
2865                                 action='store_true', dest='dump_user_agent',
2866                                 help='display the current browser identification', default=False)
2867
2868                 authentication = optparse.OptionGroup(parser, 'Authentication Options')
2869                 authentication.add_option('-u', '--username',
2870                                 dest='username', metavar='USERNAME', help='account username')
2871                 authentication.add_option('-p', '--password',
2872                                 dest='password', metavar='PASSWORD', help='account password')
2873                 authentication.add_option('-n', '--netrc',
2874                                 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
2875                 parser.add_option_group(authentication)
2876
2877                 video_format = optparse.OptionGroup(parser, 'Video Format Options')
2878                 video_format.add_option('-f', '--format',
2879                                 action='store', dest='format', metavar='FORMAT', help='video format code')
2880                 video_format.add_option('--all-formats',
2881                                 action='store_const', dest='format', help='download all available video formats', const='-1')
2882                 video_format.add_option('--max-quality',
2883                                 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
2884                 parser.add_option_group(video_format)
2885
2886                 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
2887                 verbosity.add_option('-q', '--quiet',
2888                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
2889                 verbosity.add_option('-s', '--simulate',
2890                                 action='store_true', dest='simulate', help='do not download video', default=False)
2891                 verbosity.add_option('-g', '--get-url',
2892                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
2893                 verbosity.add_option('-e', '--get-title',
2894                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
2895                 verbosity.add_option('--get-thumbnail',
2896                                 action='store_true', dest='getthumbnail',
2897                                 help='simulate, quiet but print thumbnail URL', default=False)
2898                 verbosity.add_option('--get-description',
2899                                 action='store_true', dest='getdescription',
2900                                 help='simulate, quiet but print video description', default=False)
2901                 verbosity.add_option('--get-filename',
2902                                 action='store_true', dest='getfilename',
2903                                 help='simulate, quiet but print output filename', default=False)
2904                 verbosity.add_option('--no-progress',
2905                                 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
2906                 verbosity.add_option('--console-title',
2907                                 action='store_true', dest='consoletitle',
2908                                 help='display progress in console titlebar', default=False)
2909                 parser.add_option_group(verbosity)
2910
2911                 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
2912                 filesystem.add_option('-t', '--title',
2913                                 action='store_true', dest='usetitle', help='use title in file name', default=False)
2914                 filesystem.add_option('-l', '--literal',
2915                                 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
2916                 filesystem.add_option('-A', '--auto-number',
2917                                 action='store_true', dest='autonumber',
2918                                 help='number downloaded files starting from 00000', default=False)
2919                 filesystem.add_option('-o', '--output',
2920                                 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
2921                 filesystem.add_option('-a', '--batch-file',
2922                                 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
2923                 filesystem.add_option('-w', '--no-overwrites',
2924                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
2925                 filesystem.add_option('-c', '--continue',
2926                                 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
2927                 filesystem.add_option('--cookies',
2928                                 dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
2929                 filesystem.add_option('--no-part',
2930                                 action='store_true', dest='nopart', help='do not use .part files', default=False)
2931                 filesystem.add_option('--no-mtime',
2932                                 action='store_false', dest='updatetime',
2933                                 help='do not use the Last-modified header to set the file modification time', default=True)
2934                 parser.add_option_group(filesystem)
2935
                     # Post-processing options: audio extraction is handled by
                     # FFmpegExtractAudioPP (registered further below), which shells
                     # out to external ffmpeg/ffprobe binaries.
2936                 postproc = optparse.OptionGroup(parser, 'Post-processing Options')
2937                 postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
2938                                 help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
2939                 postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
2940                                 help='"best", "aac" or "mp3"; best by default')
2941                 parser.add_option_group(postproc)
2942
                     # NOTE(review): 'optparse' and 'getpass' (used below) are not in the
                     # file-header imports; presumably they are imported earlier inside this
                     # __main__ block, above the visible region — confirm.
2943                 (opts, args) = parser.parse_args()
2944
2945                 # Open appropriate CookieJar
                     # Without --cookies the jar is in-memory only; with it, cookies are
                     # persisted in Mozilla/Netscape format.  The file is loaded only if it
                     # already exists and is readable, so a first run with a fresh path is
                     # fine (the file is created by jar.save() at the end of the run).
2946                 if opts.cookiefile is None:
2947                         jar = cookielib.CookieJar()
2948                 else:
2949                         try:
2950                                 jar = cookielib.MozillaCookieJar(opts.cookiefile)
2951                                 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
2952                                         jar.load()
                             # Python 2 'except Type, name' syntax; err is unused here.
2953                         except (IOError, OSError), err:
2954                                 sys.exit(u'ERROR: unable to open cookie file')
2955
2956                 # Dump user agent
                     # --dump-user-agent prints the spoofed UA string and exits immediately.
2957                 if opts.dump_user_agent:
2958                         print std_headers['User-Agent']
2959                         sys.exit(0)
2960
2961                 # General configuration
                     # Installing the opener makes every urllib2 request in the program go
                     # through the proxy handler, the shared cookie jar, and YoutubeDLHandler
                     # (defined earlier in the file; presumably handles the gzip/deflate
                     # encodings advertised in std_headers — confirm).
2962                 cookie_processor = urllib2.HTTPCookieProcessor(jar)
2963                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()))
2964                 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
2965
2966                 # Batch file verification
                     # '-' reads URLs from stdin.  Blank lines and lines starting with
                     # '#', '/' or ';' are treated as comments and skipped.
2967                 batchurls = []
2968                 if opts.batchfile is not None:
2969                         try:
2970                                 if opts.batchfile == '-':
2971                                         batchfd = sys.stdin
2972                                 else:
2973                                         batchfd = open(opts.batchfile, 'r')
2974                                 batchurls = batchfd.readlines()
2975                                 batchurls = [x.strip() for x in batchurls]
2976                                 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
2977                         except IOError:
2978                                 sys.exit(u'ERROR: batch file could not be read')
                     # Batch-file URLs come first, then positional command-line URLs.
2979                 all_urls = batchurls + args
2980
2981                 # Conflicting, missing and erroneous options
                     # parser.error() prints the message and exits with status 2.
2982                 if opts.usenetrc and (opts.username is not None or opts.password is not None):
2983                         parser.error(u'using .netrc conflicts with giving username/password')
2984                 if opts.password is not None and opts.username is None:
2985                         parser.error(u'account username missing')
2986                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
2987                         parser.error(u'using output template conflicts with using title, literal title or auto number')
2988                 if opts.usetitle and opts.useliteral:
2989                         parser.error(u'using title conflicts with using literal title')
                     # Username given but no password: prompt interactively (no echo).
2990                 if opts.username is not None and opts.password is None:
2991                         opts.password = getpass.getpass(u'Type account password and press return:')
                     # --rate-limit accepts suffixed values (e.g. 50k); parse_bytes returns
                     # None on a malformed value.
2992                 if opts.ratelimit is not None:
2993                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
2994                         if numeric_limit is None:
2995                                 parser.error(u'invalid rate limit specified')
2996                         opts.ratelimit = numeric_limit
                     # long() is the Python 2 arbitrary-precision integer constructor.
2997                 if opts.retries is not None:
2998                         try:
2999                                 opts.retries = long(opts.retries)
3000                         except (TypeError, ValueError), err:
3001                                 parser.error(u'invalid retry count specified')
                     # Playlist positions are 1-based; start must be positive.
3002                 try:
3003                         opts.playliststart = long(opts.playliststart)
3004                         if opts.playliststart <= 0:
3005                                 raise ValueError
3006                 except (TypeError, ValueError), err:
3007                         parser.error(u'invalid playlist start number specified')
                     # End of -1 means "until the end of the playlist"; otherwise it must be
                     # positive and not before the start position.
3008                 try:
3009                         opts.playlistend = long(opts.playlistend)
3010                         if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
3011                                 raise ValueError
3012                 except (TypeError, ValueError), err:
3013                         parser.error(u'invalid playlist end number specified')
3014                 if opts.extractaudio:
3015                         if opts.audioformat not in ['best', 'aac', 'mp3']:
3016                                 parser.error(u'invalid audio format specified')
3017
3018                 # Information extractors
                     # The search/playlist/user extractors wrap a concrete extractor and
                     # delegate to it, so the same youtube_ie / google_ie / yahoo_ie
                     # instance is shared by its derived extractors.
3019                 vimeo_ie = VimeoIE()
3020                 youtube_ie = YoutubeIE()
3021                 metacafe_ie = MetacafeIE(youtube_ie)
3022                 dailymotion_ie = DailymotionIE()
3023                 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
3024                 youtube_user_ie = YoutubeUserIE(youtube_ie)
3025                 youtube_search_ie = YoutubeSearchIE(youtube_ie)
3026                 google_ie = GoogleIE()
3027                 google_search_ie = GoogleSearchIE(google_ie)
3028                 photobucket_ie = PhotobucketIE()
3029                 yahoo_ie = YahooIE()
3030                 yahoo_search_ie = YahooSearchIE(yahoo_ie)
3031                 deposit_files_ie = DepositFilesIE()
3032                 facebook_ie = FacebookIE()
3033                 generic_ie = GenericIE()
3034
3035                 # File downloader
                     # Any of the --get-* flags implies both quiet and simulate mode, so
                     # only the requested field is printed and nothing is downloaded.
3036                 fd = FileDownloader({
3037                         'usenetrc': opts.usenetrc,
3038                         'username': opts.username,
3039                         'password': opts.password,
3040                         'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
3041                         'forceurl': opts.geturl,
3042                         'forcetitle': opts.gettitle,
3043                         'forcethumbnail': opts.getthumbnail,
3044                         'forcedescription': opts.getdescription,
3045                         'forcefilename': opts.getfilename,
3046                         'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
3047                         'format': opts.format,
3048                         'format_limit': opts.format_limit,
                             # Output template selection: a chain of 'or' clauses where the
                             # first truthy alternative wins.  An explicit -o template
                             # (decoded from the locale's byte encoding to unicode) takes
                             # precedence; otherwise a template is derived from the
                             # format/title/literal/autonumber flags, falling back to
                             # '%(id)s.%(ext)s'.
3049                         'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
3050                                 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
3051                                 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
3052                                 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
3053                                 or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
3054                                 or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
3055                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
3056                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
3057                                 or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
3058                                 or u'%(id)s.%(ext)s'),
3059                         'ignoreerrors': opts.ignoreerrors,
3060                         'ratelimit': opts.ratelimit,
3061                         'nooverwrites': opts.nooverwrites,
3062                         'retries': opts.retries,
3063                         'continuedl': opts.continue_dl,
3064                         'noprogress': opts.noprogress,
3065                         'playliststart': opts.playliststart,
3066                         'playlistend': opts.playlistend,
                             # '-o -' writes the video to stdout, so progress/status output
                             # must be diverted to stderr.
3067                         'logtostderr': opts.outtmpl == '-',
3068                         'consoletitle': opts.consoletitle,
3069                         'nopart': opts.nopart,
3070                         'updatetime': opts.updatetime,
3071                         })
                     # Extractors are registered in priority order; more specific ones
                     # (search, playlist, user) come before the plain video extractors.
3072                 fd.add_info_extractor(vimeo_ie)
3073                 fd.add_info_extractor(youtube_search_ie)
3074                 fd.add_info_extractor(youtube_pl_ie)
3075                 fd.add_info_extractor(youtube_user_ie)
3076                 fd.add_info_extractor(metacafe_ie)
3077                 fd.add_info_extractor(dailymotion_ie)
3078                 fd.add_info_extractor(youtube_ie)
3079                 fd.add_info_extractor(google_ie)
3080                 fd.add_info_extractor(google_search_ie)
3081                 fd.add_info_extractor(photobucket_ie)
3082                 fd.add_info_extractor(yahoo_ie)
3083                 fd.add_info_extractor(yahoo_search_ie)
3084                 fd.add_info_extractor(deposit_files_ie)
3085                 fd.add_info_extractor(facebook_ie)
3086
3087                 # This must come last since it's the
3088                 # fallback if none of the others work
3089                 fd.add_info_extractor(generic_ie)
3090
3091                 # PostProcessors
3092                 if opts.extractaudio:
3093                         fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat))
3094
3095                 # Update version
                     # -U: self-update the script in place (sys.argv[0] is this file's path).
3096                 if opts.update_self:
3097                         update_self(fd, sys.argv[0])
3098
3099                 # Maybe do nothing
                     # With no URLs at all, -U alone is a valid invocation (exit 0);
                     # anything else is a usage error.
3100                 if len(all_urls) < 1:
3101                         if not opts.update_self:
3102                                 parser.error(u'you must provide at least one URL')
3103                         else:
3104                                 sys.exit()
                     # download() returns the process exit code (nonzero on failure).
3105                 retcode = fd.download(all_urls)
3106
3107                 # Dump cookie jar if requested
3108                 if opts.cookiefile is not None:
3109                         try:
3110                                 jar.save()
3111                         except (IOError, OSError), err:
3112                                 sys.exit(u'ERROR: unable to save cookie jar')
3113
3114                 sys.exit(retcode)
3115
             # Top-level error handling for the whole run (the matching 'try:' opens
             # before the visible region): DownloadError has already been reported,
             # so exit silently with status 1; the other cases print a message and
             # exit nonzero (sys.exit with a string writes it to stderr, status 1).
3116         except DownloadError:
3117                 sys.exit(1)
3118         except SameFileError:
3119                 sys.exit(u'ERROR: fixed output name but more than one file to download')
3120         except KeyboardInterrupt:
3121                 sys.exit(u'\nERROR: Interrupted by user')