# Move imports to top (Closes #283)
# [youtube-dl.git] / youtube_dl / __init__.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
# Contributors, in rough order of first contribution.
__authors__  = (
	'Ricardo Garcia Gonzalez',
	'Danny Colligan',
	'Benjamin Johnson',
	'Vasyl\' Vavrychuk',
	'Witold Baryluk',
	'Paweł Paprota',
	'Gergely Imreh',
	'Rogério Brito',
	'Philipp Hagemeister',
	'Sören Schulze',
	'Kevin Ngo',
	'Ori Avtalion',
	'shizeeg',
	)

__license__ = 'Public Domain'
__version__ = '2012.01.08b'

# Canonical location of the released script; fetched by the self-update code.
UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
24
25
26 import cookielib
27 import datetime
28 import getpass
29 import gzip
30 import htmlentitydefs
31 import HTMLParser
32 import httplib
33 import locale
34 import math
35 import netrc
36 import optparse
37 import os
38 import os.path
39 import re
40 import shlex
41 import socket
42 import string
43 import subprocess
44 import sys
45 import time
46 import urllib
47 import urllib2
48 import warnings
49 import zlib
50
51 if os.name == 'nt':
52         import ctypes
53
54 try:
55         import email.utils
56 except ImportError: # Python 2.4
57         import email.Utils
58 try:
59         import cStringIO as StringIO
60 except ImportError:
61         import StringIO
62
63 # parse_qs was moved from the cgi module to the urlparse module recently.
64 try:
65         from urlparse import parse_qs
66 except ImportError:
67         from cgi import parse_qs
68
69 try:
70         import lxml.etree
71 except ImportError:
72         pass # Handled below
73
74 try:
75         import xml.etree.ElementTree
76 except ImportError: # Python<2.5: Not officially supported, but let it slip
77         warnings.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.')
78
# Default HTTP headers sent with every request. The User-Agent mimics a
# desktop Firefox so sites serve the same content they serve to browsers.
std_headers = {
	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
	'Accept-Encoding': 'gzip, deflate',
	'Accept-Language': 'en-us,en;q=0.5',
}
86
try:
	import json
except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
	import re
	# Minimal stand-in for the stdlib json module: only json.loads() is
	# provided, implemented as a small recursive-descent parser.
	class json(object):
		@staticmethod
		def loads(s):
			# Input is a UTF-8 encoded byte string; parse it as unicode.
			s = s.decode('UTF-8')
			def raiseError(msg, i):
				# Report a parse error with context around position i.
				raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
			def skipSpace(i, expectMore=True):
				# Advance i past JSON whitespace; optionally require that
				# more input remains afterwards.
				while i < len(s) and s[i] in ' \t\r\n':
					i += 1
				if expectMore:
					if i >= len(s):
						raiseError('Premature end', i)
				return i
			def decodeEscape(match):
				# Translate one backslash escape (including \uXXXX and
				# UTF-16 surrogate pairs) into the character it denotes.
				esc = match.group(1)
				_STATIC = {
					'"': '"',
					'\\': '\\',
					'/': '/',
					'b': unichr(0x8),
					'f': unichr(0xc),
					'n': '\n',
					'r': '\r',
					't': '\t',
				}
				if esc in _STATIC:
					return _STATIC[esc]
				if esc[0] == 'u':
					if len(esc) == 1+4:
						return unichr(int(esc[1:5], 16))
					if len(esc) == 5+6 and esc[5:7] == '\\u':
						# Surrogate pair: combine the high and low halves.
						hi = int(esc[1:5], 16)
						low = int(esc[7:11], 16)
						return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
				raise ValueError('Unknown escape ' + str(esc))
			def parseString(i):
				# Parse the double-quoted string starting at s[i]; returns
				# (index just past the closing quote, decoded string).
				i += 1
				e = i
				while True:
					e = s.index('"', e)
					# Count the backslashes immediately before the quote; an
					# odd count means the quote itself is escaped.
					bslashes = 0
					while s[e-bslashes-1] == '\\':
						bslashes += 1
					if bslashes % 2 == 1:
						e += 1
						continue
					break
				rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)')
				stri = rexp.sub(decodeEscape, s[i:e])
				return (e+1,stri)
			def parseObj(i):
				# Parse the object starting at the '{' at s[i].
				i += 1
				res = {}
				i = skipSpace(i)
				if s[i] == '}': # Empty dictionary
					return (i+1,res)
				while True:
					if s[i] != '"':
						raiseError('Expected a string object key', i)
					i,key = parseString(i)
					i = skipSpace(i)
					if i >= len(s) or s[i] != ':':
						raiseError('Expected a colon', i)
					i,val = parse(i+1)
					res[key] = val
					i = skipSpace(i)
					if s[i] == '}':
						return (i+1, res)
					if s[i] != ',':
						raiseError('Expected comma or closing curly brace', i)
					i = skipSpace(i+1)
			def parseArray(i):
				# Parse the array starting at the '[' at s[i].
				res = []
				i = skipSpace(i+1)
				if s[i] == ']': # Empty array
					return (i+1,res)
				while True:
					i,val = parse(i)
					res.append(val)
					i = skipSpace(i) # Raise exception if premature end
					if s[i] == ']':
						return (i+1, res)
					if s[i] != ',':
						raiseError('Expected a comma or closing bracket', i)
					i = skipSpace(i+1)
			def parseDiscrete(i):
				# Parse the literals true / false / null.
				for k,v in {'true': True, 'false': False, 'null': None}.items():
					if s.startswith(k, i):
						return (i+len(k), v)
				raiseError('Not a boolean (or null)', i)
			def parseNumber(i):
				mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:])
				if mobj is None:
					raiseError('Not a number', i)
				nums = mobj.group(1)
				if '.' in nums or 'e' in nums or 'E' in nums:
					return (i+len(nums), float(nums))
				return (i+len(nums), int(nums))
			# Dispatch on the first character of a value; numbers are the fallback.
			CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
			def parse(i):
				i = skipSpace(i)
				i,res = CHARMAP.get(s[i], parseNumber)(i)
				i = skipSpace(i, False)
				return (i,res)
			i,res = parse(0)
			if i < len(s):
				raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
			return res
199
def preferredencoding():
	"""Get preferred encoding.

	Returns the best encoding scheme for the system, based on
	locale.getpreferredencoding() and some further tweaks.
	"""
	# The original routed this through a single-use generator
	# (yield_preferredencoding().next()) for no benefit; compute directly.
	try:
		pref = locale.getpreferredencoding()
		# Make sure the reported codec actually exists before trusting it.
		u'TEST'.encode(pref)
	except:
		# Broken or unknown locale: fall back to a sane default.
		pref = 'UTF-8'
	return pref
215
216
217 def htmlentity_transform(matchobj):
218         """Transforms an HTML entity to a Unicode character.
219
220         This function receives a match object and is intended to be used with
221         the re.sub() function.
222         """
223         entity = matchobj.group(1)
224
225         # Known non-numeric HTML entity
226         if entity in htmlentitydefs.name2codepoint:
227                 return unichr(htmlentitydefs.name2codepoint[entity])
228
229         # Unicode character
230         mobj = re.match(ur'(?u)#(x?\d+)', entity)
231         if mobj is not None:
232                 numstr = mobj.group(1)
233                 if numstr.startswith(u'x'):
234                         base = 16
235                         numstr = u'0%s' % numstr
236                 else:
237                         base = 10
238                 return unichr(long(numstr, base))
239
240         # Unknown entity in name, return its literal representation
241         return (u'&%s;' % entity)
242
243
244 def sanitize_title(utitle):
245         """Sanitizes a video title so it could be used as part of a filename."""
246         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
247         return utitle.replace(unicode(os.sep), u'%')
248
249
def sanitize_open(filename, open_mode):
	"""Try to open the given filename, and slightly tweak it if this fails.

	Attempts to open the given filename. If this fails, it tries to change
	the filename slightly, step by step, until it's either able to open it
	or it fails and raises a final exception, like the standard open()
	function.

	It returns the tuple (stream, definitive_file_name).
	"""
	try:
		if filename == u'-':
			if sys.platform == 'win32':
				# Switch stdout to binary mode so the downloaded data is not
				# mangled by newline translation on Windows.
				import msvcrt
				msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
			return (sys.stdout, filename)
		stream = open(_encodeFilename(filename), open_mode)
		return (stream, filename)
	except (IOError, OSError), err:
		# In case of error, try to remove win32 forbidden chars
		filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)

		# An exception here should be caught in the caller
		stream = open(_encodeFilename(filename), open_mode)
		return (stream, filename)
275
276
def timeconvert(timestr):
	"""Convert RFC 2822 defined time string into system timestamp.

	Returns None when the string cannot be parsed.
	"""
	parsed = email.utils.parsedate_tz(timestr)
	if parsed is None:
		return None
	return email.utils.mktime_tz(parsed)
284
def _simplify_title(title):
	"""Collapse every run of filename-unsafe characters into one underscore
	and trim underscores from both ends."""
	unsafe = re.compile(u'[^\\w\\d_\\-]+', flags=re.UNICODE)
	return unsafe.sub(u'_', title).strip(u'_')
288
def _orderedSet(iterable):
	""" Remove all duplicates from the input iterable """
	# List membership (not a set) keeps this working for unhashable elements
	# and preserves first-seen order.
	result = []
	for element in iterable:
		if element in result:
			continue
		result.append(element)
	return result
296
def _unescapeHTML(s):
	"""
	Decode the HTML entities contained in s.

	@param s a string (of type unicode)
	"""
	assert type(s) == type(u'')
	parser = HTMLParser.HTMLParser()
	return parser.unescape(s)
305
def _encodeFilename(s):
	"""
	@param s The name of the file (of type unicode)
	"""
	assert type(s) == type(u'')

	if sys.platform != 'win32' or sys.getwindowsversion().major < 5:
		# Everywhere except modern Windows, encode to the filesystem encoding.
		return s.encode(sys.getfilesystemencoding(), 'ignore')
	# Pass u'' directly to use Unicode APIs on Windows 2000 and up
	# (Detecting Windows NT 4 is tricky because 'major >= 4' would
	# match Windows 9x series as well. Besides, NT 4 is obsolete.)
	return s
320
class DownloadError(Exception):
	"""Raised when downloading fails.

	Thrown by FileDownloader objects that are not configured to continue
	on errors; carries the appropriate error message.
	"""
329
330
class SameFileError(Exception):
	"""Raised when two downloads would collide on disk.

	Thrown by FileDownloader objects when they detect that multiple files
	would have to be downloaded to the same file on disk.
	"""
338
339
class PostProcessingError(Exception):
	"""Raised on errors during postprocessing.

	A PostProcessor's .run() method may raise this exception to signal
	that the postprocessing task failed.
	"""
347
class MaxDownloadsReached(Exception):
	"""Raised once the --max-downloads limit has been reached."""
351
352
class UnavailableVideoError(Exception):
	"""Raised when a video is requested in an unavailable format.

	Thrown when the format selected for a video is not actually offered
	for that video.
	"""
360
361
class ContentTooShortError(Exception):
	"""Raised when a download delivers less data than announced.

	FileDownloader objects raise this when a downloaded file is smaller
	than the size the server announced, which usually indicates that the
	connection was interrupted.
	"""
	# Both counters are expressed in bytes.
	downloaded = None
	expected = None

	def __init__(self, downloaded, expected):
		self.downloaded = downloaded
		self.expected = expected
376
377
class YoutubeDLHandler(urllib2.HTTPHandler):
	"""Handler for HTTP requests and responses.

	This class, when installed with an OpenerDirector, automatically adds
	the standard headers to every HTTP request and handles gzipped and
	deflated responses from web servers. If compression is to be avoided in
	a particular request, the original request in the program code only has
	to include the HTTP header "Youtubedl-No-Compression", which will be
	removed before making the real request.

	Part of this code was copied from:

	http://techknack.net/python-urllib2-handlers/

	Andrew Rowls, the author of that code, agreed to release it to the
	public domain.
	"""

	@staticmethod
	def deflate(data):
		# Some servers send a raw deflate stream, others wrap it in a zlib
		# header; try the raw form first and fall back to the wrapped one.
		try:
			return zlib.decompress(data, -zlib.MAX_WBITS)
		except zlib.error:
			return zlib.decompress(data)

	@staticmethod
	def addinfourl_wrapper(stream, headers, url, code):
		# addinfourl only accepts a response code on newer Pythons;
		# emulate it where the 4-argument form is unavailable.
		if hasattr(urllib2.addinfourl, 'getcode'):
			return urllib2.addinfourl(stream, headers, url, code)
		ret = urllib2.addinfourl(stream, headers, url)
		ret.code = code
		return ret

	def http_request(self, req):
		# Force the standard headers, replacing any the caller set.
		for h in std_headers:
			if h in req.headers:
				del req.headers[h]
			req.add_header(h, std_headers[h])
		# The marker header disables compression and must never go out
		# on the wire.
		if 'Youtubedl-no-compression' in req.headers:
			if 'Accept-encoding' in req.headers:
				del req.headers['Accept-encoding']
			del req.headers['Youtubedl-no-compression']
		return req

	def http_response(self, req, resp):
		# Transparently decompress the body, keeping the original
		# headers/status of the response object.
		old_resp = resp
		# gzip
		if resp.headers.get('Content-encoding', '') == 'gzip':
			gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
			resp.msg = old_resp.msg
		# deflate
		if resp.headers.get('Content-encoding', '') == 'deflate':
			gz = StringIO.StringIO(self.deflate(resp.read()))
			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
			resp.msg = old_resp.msg
		return resp
435
436
437 class FileDownloader(object):
438         """File Downloader class.
439
440         File downloader objects are the ones responsible of downloading the
441         actual video file and writing it to disk if the user has requested
442         it, among some other tasks. In most cases there should be one per
443         program. As, given a video URL, the downloader doesn't know how to
444         extract all the needed information, task that InfoExtractors do, it
445         has to pass the URL to one of them.
446
447         For this, file downloader objects have a method that allows
448         InfoExtractors to be registered in a given order. When it is passed
449         a URL, the file downloader handles it to the first InfoExtractor it
450         finds that reports being able to handle it. The InfoExtractor extracts
451         all the information about the video or videos the URL refers to, and
452         asks the FileDownloader to process the video information, possibly
453         downloading the video.
454
455         File downloaders accept a lot of parameters. In order not to saturate
456         the object constructor with arguments, it receives a dictionary of
457         options instead. These options are available through the params
458         attribute for the InfoExtractors to use. The FileDownloader also
459         registers itself as the downloader in charge for the InfoExtractors
460         that are added to it, so this is a "mutual registration".
461
462         Available options:
463
464         username:         Username for authentication purposes.
465         password:         Password for authentication purposes.
466         usenetrc:         Use netrc for authentication instead.
467         quiet:            Do not print messages to stdout.
468         forceurl:         Force printing final URL.
469         forcetitle:       Force printing title.
470         forcethumbnail:   Force printing thumbnail URL.
471         forcedescription: Force printing description.
472         forcefilename:    Force printing final filename.
473         simulate:         Do not download the video files.
474         format:           Video format code.
475         format_limit:     Highest quality format to try.
476         outtmpl:          Template for output names.
477         ignoreerrors:     Do not stop on download errors.
478         ratelimit:        Download speed limit, in bytes/sec.
479         nooverwrites:     Prevent overwriting files.
480         retries:          Number of times to retry for HTTP error 5xx
481         continuedl:       Try to continue downloads if possible.
482         noprogress:       Do not print the progress bar.
483         playliststart:    Playlist item to start at.
484         playlistend:      Playlist item to end at.
485         matchtitle:       Download only matching titles.
486         rejecttitle:      Reject downloads for matching titles.
487         logtostderr:      Log messages to stderr instead of stdout.
488         consoletitle:     Display progress in console window's titlebar.
489         nopart:           Do not use temporary .part files.
490         updatetime:       Use the Last-modified header to set output file timestamps.
491         writedescription: Write the video description to a .description file
492         writeinfojson:    Write the video description to a .info.json file
493         """
494
	# Class-level placeholders; real values are set per instance in __init__.
	params = None            # option dictionary controlling behaviour
	_ies = []                # registered InfoExtractor objects
	_pps = []                # registered PostProcessor objects
	_download_retcode = None # set to 1 by trouble() when a download fails
	_num_downloads = None    # ordinal counter of started downloads
	_screen_file = None      # stream used for status output (stdout or stderr)
501
502         def __init__(self, params):
503                 """Create a FileDownloader object with the given options."""
504                 self._ies = []
505                 self._pps = []
506                 self._download_retcode = 0
507                 self._num_downloads = 0
508                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
509                 self.params = params
510
511         @staticmethod
512         def format_bytes(bytes):
513                 if bytes is None:
514                         return 'N/A'
515                 if type(bytes) is str:
516                         bytes = float(bytes)
517                 if bytes == 0.0:
518                         exponent = 0
519                 else:
520                         exponent = long(math.log(bytes, 1024.0))
521                 suffix = 'bkMGTPEZY'[exponent]
522                 converted = float(bytes) / float(1024 ** exponent)
523                 return '%.2f%s' % (converted, suffix)
524
525         @staticmethod
526         def calc_percent(byte_counter, data_len):
527                 if data_len is None:
528                         return '---.-%'
529                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
530
531         @staticmethod
532         def calc_eta(start, now, total, current):
533                 if total is None:
534                         return '--:--'
535                 dif = now - start
536                 if current == 0 or dif < 0.001: # One millisecond
537                         return '--:--'
538                 rate = float(current) / dif
539                 eta = long((float(total) - float(current)) / rate)
540                 (eta_mins, eta_secs) = divmod(eta, 60)
541                 if eta_mins > 99:
542                         return '--:--'
543                 return '%02d:%02d' % (eta_mins, eta_secs)
544
545         @staticmethod
546         def calc_speed(start, now, bytes):
547                 dif = now - start
548                 if bytes == 0 or dif < 0.001: # One millisecond
549                         return '%10s' % '---b/s'
550                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
551
552         @staticmethod
553         def best_block_size(elapsed_time, bytes):
554                 new_min = max(bytes / 2.0, 1.0)
555                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
556                 if elapsed_time < 0.001:
557                         return long(new_max)
558                 rate = bytes / elapsed_time
559                 if rate > new_max:
560                         return long(new_max)
561                 if rate < new_min:
562                         return long(new_min)
563                 return long(rate)
564
565         @staticmethod
566         def parse_bytes(bytestr):
567                 """Parse a string indicating a byte quantity into a long integer."""
568                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
569                 if matchobj is None:
570                         return None
571                 number = float(matchobj.group(1))
572                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
573                 return long(round(number * multiplier))
574
575         def add_info_extractor(self, ie):
576                 """Add an InfoExtractor object to the end of the list."""
577                 self._ies.append(ie)
578                 ie.set_downloader(self)
579
580         def add_post_processor(self, pp):
581                 """Add a PostProcessor object to the end of the chain."""
582                 self._pps.append(pp)
583                 pp.set_downloader(self)
584
585         def to_screen(self, message, skip_eol=False):
586                 """Print message to stdout if not in quiet mode."""
587                 assert type(message) == type(u'')
588                 if not self.params.get('quiet', False):
589                         terminator = [u'\n', u''][skip_eol]
590                         output = message + terminator
591
592                         if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
593                                 output = output.encode(preferredencoding(), 'ignore')
594                         self._screen_file.write(output)
595                         self._screen_file.flush()
596
	def to_stderr(self, message):
		"""Print message to stderr."""
		# Encode explicitly: stderr may not accept raw unicode on Python 2.
		print >>sys.stderr, message.encode(preferredencoding())
600
	def to_cons_title(self, message):
		"""Set console/terminal window title to message."""
		if not self.params.get('consoletitle', False):
			return
		if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
			# c_wchar_p() might not be necessary if `message` is
			# already of type unicode()
			ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
		elif 'TERM' in os.environ:
			# xterm-compatible terminals: OSC 0 escape sequence sets the title.
			sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
611
	def fixed_template(self):
		"""Checks if the output template is fixed."""
		# "Fixed" means the template contains no %(field)s placeholders, so
		# every download would end up with the same file name.
		return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
615
616         def trouble(self, message=None):
617                 """Determine action to take when a download problem appears.
618
619                 Depending on if the downloader has been configured to ignore
620                 download errors or not, this method may throw an exception or
621                 not when errors are found, after printing the message.
622                 """
623                 if message is not None:
624                         self.to_stderr(message)
625                 if not self.params.get('ignoreerrors', False):
626                         raise DownloadError(message)
627                 self._download_retcode = 1
628
629         def slow_down(self, start_time, byte_counter):
630                 """Sleep if the download speed is over the rate limit."""
631                 rate_limit = self.params.get('ratelimit', None)
632                 if rate_limit is None or byte_counter == 0:
633                         return
634                 now = time.time()
635                 elapsed = now - start_time
636                 if elapsed <= 0.0:
637                         return
638                 speed = float(byte_counter) / elapsed
639                 if speed > rate_limit:
640                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
641
642         def temp_name(self, filename):
643                 """Returns a temporary filename for the given filename."""
644                 if self.params.get('nopart', False) or filename == u'-' or \
645                                 (os.path.exists(_encodeFilename(filename)) and not os.path.isfile(_encodeFilename(filename))):
646                         return filename
647                 return filename + u'.part'
648
649         def undo_temp_name(self, filename):
650                 if filename.endswith(u'.part'):
651                         return filename[:-len(u'.part')]
652                 return filename
653
	def try_rename(self, old_filename, new_filename):
		"""Rename old_filename to new_filename, reporting (not raising) failures."""
		try:
			if old_filename == new_filename:
				return
			os.rename(_encodeFilename(old_filename), _encodeFilename(new_filename))
		except (IOError, OSError), err:
			# Route the failure through the configured error policy.
			self.trouble(u'ERROR: unable to rename file')
661
662         def try_utime(self, filename, last_modified_hdr):
663                 """Try to set the last-modified time of the given file."""
664                 if last_modified_hdr is None:
665                         return
666                 if not os.path.isfile(_encodeFilename(filename)):
667                         return
668                 timestr = last_modified_hdr
669                 if timestr is None:
670                         return
671                 filetime = timeconvert(timestr)
672                 if filetime is None:
673                         return filetime
674                 try:
675                         os.utime(filename, (time.time(), filetime))
676                 except:
677                         pass
678                 return filetime
679
680         def report_writedescription(self, descfn):
681                 """ Report that the description file is being written """
682                 self.to_screen(u'[info] Writing video description to: ' + descfn)
683
684         def report_writeinfojson(self, infofn):
685                 """ Report that the metadata file has been written """
686                 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
687
688         def report_destination(self, filename):
689                 """Report destination filename."""
690                 self.to_screen(u'[download] Destination: ' + filename)
691
692         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
693                 """Report download progress."""
694                 if self.params.get('noprogress', False):
695                         return
696                 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
697                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
698                 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
699                                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
700
701         def report_resuming_byte(self, resume_len):
702                 """Report attempt to resume at given byte."""
703                 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
704
705         def report_retry(self, count, retries):
706                 """Report retry in case of HTTP error 5xx"""
707                 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
708
709         def report_file_already_downloaded(self, file_name):
710                 """Report file has already been fully downloaded."""
711                 try:
712                         self.to_screen(u'[download] %s has already been downloaded' % file_name)
713                 except (UnicodeEncodeError), err:
714                         self.to_screen(u'[download] The file has already been downloaded')
715
716         def report_unable_to_resume(self):
717                 """Report it was impossible to resume download."""
718                 self.to_screen(u'[download] Unable to resume')
719
720         def report_finish(self):
721                 """Report download finished."""
722                 if self.params.get('noprogress', False):
723                         self.to_screen(u'[download] Download completed')
724                 else:
725                         self.to_screen(u'')
726
727         def increment_downloads(self):
728                 """Increment the ordinal that assigns a number to each file."""
729                 self._num_downloads += 1
730
731         def prepare_filename(self, info_dict):
732                 """Generate the output filename."""
733                 try:
734                         template_dict = dict(info_dict)
735                         template_dict['epoch'] = unicode(long(time.time()))
736                         template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
737                         filename = self.params['outtmpl'] % template_dict
738                         return filename
739                 except (ValueError, KeyError), err:
740                         self.trouble(u'ERROR: invalid system charset or erroneous output template')
741                         return None
742
743         def _match_entry(self, info_dict):
744                 """ Returns None iff the file should be downloaded """
745
746                 title = info_dict['title']
747                 matchtitle = self.params.get('matchtitle', False)
748                 if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
749                         return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
750                 rejecttitle = self.params.get('rejecttitle', False)
751                 if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
752                         return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
753                 return None
754
	def process_info(self, info_dict):
		"""Process a single dictionary returned by an InfoExtractor.

		Applies the title filters and --max-downloads, handles the
		"forced printing" and simulation modes, optionally writes the
		.description / .info.json side files, downloads the media
		(unless it exists and --no-overwrites is set) and finally runs
		the post-processing chain.
		"""

		# Title match/reject filters (--match-title / --reject-title)
		reason = self._match_entry(info_dict)
		if reason is not None:
			self.to_screen(u'[download] ' + reason)
			return

		# --max-downloads: abort the whole run once the limit is exceeded
		max_downloads = self.params.get('max_downloads')
		if max_downloads is not None:
			if self._num_downloads > int(max_downloads):
				raise MaxDownloadsReached()

		filename = self.prepare_filename(info_dict)

		# Forced printings (--get-title, --get-url, ...), encoded for the
		# current locale
		if self.params.get('forcetitle', False):
			print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
		if self.params.get('forceurl', False):
			print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
		if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
			print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
		if self.params.get('forcedescription', False) and 'description' in info_dict:
			print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
		if self.params.get('forcefilename', False) and filename is not None:
			print filename.encode(preferredencoding(), 'xmlcharrefreplace')
		if self.params.get('forceformat', False):
			print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')

		# Do nothing else if in simulate mode
		if self.params.get('simulate', False):
			return

		# prepare_filename() already reported the error in this case
		if filename is None:
			return

		# Create the target directory if needed
		try:
			dn = os.path.dirname(_encodeFilename(filename))
			if dn != '' and not os.path.exists(dn): # dn is already encoded
				os.makedirs(dn)
		except (OSError, IOError), err:
			self.trouble(u'ERROR: unable to create directory ' + unicode(err))
			return

		# --write-description: dump the description next to the video file
		if self.params.get('writedescription', False):
			try:
				descfn = filename + u'.description'
				self.report_writedescription(descfn)
				descfile = open(_encodeFilename(descfn), 'wb')
				try:
					descfile.write(info_dict['description'].encode('utf-8'))
				finally:
					descfile.close()
			except (OSError, IOError):
				self.trouble(u'ERROR: Cannot write description file ' + descfn)
				return

		# --write-info-json: dump the metadata, minus entries (like the
		# cached 'urlhandle') that cannot be serialized
		if self.params.get('writeinfojson', False):
			infofn = filename + u'.info.json'
			self.report_writeinfojson(infofn)
			# 'json' is imported conditionally and may be missing on
			# Python < 2.6; probe for the attribute before using it
			try:
				json.dump
			except (NameError,AttributeError):
				self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
				return
			try:
				infof = open(_encodeFilename(infofn), 'wb')
				try:
					json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
					json.dump(json_info_dict, infof)
				finally:
					infof.close()
			except (OSError, IOError):
				self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
				return

		if not self.params.get('skip_download', False):
			# --no-overwrites: an existing file counts as a successful download
			if self.params.get('nooverwrites', False) and os.path.exists(_encodeFilename(filename)):
				success = True
			else:
				try:
					success = self._do_download(filename, info_dict)
				except (OSError, IOError), err:
					raise UnavailableVideoError
				except (urllib2.URLError, httplib.HTTPException, socket.error), err:
					self.trouble(u'ERROR: unable to download video data: %s' % str(err))
					return
				except (ContentTooShortError, ), err:
					self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
					return

			# Only postprocess files that actually downloaded (or were kept)
			if success:
				try:
					self.post_process(filename, info_dict)
				except (PostProcessingError), err:
					self.trouble(u'ERROR: postprocessing: %s' % str(err))
					return
852
853         def download(self, url_list):
854                 """Download a given list of URLs."""
855                 if len(url_list) > 1 and self.fixed_template():
856                         raise SameFileError(self.params['outtmpl'])
857
858                 for url in url_list:
859                         suitable_found = False
860                         for ie in self._ies:
861                                 # Go to next InfoExtractor if not suitable
862                                 if not ie.suitable(url):
863                                         continue
864
865                                 # Suitable InfoExtractor found
866                                 suitable_found = True
867
868                                 # Extract information from URL and process it
869                                 ie.extract(url)
870
871                                 # Suitable InfoExtractor had been found; go to next URL
872                                 break
873
874                         if not suitable_found:
875                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
876
877                 return self._download_retcode
878
879         def post_process(self, filename, ie_info):
880                 """Run the postprocessing chain on the given file."""
881                 info = dict(ie_info)
882                 info['filepath'] = filename
883                 for pp in self._pps:
884                         info = pp.run(info)
885                         if info is None:
886                                 break
887
	def _download_with_rtmpdump(self, filename, url, player_url):
		"""Download an rtmp:// URL by shelling out to rtmpdump.

		Writes to a temporary file and renames it into place on
		success.  Returns True on success, False otherwise.
		"""
		self.report_destination(filename)
		tmpfilename = self.temp_name(filename)

		# Check for rtmpdump first
		try:
			subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
		except (OSError, IOError):
			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
			return False

		# Download using rtmpdump. rtmpdump returns exit code 2 when
		# the connection was interrumpted and resuming appears to be
		# possible. This is part of rtmpdump's normal usage, AFAIK.
		# ([[], opts][bool] is an old-style conditional argument list.)
		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
		retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
		while retval == 2 or retval == 1:
			# Resume (-e) until the file stops growing
			prevsize = os.path.getsize(_encodeFilename(tmpfilename))
			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
			time.sleep(5.0) # This seems to be needed
			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
			cursize = os.path.getsize(_encodeFilename(tmpfilename))
			if prevsize == cursize and retval == 1:
				break
			 # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
			if prevsize == cursize and retval == 2 and cursize > 1024:
				self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
				retval = 0
				break
		if retval == 0:
			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(_encodeFilename(tmpfilename)))
			self.try_rename(tmpfilename, filename)
			return True
		else:
			self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
			return False
924
	def _do_download(self, filename, info_dict):
		"""Download info_dict['url'] to filename.

		RTMP URLs are delegated to _download_with_rtmpdump(); HTTP
		downloads support resuming (Range requests), retries on 5xx,
		adaptive block size, progress display and rate limiting.
		Returns True on success, False on a reported failure; raises
		ContentTooShortError when fewer bytes than Content-Length
		arrive.
		"""
		url = info_dict['url']
		player_url = info_dict.get('player_url', None)

		# Check file already present
		if self.params.get('continuedl', False) and os.path.isfile(_encodeFilename(filename)) and not self.params.get('nopart', False):
			self.report_file_already_downloaded(filename)
			return True

		# Attempt to download using rtmpdump
		if url.startswith('rtmp'):
			return self._download_with_rtmpdump(filename, url, player_url)

		tmpfilename = self.temp_name(filename)
		stream = None

		# Do not include the Accept-Encoding header
		headers = {'Youtubedl-no-compression': 'True'}
		# basic_request stays Range-free so we can re-probe the full length
		basic_request = urllib2.Request(url, None, headers)
		request = urllib2.Request(url, None, headers)

		# Establish possible resume length
		if os.path.isfile(_encodeFilename(tmpfilename)):
			resume_len = os.path.getsize(_encodeFilename(tmpfilename))
		else:
			resume_len = 0

		open_mode = 'wb'
		if resume_len != 0:
			if self.params.get('continuedl', False):
				self.report_resuming_byte(resume_len)
				request.add_header('Range','bytes=%d-' % resume_len)
				open_mode = 'ab'
			else:
				resume_len = 0

		count = 0
		retries = self.params.get('retries', 0)
		while count <= retries:
			# Establish connection
			try:
				if count == 0 and 'urlhandle' in info_dict:
					data = info_dict['urlhandle']
				# NOTE(review): the assignment above is immediately
				# overwritten here, so the cached 'urlhandle' is never
				# actually reused -- looks like a missing 'else'.
				# Confirm intent before changing.
				data = urllib2.urlopen(request)
				break
			except (urllib2.HTTPError, ), err:
				if (err.code < 500 or err.code >= 600) and err.code != 416:
					# Unexpected HTTP error
					raise
				elif err.code == 416:
					# Unable to resume (requested range not satisfiable)
					try:
						# Open the connection again without the range header
						data = urllib2.urlopen(basic_request)
						content_length = data.info()['Content-Length']
					except (urllib2.HTTPError, ), err:
						if err.code < 500 or err.code >= 600:
							raise
					else:
						# Examine the reported length
						if (content_length is not None and
								(resume_len - 100 < long(content_length) < resume_len + 100)):
							# The file had already been fully downloaded.
							# Explanation to the above condition: in issue #175 it was revealed that
							# YouTube sometimes adds or removes a few bytes from the end of the file,
							# changing the file size slightly and causing problems for some users. So
							# I decided to implement a suggested change and consider the file
							# completely downloaded if the file size differs less than 100 bytes from
							# the one in the hard drive.
							self.report_file_already_downloaded(filename)
							self.try_rename(tmpfilename, filename)
							return True
						else:
							# The length does not match, we start the download over
							self.report_unable_to_resume()
							open_mode = 'wb'
							break
			# Retry
			count += 1
			if count <= retries:
				self.report_retry(count, retries)

		if count > retries:
			self.trouble(u'ERROR: giving up after %s retries' % retries)
			return False

		data_len = data.info().get('Content-length', None)
		if data_len is not None:
			data_len = long(data_len) + resume_len
		data_len_str = self.format_bytes(data_len)
		byte_counter = 0 + resume_len
		block_size = 1024
		start = time.time()
		while True:
			# Download and write
			before = time.time()
			data_block = data.read(block_size)
			after = time.time()
			if len(data_block) == 0:
				break
			byte_counter += len(data_block)

			# Open file just in time (avoids creating empty files on
			# connections that never deliver data)
			if stream is None:
				try:
					(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
					assert stream is not None
					filename = self.undo_temp_name(tmpfilename)
					self.report_destination(filename)
				except (OSError, IOError), err:
					self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
					return False
			try:
				stream.write(data_block)
			except (IOError, OSError), err:
				self.trouble(u'\nERROR: unable to write data: %s' % str(err))
				return False
			# Adapt the block size to the measured throughput
			block_size = self.best_block_size(after - before, len(data_block))

			# Progress message
			speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
			if data_len is None:
				self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
			else:
				percent_str = self.calc_percent(byte_counter, data_len)
				eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
				self.report_progress(percent_str, data_len_str, speed_str, eta_str)

			# Apply rate limit
			self.slow_down(start, byte_counter - resume_len)

		if stream is None:
			self.trouble(u'\nERROR: Did not get any data blocks')
			return False
		stream.close()
		self.report_finish()
		if data_len is not None and byte_counter != data_len:
			raise ContentTooShortError(byte_counter, long(data_len))
		self.try_rename(tmpfilename, filename)

		# Update file modification time
		if self.params.get('updatetime', True):
			info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

		return True
1070
1071
class InfoExtractor(object):
	"""Base class for all information extractors.

	An information extractor (IE) turns a URL into one or more
	dictionaries describing the video(s) behind it (real video URL,
	title, etc.).  Each dictionary is handed to the FileDownloader,
	which may download the video, print selected fields, and so on.
	The dictionaries must include the following fields:

	id:             Video identifier.
	url:            Final video URL.
	uploader:       Nickname of the video uploader.
	title:          Literal title.
	stitle:         Simplified title.
	ext:            Video filename extension.
	format:         Video format.
	player_url:     SWF Player URL (may be None).

	The following fields are optional; they are only used by the
	forced-printing modes (e.g. when youtube-dl serves as the backend
	for a video search function such as the one in youtube2mp3):

	thumbnail:      Full URL to a video thumbnail image.
	description:    One-line video description.

	Subclasses should define a _VALID_URL regexp, re-define the
	_real_initialize() and _real_extract() methods, and usually be
	added to the list of extractors.
	"""

	# Set once _real_initialize() has run
	_ready = False
	# The FileDownloader this IE reports to (may be None)
	_downloader = None

	def __init__(self, downloader=None):
		"""Constructor. Receives an optional downloader."""
		self._ready = False
		self.set_downloader(downloader)

	def suitable(self, url):
		"""Return True iff this IE can handle the given URL."""
		return re.match(self._VALID_URL, url) is not None

	def initialize(self):
		"""Run authentication/setup at most once per instance."""
		if self._ready:
			return
		self._real_initialize()
		self._ready = True

	def extract(self, url):
		"""Extract URL information and return it as a list of dicts."""
		self.initialize()
		return self._real_extract(url)

	def set_downloader(self, downloader):
		"""Attach the FileDownloader this IE should use."""
		self._downloader = downloader

	def _real_initialize(self):
		"""Real initialization process. Redefine in subclasses."""
		pass

	def _real_extract(self, url):
		"""Real extraction process. Redefine in subclasses."""
		pass
1140
1141
1142 class YoutubeIE(InfoExtractor):
1143         """Information extractor for youtube.com."""
1144
1145         _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
1146         _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
1147         _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
1148         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
1149         _NETRC_MACHINE = 'youtube'
1150         # Listed in order of quality
1151         _available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
1152         _available_formats_prefer_free = ['38', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13']
1153         _video_extensions = {
1154                 '13': '3gp',
1155                 '17': 'mp4',
1156                 '18': 'mp4',
1157                 '22': 'mp4',
1158                 '37': 'mp4',
1159                 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
1160                 '43': 'webm',
1161                 '44': 'webm',
1162                 '45': 'webm',
1163         }
1164         _video_dimensions = {
1165                 '5': '240x400',
1166                 '6': '???',
1167                 '13': '???',
1168                 '17': '144x176',
1169                 '18': '360x640',
1170                 '22': '720x1280',
1171                 '34': '360x640',
1172                 '35': '480x854',
1173                 '37': '1080x1920',
1174                 '38': '3072x4096',
1175                 '43': '360x640',
1176                 '44': '480x854',
1177                 '45': '720x1280',
1178         }       
1179         IE_NAME = u'youtube'
1180
1181         def report_lang(self):
1182                 """Report attempt to set language."""
1183                 self._downloader.to_screen(u'[youtube] Setting language')
1184
1185         def report_login(self):
1186                 """Report attempt to log in."""
1187                 self._downloader.to_screen(u'[youtube] Logging in')
1188
1189         def report_age_confirmation(self):
1190                 """Report attempt to confirm age."""
1191                 self._downloader.to_screen(u'[youtube] Confirming age')
1192
1193         def report_video_webpage_download(self, video_id):
1194                 """Report attempt to download video webpage."""
1195                 self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
1196
1197         def report_video_info_webpage_download(self, video_id):
1198                 """Report attempt to download video info webpage."""
1199                 self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
1200
1201         def report_information_extraction(self, video_id):
1202                 """Report attempt to extract video information."""
1203                 self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
1204
1205         def report_unavailable_format(self, video_id, format):
1206                 """Report extracted video URL."""
1207                 self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
1208
1209         def report_rtmp_download(self):
1210                 """Indicate the download will use the RTMP protocol."""
1211                 self._downloader.to_screen(u'[youtube] RTMP download detected')
1212
	def _print_formats(self, formats):
		"""Print one 'itag : container [resolution]' line per format code."""
		print 'Available formats:'
		for x in formats:
			# Unknown itags fall back to 'flv' / '???'
			print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))
1217
	def _real_initialize(self):
		"""Set the YouTube language, log in, and confirm age (best effort).

		Credentials come from --username/--password or from the
		'youtube' .netrc entry.  Language and login failures only emit
		warnings; a failed age confirmation is reported as an error.
		Does nothing without an attached downloader.
		"""
		if self._downloader is None:
			return

		username = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					username = info[0]
					password = info[2]
				else:
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		# Set language (forces English pages so later regexps match)
		request = urllib2.Request(self._LANG_URL)
		try:
			self.report_lang()
			urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
			return

		# No authentication to be performed
		if username is None:
			return

		# Log in
		login_form = {
				'current_form': 'loginForm',
				'next':		'/',
				'action_login':	'Log In',
				'username':	username,
				'password':	password,
				}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			# If the login form is still present, the credentials failed
			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return

		# Confirm age
		age_form = {
				'next_url':		'/',
				'action_confirm':	'Confirm',
				}
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
		try:
			self.report_age_confirmation()
			age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return
1286
1287         def _real_extract(self, url):
1288                 # Extract video id from URL
1289                 mobj = re.match(self._VALID_URL, url)
1290                 if mobj is None:
1291                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1292                         return
1293                 video_id = mobj.group(2)
1294
1295                 # Get video webpage
1296                 self.report_video_webpage_download(video_id)
1297                 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
1298                 try:
1299                         video_webpage = urllib2.urlopen(request).read()
1300                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1301                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
1302                         return
1303
1304                 # Attempt to extract SWF player URL
1305                 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1306                 if mobj is not None:
1307                         player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1308                 else:
1309                         player_url = None
1310
1311                 # Get video info
1312                 self.report_video_info_webpage_download(video_id)
1313                 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1314                         video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1315                                         % (video_id, el_type))
1316                         request = urllib2.Request(video_info_url)
1317                         try:
1318                                 video_info_webpage = urllib2.urlopen(request).read()
1319                                 video_info = parse_qs(video_info_webpage)
1320                                 if 'token' in video_info:
1321                                         break
1322                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1323                                 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
1324                                 return
1325                 if 'token' not in video_info:
1326                         if 'reason' in video_info:
1327                                 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
1328                         else:
1329                                 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
1330                         return
1331
1332                 # Start extracting information
1333                 self.report_information_extraction(video_id)
1334
1335                 # uploader
1336                 if 'author' not in video_info:
1337                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1338                         return
1339                 video_uploader = urllib.unquote_plus(video_info['author'][0])
1340
1341                 # title
1342                 if 'title' not in video_info:
1343                         self._downloader.trouble(u'ERROR: unable to extract video title')
1344                         return
1345                 video_title = urllib.unquote_plus(video_info['title'][0])
1346                 video_title = video_title.decode('utf-8')
1347                 video_title = sanitize_title(video_title)
1348
1349                 # simplified title
1350                 simple_title = _simplify_title(video_title)
1351
1352                 # thumbnail image
1353                 if 'thumbnail_url' not in video_info:
1354                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
1355                         video_thumbnail = ''
1356                 else:   # don't panic if we can't find it
1357                         video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
1358
1359                 # upload date
1360                 upload_date = u'NA'
1361                 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1362                 if mobj is not None:
1363                         upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1364                         format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
1365                         for expression in format_expressions:
1366                                 try:
1367                                         upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
1368                                 except:
1369                                         pass
1370
1371                 # description
1372                 try:
1373                         lxml.etree
1374                 except NameError:
1375                         video_description = u'No description available.'
1376                         if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False):
1377                                 mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
1378                                 if mobj is not None:
1379                                         video_description = mobj.group(1).decode('utf-8')
1380                 else:
1381                         html_parser = lxml.etree.HTMLParser(encoding='utf-8')
1382                         vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
1383                         video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
1384                         # TODO use another parser
1385
1386                 # token
1387                 video_token = urllib.unquote_plus(video_info['token'][0])
1388
1389                 # Decide which formats to download
1390                 req_format = self._downloader.params.get('format', None)
1391
1392                 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1393                         self.report_rtmp_download()
1394                         video_url_list = [(None, video_info['conn'][0])]
1395                 elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1396                         url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
1397                         url_data = [parse_qs(uds) for uds in url_data_strs]
1398                         url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
1399                         url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data)
1400
1401                         format_limit = self._downloader.params.get('format_limit', None)
1402                         available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
1403                         if format_limit is not None and format_limit in available_formats:
1404                                 format_list = available_formats[available_formats.index(format_limit):]
1405                         else:
1406                                 format_list = available_formats
1407                         existing_formats = [x for x in format_list if x in url_map]
1408                         if len(existing_formats) == 0:
1409                                 self._downloader.trouble(u'ERROR: no known formats available for video')
1410                                 return
1411                         if self._downloader.params.get('listformats', None):
1412                                 self._print_formats(existing_formats)
1413                                 return
1414                         if req_format is None or req_format == 'best':
1415                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1416                         elif req_format == 'worst':
1417                                 video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
1418                         elif req_format in ('-1', 'all'):
1419                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1420                         else:
1421                                 # Specific formats. We pick the first in a slash-delimeted sequence.
1422                                 # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1423                                 req_formats = req_format.split('/')
1424                                 video_url_list = None
1425                                 for rf in req_formats:
1426                                         if rf in url_map:
1427                                                 video_url_list = [(rf, url_map[rf])]
1428                                                 break
1429                                 if video_url_list is None:
1430                                         self._downloader.trouble(u'ERROR: requested format not available')
1431                                         return
1432                 else:
1433                         self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')
1434                         return
1435
1436                 for format_param, video_real_url in video_url_list:
1437                         # At this point we have a new video
1438                         self._downloader.increment_downloads()
1439
1440                         # Extension
1441                         video_extension = self._video_extensions.get(format_param, 'flv')
1442
1443                         try:
1444                                 # Process video information
1445                                 self._downloader.process_info({
1446                                         'id':           video_id.decode('utf-8'),
1447                                         'url':          video_real_url.decode('utf-8'),
1448                                         'uploader':     video_uploader.decode('utf-8'),
1449                                         'upload_date':  upload_date,
1450                                         'title':        video_title,
1451                                         'stitle':       simple_title,
1452                                         'ext':          video_extension.decode('utf-8'),
1453                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
1454                                         'thumbnail':    video_thumbnail.decode('utf-8'),
1455                                         'description':  video_description,
1456                                         'player_url':   player_url,
1457                                 })
1458                         except UnavailableVideoError, err:
1459                                 self._downloader.trouble(u'\nERROR: unable to download video')
1460
1461
class MetacafeIE(InfoExtractor):
	"""Information Extractor for metacafe.com."""

	# Group 1 is the video id, group 2 the URL slug used as simplified title.
	_VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
	_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
	_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
	# Delegate extractor for metacafe pages that merely wrap a YouTube video.
	_youtube_ie = None
	IE_NAME = u'metacafe'

	def __init__(self, youtube_ie, downloader=None):
		"""Store a YoutubeIE instance for 'yt-' video ids (see _real_extract)."""
		InfoExtractor.__init__(self, downloader)
		self._youtube_ie = youtube_ie

	def report_disclaimer(self):
		"""Report disclaimer retrieval."""
		self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')

	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[metacafe] Confirming age')

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)

	def _real_initialize(self):
		"""Fetch the disclaimer page and POST the family-filter opt-out so
		that filtered videos become accessible for later requests."""
		# Retrieve disclaimer
		request = urllib2.Request(self._DISCLAIMER)
		try:
			self.report_disclaimer()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
			return

		# Confirm age
		disclaimer_form = {
			'filters': '0',
			'submit': "Continue - I'm over 18",
			}
		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
		try:
			self.report_age_confirmation()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return

	def _real_extract(self, url):
		"""Extract the media URL, title and uploader and hand the info dict
		to the FileDownloader."""
		# Extract id and simplified title from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		video_id = mobj.group(1)

		# Check if video comes from YouTube
		mobj2 = re.match(r'^yt-(.*)$', video_id)
		if mobj2 is not None:
			# Delegate wrapped YouTube videos to the YouTube extractor.
			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
			return

		# At this point we have a new video
		self._downloader.increment_downloads()

		simple_title = mobj.group(2).decode('utf-8')

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
		if mobj is not None:
			# Old-style page: media URL (and optional gdaKey) directly in the HTML.
			mediaURL = urllib.unquote(mobj.group(1))
			video_extension = mediaURL[-3:]

			# Extract gdaKey if available
			mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
			if mobj is None:
				video_url = mediaURL
			else:
				gdaKey = mobj.group(1)
				video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
		else:
			# New-style page: media URL and key inside the "flashvars" value.
			mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			vardict = parse_qs(mobj.group(1))
			if 'mediaData' not in vardict:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			# JSON-escaped slashes need to be unescaped before use.
			mediaURL = mobj.group(1).replace('\\/', '/')
			video_extension = mediaURL[-3:]
			video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))

		mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)

		mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = mobj.group(1)

		try:
			# Process video information
			self._downloader.process_info({
				'id':		video_id.decode('utf-8'),
				'url':		video_url.decode('utf-8'),
				'uploader':	video_uploader.decode('utf-8'),
				'upload_date':	u'NA',
				'title':	video_title,
				'stitle':	simple_title,
				'ext':		video_extension.decode('utf-8'),
				'format':	u'NA',
				'player_url':	None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'\nERROR: unable to download video')
1602
1603
1604 class DailymotionIE(InfoExtractor):
1605         """Information Extractor for Dailymotion"""
1606
1607         _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1608         IE_NAME = u'dailymotion'
1609
1610         def __init__(self, downloader=None):
1611                 InfoExtractor.__init__(self, downloader)
1612
1613         def report_download_webpage(self, video_id):
1614                 """Report webpage download."""
1615                 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
1616
1617         def report_extraction(self, video_id):
1618                 """Report information extraction."""
1619                 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
1620
1621         def _real_extract(self, url):
1622                 # Extract id and simplified title from URL
1623                 mobj = re.match(self._VALID_URL, url)
1624                 if mobj is None:
1625                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1626                         return
1627
1628                 # At this point we have a new video
1629                 self._downloader.increment_downloads()
1630                 video_id = mobj.group(1)
1631
1632                 video_extension = 'flv'
1633
1634                 # Retrieve video webpage to extract further information
1635                 request = urllib2.Request(url)
1636                 request.add_header('Cookie', 'family_filter=off')
1637                 try:
1638                         self.report_download_webpage(video_id)
1639                         webpage = urllib2.urlopen(request).read()
1640                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1641                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1642                         return
1643
1644                 # Extract URL, uploader and title from webpage
1645                 self.report_extraction(video_id)
1646                 mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage)
1647                 if mobj is None:
1648                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1649                         return
1650                 sequence = urllib.unquote(mobj.group(1))
1651                 mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence)
1652                 if mobj is None:
1653                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1654                         return
1655                 mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '')
1656
1657                 # if needed add http://www.dailymotion.com/ if relative URL
1658
1659                 video_url = mediaURL
1660
1661                 mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage)
1662                 if mobj is None:
1663                         self._downloader.trouble(u'ERROR: unable to extract title')
1664                         return
1665                 video_title = _unescapeHTML(mobj.group('title').decode('utf-8'))
1666                 video_title = sanitize_title(video_title)
1667                 simple_title = _simplify_title(video_title)
1668
1669                 mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
1670                 if mobj is None:
1671                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1672                         return
1673                 video_uploader = mobj.group(1)
1674
1675                 try:
1676                         # Process video information
1677                         self._downloader.process_info({
1678                                 'id':           video_id.decode('utf-8'),
1679                                 'url':          video_url.decode('utf-8'),
1680                                 'uploader':     video_uploader.decode('utf-8'),
1681                                 'upload_date':  u'NA',
1682                                 'title':        video_title,
1683                                 'stitle':       simple_title,
1684                                 'ext':          video_extension.decode('utf-8'),
1685                                 'format':       u'NA',
1686                                 'player_url':   None,
1687                         })
1688                 except UnavailableVideoError:
1689                         self._downloader.trouble(u'\nERROR: unable to download video')
1690
1691
class GoogleIE(InfoExtractor):
	"""Information extractor for video.google.com."""

	# Group 1 is the docid (may be negative; see abs() in _real_extract).
	_VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
	IE_NAME = u'video.google'

	def __init__(self, downloader=None):
		InfoExtractor.__init__(self, downloader)

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)

	def _real_extract(self, url):
		"""Extract the media URL and title from the playback page and hand
		the info dict to the FileDownloader."""
		# Extract id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
			return

		# At this point we have a new video
		self._downloader.increment_downloads()
		video_id = mobj.group(1)

		video_extension = 'mp4'

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader, and title from webpage
		self.report_extraction(video_id)
		# Prefer the direct mp4 download URL; fall back to the escaped flv
		# stream URL embedded in the page's javascript.
		mobj = re.search(r"download_url:'([^']+)'", webpage)
		if mobj is None:
			video_extension = 'flv'
			mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract media URL')
			return
		mediaURL = urllib.unquote(mobj.group(1))
		# Undo the javascript hex escaping of '=' and '&'.
		mediaURL = mediaURL.replace('\\x3d', '\x3d')
		mediaURL = mediaURL.replace('\\x26', '\x26')

		video_url = mediaURL

		mobj = re.search(r'<title>(.*)</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)
		simple_title = _simplify_title(video_title)

		# Extract video description
		# NOTE(review): video_description is computed but never placed in the
		# info dict below — confirm whether this is intentional.
		mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract video description')
			return
		video_description = mobj.group(1).decode('utf-8')
		if not video_description:
			video_description = 'No description available.'

		# Extract video thumbnail
		if self._downloader.params.get('forcethumbnail', False):
			# The thumbnail only appears on the search results page, so a
			# second request is needed when the user asked for it.
			request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
			try:
				webpage = urllib2.urlopen(request).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
				return
			mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
				return
			video_thumbnail = mobj.group(1)
		else:	# we need something to pass to process_info
			video_thumbnail = ''

		try:
			# Process video information
			self._downloader.process_info({
				'id':		video_id.decode('utf-8'),
				'url':		video_url.decode('utf-8'),
				'uploader':	u'NA',
				'upload_date':	u'NA',
				'title':	video_title,
				'stitle':	simple_title,
				'ext':		video_extension.decode('utf-8'),
				'format':	u'NA',
				'player_url':	None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'\nERROR: unable to download video')
1794
1795
1796 class PhotobucketIE(InfoExtractor):
1797         """Information extractor for photobucket.com."""
1798
1799         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1800         IE_NAME = u'photobucket'
1801
1802         def __init__(self, downloader=None):
1803                 InfoExtractor.__init__(self, downloader)
1804
1805         def report_download_webpage(self, video_id):
1806                 """Report webpage download."""
1807                 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1808
1809         def report_extraction(self, video_id):
1810                 """Report information extraction."""
1811                 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1812
1813         def _real_extract(self, url):
1814                 # Extract id from URL
1815                 mobj = re.match(self._VALID_URL, url)
1816                 if mobj is None:
1817                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1818                         return
1819
1820                 # At this point we have a new video
1821                 self._downloader.increment_downloads()
1822                 video_id = mobj.group(1)
1823
1824                 video_extension = 'flv'
1825
1826                 # Retrieve video webpage to extract further information
1827                 request = urllib2.Request(url)
1828                 try:
1829                         self.report_download_webpage(video_id)
1830                         webpage = urllib2.urlopen(request).read()
1831                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1832                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1833                         return
1834
1835                 # Extract URL, uploader, and title from webpage
1836                 self.report_extraction(video_id)
1837                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1838                 if mobj is None:
1839                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1840                         return
1841                 mediaURL = urllib.unquote(mobj.group(1))
1842
1843                 video_url = mediaURL
1844
1845                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1846                 if mobj is None:
1847                         self._downloader.trouble(u'ERROR: unable to extract title')
1848                         return
1849                 video_title = mobj.group(1).decode('utf-8')
1850                 video_title = sanitize_title(video_title)
1851                 simple_title = _simplify_title(vide_title)
1852
1853                 video_uploader = mobj.group(2).decode('utf-8')
1854
1855                 try:
1856                         # Process video information
1857                         self._downloader.process_info({
1858                                 'id':           video_id.decode('utf-8'),
1859                                 'url':          video_url.decode('utf-8'),
1860                                 'uploader':     video_uploader,
1861                                 'upload_date':  u'NA',
1862                                 'title':        video_title,
1863                                 'stitle':       simple_title,
1864                                 'ext':          video_extension.decode('utf-8'),
1865                                 'format':       u'NA',
1866                                 'player_url':   None,
1867                         })
1868                 except UnavailableVideoError:
1869                         self._downloader.trouble(u'\nERROR: unable to download video')
1870
1871
1872 class YahooIE(InfoExtractor):
1873         """Information extractor for video.yahoo.com."""
1874
1875         # _VALID_URL matches all Yahoo! Video URLs
1876         # _VPAGE_URL matches only the extractable '/watch/' URLs
1877         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1878         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1879         IE_NAME = u'video.yahoo'
1880
1881         def __init__(self, downloader=None):
1882                 InfoExtractor.__init__(self, downloader)
1883
1884         def report_download_webpage(self, video_id):
1885                 """Report webpage download."""
1886                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1887
1888         def report_extraction(self, video_id):
1889                 """Report information extraction."""
1890                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1891
1892         def _real_extract(self, url, new_video=True):
1893                 # Extract ID from URL
1894                 mobj = re.match(self._VALID_URL, url)
1895                 if mobj is None:
1896                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1897                         return
1898
1899                 # At this point we have a new video
1900                 self._downloader.increment_downloads()
1901                 video_id = mobj.group(2)
1902                 video_extension = 'flv'
1903
1904                 # Rewrite valid but non-extractable URLs as
1905                 # extractable English language /watch/ URLs
1906                 if re.match(self._VPAGE_URL, url) is None:
1907                         request = urllib2.Request(url)
1908                         try:
1909                                 webpage = urllib2.urlopen(request).read()
1910                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1911                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1912                                 return
1913
1914                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1915                         if mobj is None:
1916                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1917                                 return
1918                         yahoo_id = mobj.group(1)
1919
1920                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1921                         if mobj is None:
1922                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1923                                 return
1924                         yahoo_vid = mobj.group(1)
1925
1926                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1927                         return self._real_extract(url, new_video=False)
1928
1929                 # Retrieve video webpage to extract further information
1930                 request = urllib2.Request(url)
1931                 try:
1932                         self.report_download_webpage(video_id)
1933                         webpage = urllib2.urlopen(request).read()
1934                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1935                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1936                         return
1937
1938                 # Extract uploader and title from webpage
1939                 self.report_extraction(video_id)
1940                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1941                 if mobj is None:
1942                         self._downloader.trouble(u'ERROR: unable to extract video title')
1943                         return
1944                 video_title = mobj.group(1).decode('utf-8')
1945                 simple_title = _simplify_title(video_title)
1946
1947                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1948                 if mobj is None:
1949                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1950                         return
1951                 video_uploader = mobj.group(1).decode('utf-8')
1952
1953                 # Extract video thumbnail
1954                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1955                 if mobj is None:
1956                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1957                         return
1958                 video_thumbnail = mobj.group(1).decode('utf-8')
1959
1960                 # Extract video description
1961                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1962                 if mobj is None:
1963                         self._downloader.trouble(u'ERROR: unable to extract video description')
1964                         return
1965                 video_description = mobj.group(1).decode('utf-8')
1966                 if not video_description:
1967                         video_description = 'No description available.'
1968
1969                 # Extract video height and width
1970                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1971                 if mobj is None:
1972                         self._downloader.trouble(u'ERROR: unable to extract video height')
1973                         return
1974                 yv_video_height = mobj.group(1)
1975
1976                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1977                 if mobj is None:
1978                         self._downloader.trouble(u'ERROR: unable to extract video width')
1979                         return
1980                 yv_video_width = mobj.group(1)
1981
1982                 # Retrieve video playlist to extract media URL
1983                 # I'm not completely sure what all these options are, but we
1984                 # seem to need most of them, otherwise the server sends a 401.
1985                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1986                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1987                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1988                                 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1989                                 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1990                 try:
1991                         self.report_download_webpage(video_id)
1992                         webpage = urllib2.urlopen(request).read()
1993                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1994                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1995                         return
1996
1997                 # Extract media URL from playlist XML
1998                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1999                 if mobj is None:
2000                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
2001                         return
2002                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
2003                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
2004
2005                 try:
2006                         # Process video information
2007                         self._downloader.process_info({
2008                                 'id':           video_id.decode('utf-8'),
2009                                 'url':          video_url,
2010                                 'uploader':     video_uploader,
2011                                 'upload_date':  u'NA',
2012                                 'title':        video_title,
2013                                 'stitle':       simple_title,
2014                                 'ext':          video_extension.decode('utf-8'),
2015                                 'thumbnail':    video_thumbnail.decode('utf-8'),
2016                                 'description':  video_description,
2017                                 'thumbnail':    video_thumbnail,
2018                                 'player_url':   None,
2019                         })
2020                 except UnavailableVideoError:
2021                         self._downloader.trouble(u'\nERROR: unable to download video')
2022
2023
2024 class VimeoIE(InfoExtractor):
2025         """Information extractor for vimeo.com."""
2026
2027         # _VALID_URL matches Vimeo URLs
2028         _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
2029         IE_NAME = u'vimeo'
2030
2031         def __init__(self, downloader=None):
2032                 InfoExtractor.__init__(self, downloader)
2033
2034         def report_download_webpage(self, video_id):
2035                 """Report webpage download."""
2036                 self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
2037
2038         def report_extraction(self, video_id):
2039                 """Report information extraction."""
2040                 self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
2041
2042         def _real_extract(self, url, new_video=True):
2043                 # Extract ID from URL
2044                 mobj = re.match(self._VALID_URL, url)
2045                 if mobj is None:
2046                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2047                         return
2048
2049                 # At this point we have a new video
2050                 self._downloader.increment_downloads()
2051                 video_id = mobj.group(1)
2052
2053                 # Retrieve video webpage to extract further information
2054                 request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
2055                 try:
2056                         self.report_download_webpage(video_id)
2057                         webpage = urllib2.urlopen(request).read()
2058                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2059                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
2060                         return
2061
2062                 # Now we begin extracting as much information as we can from what we
2063                 # retrieved. First we extract the information common to all extractors,
2064                 # and latter we extract those that are Vimeo specific.
2065                 self.report_extraction(video_id)
2066
2067                 # Extract title
2068                 mobj = re.search(r'<caption>(.*?)</caption>', webpage)
2069                 if mobj is None:
2070                         self._downloader.trouble(u'ERROR: unable to extract video title')
2071                         return
2072                 video_title = mobj.group(1).decode('utf-8')
2073                 simple_title = _simplify_title(video_title)
2074
2075                 # Extract uploader
2076                 mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
2077                 if mobj is None:
2078                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
2079                         return
2080                 video_uploader = mobj.group(1).decode('utf-8')
2081
2082                 # Extract video thumbnail
2083                 mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
2084                 if mobj is None:
2085                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
2086                         return
2087                 video_thumbnail = mobj.group(1).decode('utf-8')
2088
2089                 # # Extract video description
2090                 # mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
2091                 # if mobj is None:
2092                 #       self._downloader.trouble(u'ERROR: unable to extract video description')
2093                 #       return
2094                 # video_description = mobj.group(1).decode('utf-8')
2095                 # if not video_description: video_description = 'No description available.'
2096                 video_description = 'Foo.'
2097
2098                 # Vimeo specific: extract request signature
2099                 mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
2100                 if mobj is None:
2101                         self._downloader.trouble(u'ERROR: unable to extract request signature')
2102                         return
2103                 sig = mobj.group(1).decode('utf-8')
2104
2105                 # Vimeo specific: extract video quality information
2106                 mobj = re.search(r'<isHD>(\d+)</isHD>', webpage)
2107                 if mobj is None:
2108                         self._downloader.trouble(u'ERROR: unable to extract video quality information')
2109                         return
2110                 quality = mobj.group(1).decode('utf-8')
2111
2112                 if int(quality) == 1:
2113                         quality = 'hd'
2114                 else:
2115                         quality = 'sd'
2116
2117                 # Vimeo specific: Extract request signature expiration
2118                 mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
2119                 if mobj is None:
2120                         self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
2121                         return
2122                 sig_exp = mobj.group(1).decode('utf-8')
2123
2124                 video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=%s" % (video_id, sig, sig_exp, quality)
2125
2126                 try:
2127                         # Process video information
2128                         self._downloader.process_info({
2129                                 'id':           video_id.decode('utf-8'),
2130                                 'url':          video_url,
2131                                 'uploader':     video_uploader,
2132                                 'upload_date':  u'NA',
2133                                 'title':        video_title,
2134                                 'stitle':       simple_title,
2135                                 'ext':          u'mp4',
2136                                 'thumbnail':    video_thumbnail.decode('utf-8'),
2137                                 'description':  video_description,
2138                                 'thumbnail':    video_thumbnail,
2139                                 'description':  video_description,
2140                                 'player_url':   None,
2141                         })
2142                 except UnavailableVideoError:
2143                         self._downloader.trouble(u'ERROR: unable to download video')
2144
2145
2146 class GenericIE(InfoExtractor):
2147         """Generic last-resort information extractor."""
2148
2149         _VALID_URL = r'.*'
2150         IE_NAME = u'generic'
2151
2152         def __init__(self, downloader=None):
2153                 InfoExtractor.__init__(self, downloader)
2154
2155         def report_download_webpage(self, video_id):
2156                 """Report webpage download."""
2157                 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
2158                 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
2159
2160         def report_extraction(self, video_id):
2161                 """Report information extraction."""
2162                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
2163
2164         def _real_extract(self, url):
2165                 # At this point we have a new video
2166                 self._downloader.increment_downloads()
2167
2168                 video_id = url.split('/')[-1]
2169                 request = urllib2.Request(url)
2170                 try:
2171                         self.report_download_webpage(video_id)
2172                         webpage = urllib2.urlopen(request).read()
2173                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2174                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
2175                         return
2176                 except ValueError, err:
2177                         # since this is the last-resort InfoExtractor, if
2178                         # this error is thrown, it'll be thrown here
2179                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2180                         return
2181
2182                 self.report_extraction(video_id)
2183                 # Start with something easy: JW Player in SWFObject
2184                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
2185                 if mobj is None:
2186                         # Broaden the search a little bit
2187                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
2188                 if mobj is None:
2189                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2190                         return
2191
2192                 # It's possible that one of the regexes
2193                 # matched, but returned an empty group:
2194                 if mobj.group(1) is None:
2195                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2196                         return
2197
2198                 video_url = urllib.unquote(mobj.group(1))
2199                 video_id = os.path.basename(video_url)
2200
2201                 # here's a fun little line of code for you:
2202                 video_extension = os.path.splitext(video_id)[1][1:]
2203                 video_id = os.path.splitext(video_id)[0]
2204
2205                 # it's tempting to parse this further, but you would
2206                 # have to take into account all the variations like
2207                 #   Video Title - Site Name
2208                 #   Site Name | Video Title
2209                 #   Video Title - Tagline | Site Name
2210                 # and so on and so forth; it's just not practical
2211                 mobj = re.search(r'<title>(.*)</title>', webpage)
2212                 if mobj is None:
2213                         self._downloader.trouble(u'ERROR: unable to extract title')
2214                         return
2215                 video_title = mobj.group(1).decode('utf-8')
2216                 video_title = sanitize_title(video_title)
2217                 simple_title = _simplify_title(video_title)
2218
2219                 # video uploader is domain name
2220                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
2221                 if mobj is None:
2222                         self._downloader.trouble(u'ERROR: unable to extract title')
2223                         return
2224                 video_uploader = mobj.group(1).decode('utf-8')
2225
2226                 try:
2227                         # Process video information
2228                         self._downloader.process_info({
2229                                 'id':           video_id.decode('utf-8'),
2230                                 'url':          video_url.decode('utf-8'),
2231                                 'uploader':     video_uploader,
2232                                 'upload_date':  u'NA',
2233                                 'title':        video_title,
2234                                 'stitle':       simple_title,
2235                                 'ext':          video_extension.decode('utf-8'),
2236                                 'format':       u'NA',
2237                                 'player_url':   None,
2238                         })
2239                 except UnavailableVideoError, err:
2240                         self._downloader.trouble(u'\nERROR: unable to download video')
2241
2242
2243 class YoutubeSearchIE(InfoExtractor):
2244         """Information Extractor for YouTube search queries."""
2245         _VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+'
2246         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
2247         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
2248         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2249         _youtube_ie = None
2250         _max_youtube_results = 1000
2251         IE_NAME = u'youtube:search'
2252
2253         def __init__(self, youtube_ie, downloader=None):
2254                 InfoExtractor.__init__(self, downloader)
2255                 self._youtube_ie = youtube_ie
2256
2257         def report_download_page(self, query, pagenum):
2258                 """Report attempt to download playlist page with given number."""
2259                 query = query.decode(preferredencoding())
2260                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
2261
2262         def _real_initialize(self):
2263                 self._youtube_ie.initialize()
2264
2265         def _real_extract(self, query):
2266                 mobj = re.match(self._VALID_URL, query)
2267                 if mobj is None:
2268                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2269                         return
2270
2271                 prefix, query = query.split(':')
2272                 prefix = prefix[8:]
2273                 query = query.encode('utf-8')
2274                 if prefix == '':
2275                         self._download_n_results(query, 1)
2276                         return
2277                 elif prefix == 'all':
2278                         self._download_n_results(query, self._max_youtube_results)
2279                         return
2280                 else:
2281                         try:
2282                                 n = long(prefix)
2283                                 if n <= 0:
2284                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2285                                         return
2286                                 elif n > self._max_youtube_results:
2287                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
2288                                         n = self._max_youtube_results
2289                                 self._download_n_results(query, n)
2290                                 return
2291                         except ValueError: # parsing prefix as integer fails
2292                                 self._download_n_results(query, 1)
2293                                 return
2294
2295         def _download_n_results(self, query, n):
2296                 """Downloads a specified number of results for a query"""
2297
2298                 video_ids = []
2299                 already_seen = set()
2300                 pagenum = 1
2301
2302                 while True:
2303                         self.report_download_page(query, pagenum)
2304                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2305                         request = urllib2.Request(result_url)
2306                         try:
2307                                 page = urllib2.urlopen(request).read()
2308                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2309                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2310                                 return
2311
2312                         # Extract video identifiers
2313                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2314                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
2315                                 if video_id not in already_seen:
2316                                         video_ids.append(video_id)
2317                                         already_seen.add(video_id)
2318                                         if len(video_ids) == n:
2319                                                 # Specified n videos reached
2320                                                 for id in video_ids:
2321                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2322                                                 return
2323
2324                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2325                                 for id in video_ids:
2326                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2327                                 return
2328
2329                         pagenum = pagenum + 1
2330
2331
2332 class GoogleSearchIE(InfoExtractor):
2333         """Information Extractor for Google Video search queries."""
2334         _VALID_URL = r'gvsearch(\d+|all)?:[\s\S]+'
2335         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
2336         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
2337         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
2338         _google_ie = None
2339         _max_google_results = 1000
2340         IE_NAME = u'video.google:search'
2341
2342         def __init__(self, google_ie, downloader=None):
2343                 InfoExtractor.__init__(self, downloader)
2344                 self._google_ie = google_ie
2345
2346         def report_download_page(self, query, pagenum):
2347                 """Report attempt to download playlist page with given number."""
2348                 query = query.decode(preferredencoding())
2349                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
2350
2351         def _real_initialize(self):
2352                 self._google_ie.initialize()
2353
2354         def _real_extract(self, query):
2355                 mobj = re.match(self._VALID_URL, query)
2356                 if mobj is None:
2357                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2358                         return
2359
2360                 prefix, query = query.split(':')
2361                 prefix = prefix[8:]
2362                 query = query.encode('utf-8')
2363                 if prefix == '':
2364                         self._download_n_results(query, 1)
2365                         return
2366                 elif prefix == 'all':
2367                         self._download_n_results(query, self._max_google_results)
2368                         return
2369                 else:
2370                         try:
2371                                 n = long(prefix)
2372                                 if n <= 0:
2373                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2374                                         return
2375                                 elif n > self._max_google_results:
2376                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
2377                                         n = self._max_google_results
2378                                 self._download_n_results(query, n)
2379                                 return
2380                         except ValueError: # parsing prefix as integer fails
2381                                 self._download_n_results(query, 1)
2382                                 return
2383
2384         def _download_n_results(self, query, n):
2385                 """Downloads a specified number of results for a query"""
2386
2387                 video_ids = []
2388                 already_seen = set()
2389                 pagenum = 1
2390
2391                 while True:
2392                         self.report_download_page(query, pagenum)
2393                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2394                         request = urllib2.Request(result_url)
2395                         try:
2396                                 page = urllib2.urlopen(request).read()
2397                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2398                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2399                                 return
2400
2401                         # Extract video identifiers
2402                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2403                                 video_id = mobj.group(1)
2404                                 if video_id not in already_seen:
2405                                         video_ids.append(video_id)
2406                                         already_seen.add(video_id)
2407                                         if len(video_ids) == n:
2408                                                 # Specified n videos reached
2409                                                 for id in video_ids:
2410                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2411                                                 return
2412
2413                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2414                                 for id in video_ids:
2415                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2416                                 return
2417
2418                         pagenum = pagenum + 1
2419
2420
2421 class YahooSearchIE(InfoExtractor):
2422         """Information Extractor for Yahoo! Video search queries."""
2423         _VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+'
2424         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
2425         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
2426         _MORE_PAGES_INDICATOR = r'\s*Next'
2427         _yahoo_ie = None
2428         _max_yahoo_results = 1000
2429         IE_NAME = u'video.yahoo:search'
2430
2431         def __init__(self, yahoo_ie, downloader=None):
2432                 InfoExtractor.__init__(self, downloader)
2433                 self._yahoo_ie = yahoo_ie
2434
2435         def report_download_page(self, query, pagenum):
2436                 """Report attempt to download playlist page with given number."""
2437                 query = query.decode(preferredencoding())
2438                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
2439
2440         def _real_initialize(self):
2441                 self._yahoo_ie.initialize()
2442
2443         def _real_extract(self, query):
2444                 mobj = re.match(self._VALID_URL, query)
2445                 if mobj is None:
2446                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2447                         return
2448
2449                 prefix, query = query.split(':')
2450                 prefix = prefix[8:]
2451                 query = query.encode('utf-8')
2452                 if prefix == '':
2453                         self._download_n_results(query, 1)
2454                         return
2455                 elif prefix == 'all':
2456                         self._download_n_results(query, self._max_yahoo_results)
2457                         return
2458                 else:
2459                         try:
2460                                 n = long(prefix)
2461                                 if n <= 0:
2462                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2463                                         return
2464                                 elif n > self._max_yahoo_results:
2465                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
2466                                         n = self._max_yahoo_results
2467                                 self._download_n_results(query, n)
2468                                 return
2469                         except ValueError: # parsing prefix as integer fails
2470                                 self._download_n_results(query, 1)
2471                                 return
2472
2473         def _download_n_results(self, query, n):
2474                 """Downloads a specified number of results for a query"""
2475
2476                 video_ids = []
2477                 already_seen = set()
2478                 pagenum = 1
2479
2480                 while True:
2481                         self.report_download_page(query, pagenum)
2482                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2483                         request = urllib2.Request(result_url)
2484                         try:
2485                                 page = urllib2.urlopen(request).read()
2486                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2487                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2488                                 return
2489
2490                         # Extract video identifiers
2491                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2492                                 video_id = mobj.group(1)
2493                                 if video_id not in already_seen:
2494                                         video_ids.append(video_id)
2495                                         already_seen.add(video_id)
2496                                         if len(video_ids) == n:
2497                                                 # Specified n videos reached
2498                                                 for id in video_ids:
2499                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2500                                                 return
2501
2502                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2503                                 for id in video_ids:
2504                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2505                                 return
2506
2507                         pagenum = pagenum + 1
2508
2509
2510 class YoutubePlaylistIE(InfoExtractor):
2511         """Information Extractor for YouTube playlists."""
2512
2513         _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
2514         _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
2515         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2516         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2517         _youtube_ie = None
2518         IE_NAME = u'youtube:playlist'
2519
2520         def __init__(self, youtube_ie, downloader=None):
2521                 InfoExtractor.__init__(self, downloader)
2522                 self._youtube_ie = youtube_ie
2523
2524         def report_download_page(self, playlist_id, pagenum):
2525                 """Report attempt to download playlist page with given number."""
2526                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2527
2528         def _real_initialize(self):
2529                 self._youtube_ie.initialize()
2530
2531         def _real_extract(self, url):
2532                 # Extract playlist id
2533                 mobj = re.match(self._VALID_URL, url)
2534                 if mobj is None:
2535                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2536                         return
2537
2538                 # Single video case
2539                 if mobj.group(3) is not None:
2540                         self._youtube_ie.extract(mobj.group(3))
2541                         return
2542
2543                 # Download playlist pages
2544                 # prefix is 'p' as default for playlists but there are other types that need extra care
2545                 playlist_prefix = mobj.group(1)
2546                 if playlist_prefix == 'a':
2547                         playlist_access = 'artist'
2548                 else:
2549                         playlist_prefix = 'p'
2550                         playlist_access = 'view_play_list'
2551                 playlist_id = mobj.group(2)
2552                 video_ids = []
2553                 pagenum = 1
2554
2555                 while True:
2556                         self.report_download_page(playlist_id, pagenum)
2557                         url = self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)
2558                         request = urllib2.Request(url)
2559                         try:
2560                                 page = urllib2.urlopen(request).read()
2561                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2562                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2563                                 return
2564
2565                         # Extract video identifiers
2566                         ids_in_page = []
2567                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2568                                 if mobj.group(1) not in ids_in_page:
2569                                         ids_in_page.append(mobj.group(1))
2570                         video_ids.extend(ids_in_page)
2571
2572                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2573                                 break
2574                         pagenum = pagenum + 1
2575
2576                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2577                 playlistend = self._downloader.params.get('playlistend', -1)
2578                 video_ids = video_ids[playliststart:playlistend]
2579
2580                 for id in video_ids:
2581                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2582                 return
2583
2584
2585 class YoutubeUserIE(InfoExtractor):
2586         """Information Extractor for YouTube users."""
2587
2588         _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
2589         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2590         _GDATA_PAGE_SIZE = 50
2591         _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2592         _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
2593         _youtube_ie = None
2594         IE_NAME = u'youtube:user'
2595
2596         def __init__(self, youtube_ie, downloader=None):
2597                 InfoExtractor.__init__(self, downloader)
2598                 self._youtube_ie = youtube_ie
2599
2600         def report_download_page(self, username, start_index):
2601                 """Report attempt to download user page."""
2602                 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
2603                                 (username, start_index, start_index + self._GDATA_PAGE_SIZE))
2604
2605         def _real_initialize(self):
2606                 self._youtube_ie.initialize()
2607
2608         def _real_extract(self, url):
2609                 # Extract username
2610                 mobj = re.match(self._VALID_URL, url)
2611                 if mobj is None:
2612                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2613                         return
2614
2615                 username = mobj.group(1)
2616
2617                 # Download video ids using YouTube Data API. Result size per
2618                 # query is limited (currently to 50 videos) so we need to query
2619                 # page by page until there are no video ids - it means we got
2620                 # all of them.
2621
2622                 video_ids = []
2623                 pagenum = 0
2624
2625                 while True:
2626                         start_index = pagenum * self._GDATA_PAGE_SIZE + 1
2627                         self.report_download_page(username, start_index)
2628
2629                         request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
2630
2631                         try:
2632                                 page = urllib2.urlopen(request).read()
2633                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2634                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2635                                 return
2636
2637                         # Extract video identifiers
2638                         ids_in_page = []
2639
2640                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2641                                 if mobj.group(1) not in ids_in_page:
2642                                         ids_in_page.append(mobj.group(1))
2643
2644                         video_ids.extend(ids_in_page)
2645
2646                         # A little optimization - if current page is not
2647                         # "full", ie. does not contain PAGE_SIZE video ids then
2648                         # we can assume that this page is the last one - there
2649                         # are no more ids on further pages - no need to query
2650                         # again.
2651
2652                         if len(ids_in_page) < self._GDATA_PAGE_SIZE:
2653                                 break
2654
2655                         pagenum += 1
2656
2657                 all_ids_count = len(video_ids)
2658                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2659                 playlistend = self._downloader.params.get('playlistend', -1)
2660
2661                 if playlistend == -1:
2662                         video_ids = video_ids[playliststart:]
2663                 else:
2664                         video_ids = video_ids[playliststart:playlistend]
2665
2666                 self._downloader.to_screen(u"[youtube] user %s: Collected %d video ids (downloading %d of them)" %
2667                                 (username, all_ids_count, len(video_ids)))
2668
2669                 for video_id in video_ids:
2670                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2671
2672
2673 class DepositFilesIE(InfoExtractor):
2674         """Information extractor for depositfiles.com"""
2675
2676         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
2677         IE_NAME = u'DepositFiles'
2678
2679         def __init__(self, downloader=None):
2680                 InfoExtractor.__init__(self, downloader)
2681
2682         def report_download_webpage(self, file_id):
2683                 """Report webpage download."""
2684                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2685
2686         def report_extraction(self, file_id):
2687                 """Report information extraction."""
2688                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2689
2690         def _real_extract(self, url):
2691                 # At this point we have a new file
2692                 self._downloader.increment_downloads()
2693
2694                 file_id = url.split('/')[-1]
2695                 # Rebuild url in english locale
2696                 url = 'http://depositfiles.com/en/files/' + file_id
2697
2698                 # Retrieve file webpage with 'Free download' button pressed
2699                 free_download_indication = { 'gateway_result' : '1' }
2700                 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
2701                 try:
2702                         self.report_download_webpage(file_id)
2703                         webpage = urllib2.urlopen(request).read()
2704                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2705                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2706                         return
2707
2708                 # Search for the real file URL
2709                 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2710                 if (mobj is None) or (mobj.group(1) is None):
2711                         # Try to figure out reason of the error.
2712                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2713                         if (mobj is not None) and (mobj.group(1) is not None):
2714                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2715                                 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2716                         else:
2717                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2718                         return
2719
2720                 file_url = mobj.group(1)
2721                 file_extension = os.path.splitext(file_url)[1][1:]
2722
2723                 # Search for file title
2724                 mobj = re.search(r'<b title="(.*?)">', webpage)
2725                 if mobj is None:
2726                         self._downloader.trouble(u'ERROR: unable to extract title')
2727                         return
2728                 file_title = mobj.group(1).decode('utf-8')
2729
2730                 try:
2731                         # Process file information
2732                         self._downloader.process_info({
2733                                 'id':           file_id.decode('utf-8'),
2734                                 'url':          file_url.decode('utf-8'),
2735                                 'uploader':     u'NA',
2736                                 'upload_date':  u'NA',
2737                                 'title':        file_title,
2738                                 'stitle':       file_title,
2739                                 'ext':          file_extension.decode('utf-8'),
2740                                 'format':       u'NA',
2741                                 'player_url':   None,
2742                         })
2743                 except UnavailableVideoError, err:
2744                         self._downloader.trouble(u'ERROR: unable to download file')
2745
2746
2747 class FacebookIE(InfoExtractor):
2748         """Information Extractor for Facebook"""
2749
2750         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2751         _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2752         _NETRC_MACHINE = 'facebook'
2753         _available_formats = ['video', 'highqual', 'lowqual']
2754         _video_extensions = {
2755                 'video': 'mp4',
2756                 'highqual': 'mp4',
2757                 'lowqual': 'mp4',
2758         }
2759         IE_NAME = u'facebook'
2760
2761         def __init__(self, downloader=None):
2762                 InfoExtractor.__init__(self, downloader)
2763
2764         def _reporter(self, message):
2765                 """Add header and report message."""
2766                 self._downloader.to_screen(u'[facebook] %s' % message)
2767
2768         def report_login(self):
2769                 """Report attempt to log in."""
2770                 self._reporter(u'Logging in')
2771
2772         def report_video_webpage_download(self, video_id):
2773                 """Report attempt to download video webpage."""
2774                 self._reporter(u'%s: Downloading video webpage' % video_id)
2775
2776         def report_information_extraction(self, video_id):
2777                 """Report attempt to extract video information."""
2778                 self._reporter(u'%s: Extracting video information' % video_id)
2779
2780         def _parse_page(self, video_webpage):
2781                 """Extract video information from page"""
2782                 # General data
2783                 data = {'title': r'\("video_title", "(.*?)"\)',
2784                         'description': r'<div class="datawrap">(.*?)</div>',
2785                         'owner': r'\("video_owner_name", "(.*?)"\)',
2786                         'thumbnail':  r'\("thumb_url", "(?P<THUMB>.*?)"\)',
2787                         }
2788                 video_info = {}
2789                 for piece in data.keys():
2790                         mobj = re.search(data[piece], video_webpage)
2791                         if mobj is not None:
2792                                 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2793
2794                 # Video urls
2795                 video_urls = {}
2796                 for fmt in self._available_formats:
2797                         mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
2798                         if mobj is not None:
2799                                 # URL is in a Javascript segment inside an escaped Unicode format within
2800                                 # the generally utf-8 page
2801                                 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2802                 video_info['video_urls'] = video_urls
2803
2804                 return video_info
2805
2806         def _real_initialize(self):
2807                 if self._downloader is None:
2808                         return
2809
2810                 useremail = None
2811                 password = None
2812                 downloader_params = self._downloader.params
2813
2814                 # Attempt to use provided username and password or .netrc data
2815                 if downloader_params.get('username', None) is not None:
2816                         useremail = downloader_params['username']
2817                         password = downloader_params['password']
2818                 elif downloader_params.get('usenetrc', False):
2819                         try:
2820                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
2821                                 if info is not None:
2822                                         useremail = info[0]
2823                                         password = info[2]
2824                                 else:
2825                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
2826                         except (IOError, netrc.NetrcParseError), err:
2827                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
2828                                 return
2829
2830                 if useremail is None:
2831                         return
2832
2833                 # Log in
2834                 login_form = {
2835                         'email': useremail,
2836                         'pass': password,
2837                         'login': 'Log+In'
2838                         }
2839                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
2840                 try:
2841                         self.report_login()
2842                         login_results = urllib2.urlopen(request).read()
2843                         if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
2844                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
2845                                 return
2846                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2847                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
2848                         return
2849
2850         def _real_extract(self, url):
2851                 mobj = re.match(self._VALID_URL, url)
2852                 if mobj is None:
2853                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2854                         return
2855                 video_id = mobj.group('ID')
2856
2857                 # Get video webpage
2858                 self.report_video_webpage_download(video_id)
2859                 request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
2860                 try:
2861                         page = urllib2.urlopen(request)
2862                         video_webpage = page.read()
2863                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2864                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
2865                         return
2866
2867                 # Start extracting information
2868                 self.report_information_extraction(video_id)
2869
2870                 # Extract information
2871                 video_info = self._parse_page(video_webpage)
2872
2873                 # uploader
2874                 if 'owner' not in video_info:
2875                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
2876                         return
2877                 video_uploader = video_info['owner']
2878
2879                 # title
2880                 if 'title' not in video_info:
2881                         self._downloader.trouble(u'ERROR: unable to extract video title')
2882                         return
2883                 video_title = video_info['title']
2884                 video_title = video_title.decode('utf-8')
2885                 video_title = sanitize_title(video_title)
2886
2887                 simple_title = _simplify_title(video_title)
2888
2889                 # thumbnail image
2890                 if 'thumbnail' not in video_info:
2891                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
2892                         video_thumbnail = ''
2893                 else:
2894                         video_thumbnail = video_info['thumbnail']
2895
2896                 # upload date
2897                 upload_date = u'NA'
2898                 if 'upload_date' in video_info:
2899                         upload_time = video_info['upload_date']
2900                         timetuple = email.utils.parsedate_tz(upload_time)
2901                         if timetuple is not None:
2902                                 try:
2903                                         upload_date = time.strftime('%Y%m%d', timetuple[0:9])
2904                                 except:
2905                                         pass
2906
2907                 # description
2908                 video_description = video_info.get('description', 'No description available.')
2909
2910                 url_map = video_info['video_urls']
2911                 if len(url_map.keys()) > 0:
2912                         # Decide which formats to download
2913                         req_format = self._downloader.params.get('format', None)
2914                         format_limit = self._downloader.params.get('format_limit', None)
2915
2916                         if format_limit is not None and format_limit in self._available_formats:
2917                                 format_list = self._available_formats[self._available_formats.index(format_limit):]
2918                         else:
2919                                 format_list = self._available_formats
2920                         existing_formats = [x for x in format_list if x in url_map]
2921                         if len(existing_formats) == 0:
2922                                 self._downloader.trouble(u'ERROR: no known formats available for video')
2923                                 return
2924                         if req_format is None:
2925                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
2926                         elif req_format == 'worst':
2927                                 video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
2928                         elif req_format == '-1':
2929                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
2930                         else:
2931                                 # Specific format
2932                                 if req_format not in url_map:
2933                                         self._downloader.trouble(u'ERROR: requested format not available')
2934                                         return
2935                                 video_url_list = [(req_format, url_map[req_format])] # Specific format
2936
2937                 for format_param, video_real_url in video_url_list:
2938
2939                         # At this point we have a new video
2940                         self._downloader.increment_downloads()
2941
2942                         # Extension
2943                         video_extension = self._video_extensions.get(format_param, 'mp4')
2944
2945                         try:
2946                                 # Process video information
2947                                 self._downloader.process_info({
2948                                         'id':           video_id.decode('utf-8'),
2949                                         'url':          video_real_url.decode('utf-8'),
2950                                         'uploader':     video_uploader.decode('utf-8'),
2951                                         'upload_date':  upload_date,
2952                                         'title':        video_title,
2953                                         'stitle':       simple_title,
2954                                         'ext':          video_extension.decode('utf-8'),
2955                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
2956                                         'thumbnail':    video_thumbnail.decode('utf-8'),
2957                                         'description':  video_description.decode('utf-8'),
2958                                         'player_url':   None,
2959                                 })
2960                         except UnavailableVideoError, err:
2961                                 self._downloader.trouble(u'\nERROR: unable to download video')
2962
class BlipTVIE(InfoExtractor):
	"""Information extractor for blip.tv"""

	# Any blip.tv path (optionally on a show subdomain); group(1) is the path,
	# used only for status messages.
	_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
	# Pulls the lowercase filename extension off the end of a media URL.
	_URL_EXT = r'^.*\.([a-z0-9]+)$'
	IE_NAME = u'blip.tv'

	def report_extraction(self, file_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))

	def report_direct_download(self, title):
		"""Report information extraction."""
		self._downloader.to_screen(u'[%s] %s: Direct download detected' % (self.IE_NAME, title))

	def _real_extract(self, url):
		"""Fetch blip.tv's JSON description of the page and build the info dict.

		Two outcomes are handled: the URL may answer with the media file
		itself (Content-Type video/*), in which case metadata is derived
		from the filename and the open handle is passed along, or it
		answers with JSON metadata describing the post.
		"""
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		# Append the JSON-skin parameters with the correct separator.
		if '?' in url:
			cchar = '&'
		else:
			cchar = '?'
		json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
		request = urllib2.Request(json_url)
		self.report_extraction(mobj.group(1))
		info = None
		try:
			urlh = urllib2.urlopen(request)
			if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
				# The server handed us the media file directly; derive
				# id/title/extension from the URL's basename.
				basename = url.split('/')[-1]
				title,ext = os.path.splitext(basename)
				title = title.decode('UTF-8')
				ext = ext.replace('.', '')
				self.report_direct_download(title)
				info = {
					'id': title,
					'url': url,
					'title': title,
					'stitle': _simplify_title(title),
					'ext': ext,
					'urlhandle': urlh  # reuse the open handle; don't refetch
				}
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
			return
		if info is None: # Regular URL
			try:
				json_code = urlh.read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err))
				return

			try:
				json_data = json.loads(json_code)
				# Some responses wrap the payload in a 'Post' key.
				if 'Post' in json_data:
					data = json_data['Post']
				else:
					data = json_data

				# datestamp like '08-12-11 12:34PM' -> '20110812'
				upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
				video_url = data['media']['url']
				umobj = re.match(self._URL_EXT, video_url)
				if umobj is None:
					raise ValueError('Can not determine filename extension')
				ext = umobj.group(1)

				info = {
					'id': data['item_id'],
					'url': video_url,
					'uploader': data['display_name'],
					'upload_date': upload_date,
					'title': data['title'],
					'stitle': _simplify_title(data['title']),
					'ext': ext,
					'format': data['media']['mimeType'],
					'thumbnail': data['thumbnailUrl'],
					'description': data['description'],
					'player_url': data['embedUrl']
				}
			except (ValueError,KeyError), err:
				# Covers both malformed JSON values and missing keys.
				self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
				return

		self._downloader.increment_downloads()

		try:
			self._downloader.process_info(info)
		except UnavailableVideoError, err:
			self._downloader.trouble(u'\nERROR: unable to download video')
3055
3056
3057 class MyVideoIE(InfoExtractor):
3058         """Information Extractor for myvideo.de."""
3059
3060         _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
3061         IE_NAME = u'myvideo'
3062
3063         def __init__(self, downloader=None):
3064                 InfoExtractor.__init__(self, downloader)
3065         
3066         def report_download_webpage(self, video_id):
3067                 """Report webpage download."""
3068                 self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
3069
3070         def report_extraction(self, video_id):
3071                 """Report information extraction."""
3072                 self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)
3073
3074         def _real_extract(self,url):
3075                 mobj = re.match(self._VALID_URL, url)
3076                 if mobj is None:
3077                         self._download.trouble(u'ERROR: invalid URL: %s' % url)
3078                         return
3079
3080                 video_id = mobj.group(1)
3081
3082                 # Get video webpage
3083                 request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id)
3084                 try:
3085                         self.report_download_webpage(video_id)
3086                         webpage = urllib2.urlopen(request).read()
3087                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3088                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
3089                         return
3090
3091                 self.report_extraction(video_id)
3092                 mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
3093                                  webpage)
3094                 if mobj is None:
3095                         self._downloader.trouble(u'ERROR: unable to extract media URL')
3096                         return
3097                 video_url = mobj.group(1) + ('/%s.flv' % video_id)
3098
3099                 mobj = re.search('<title>([^<]+)</title>', webpage)
3100                 if mobj is None:
3101                         self._downloader.trouble(u'ERROR: unable to extract title')
3102                         return
3103
3104                 video_title = mobj.group(1)
3105                 video_title = sanitize_title(video_title)
3106
3107                 simple_title = _simplify_title(video_title)
3108
3109                 try:
3110                         self._downloader.process_info({
3111                                 'id':           video_id,
3112                                 'url':          video_url,
3113                                 'uploader':     u'NA',
3114                                 'upload_date':  u'NA',
3115                                 'title':        video_title,
3116                                 'stitle':       simple_title,
3117                                 'ext':          u'flv',
3118                                 'format':       u'NA',
3119                                 'player_url':   None,
3120                         })
3121                 except UnavailableVideoError:
3122                         self._downloader.trouble(u'\nERROR: Unable to download video')
3123
class ComedyCentralIE(InfoExtractor):
	"""Information extractor for The Daily Show and Colbert Report """

	# Accepts either a ':tds'/':colbert'-style shortcut or a full-episodes URL.
	_VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
	IE_NAME = u'comedycentral'

	def report_extraction(self, episode_id):
		"""Report start of information extraction."""
		self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)

	def report_config_download(self, episode_id):
		"""Report download of a media configuration XML."""
		self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)

	def report_index_download(self, episode_id):
		"""Report download of the episode index feed."""
		self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)

	def report_player_url(self, episode_id):
		"""Report resolution of the Flash player URL."""
		self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)

	def _real_extract(self, url):
		"""Resolve shortcuts/redirects, fetch the episode index, then download
		every media item listed, picking the highest-bitrate rendition."""
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		# Translate shortname shortcuts to the show's full-episodes page
		# and re-match so the named groups are populated.
		if mobj.group('shortname'):
			if mobj.group('shortname') in ('tds', 'thedailyshow'):
				url = u'http://www.thedailyshow.com/full-episodes/'
			else:
				url = u'http://www.colbertnation.com/full-episodes/'
			mobj = re.match(self._VALID_URL, url)
			assert mobj is not None

		# No explicit episode means "newest": follow the site's redirect.
		dlNewest = not mobj.group('episode')
		if dlNewest:
			epTitle = mobj.group('showname')
		else:
			epTitle = mobj.group('episode')

		req = urllib2.Request(url)
		self.report_extraction(epTitle)
		try:
			htmlHandle = urllib2.urlopen(req)
			html = htmlHandle.read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
			return
		if dlNewest:
			# The redirect target must name a concrete episode.
			url = htmlHandle.geturl()
			mobj = re.match(self._VALID_URL, url)
			if mobj is None:
				self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url)
				return
			if mobj.group('episode') == '':
				self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url)
				return
			epTitle = mobj.group('episode')

		# Find the embedded Flash player URL: [0] full URL, [1] mgid-style uri.
		mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"', html)
		if len(mMovieParams) == 0:
			self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
			return

		# Resolve the player URL through its redirect chain.
		playerUrl_raw = mMovieParams[0][0]
		self.report_player_url(epTitle)
		try:
			urlHandle = urllib2.urlopen(playerUrl_raw)
			playerUrl = urlHandle.geturl()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err))
			return

		# The MRSS index lists every media item of the episode.
		uri = mMovieParams[0][1]
		indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri})
		self.report_index_download(epTitle)
		try:
			indexXml = urllib2.urlopen(indexUrl).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
			return

		idoc = xml.etree.ElementTree.fromstring(indexXml)
		itemEls = idoc.findall('.//item')
		for itemEl in itemEls:
			# guid looks like '...:<show>.com:...:<shortid>'.
			mediaId = itemEl.findall('./guid')[0].text
			shortMediaId = mediaId.split(':')[-1]
			showId = mediaId.split(':')[-2].replace('.com', '')
			officialTitle = itemEl.findall('./title')[0].text
			officialDate = itemEl.findall('./pubDate')[0].text

			# Per-item configuration XML lists the available renditions.
			configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
						urllib.urlencode({'uri': mediaId}))
			configReq = urllib2.Request(configUrl)
			self.report_config_download(epTitle)
			try:
				configXml = urllib2.urlopen(configReq).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
				return

			cdoc = xml.etree.ElementTree.fromstring(configXml)
			turls = []
			for rendition in cdoc.findall('.//rendition'):
				finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
				turls.append(finfo)

			if len(turls) == 0:
				self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
				continue

			# For now, just pick the highest bitrate
			format,video_url = turls[-1]

			self._downloader.increment_downloads()

			effTitle = showId + u'-' + epTitle
			info = {
				'id': shortMediaId,
				'url': video_url,
				'uploader': showId,
				'upload_date': officialDate,
				'title': effTitle,
				'stitle': _simplify_title(effTitle),
				'ext': 'mp4',
				'format': format,
				'thumbnail': None,
				'description': officialTitle,
				'player_url': playerUrl
			}

			try:
				self._downloader.process_info(info)
			except UnavailableVideoError, err:
				self._downloader.trouble(u'\nERROR: unable to download ' + mediaId)
				continue
3258
3259
3260 class EscapistIE(InfoExtractor):
3261         """Information extractor for The Escapist """
3262
3263         _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
3264         IE_NAME = u'escapist'
3265
3266         def report_extraction(self, showName):
3267                 self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName)
3268
3269         def report_config_download(self, showName):
3270                 self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName)
3271
3272         def _real_extract(self, url):
3273                 htmlParser = HTMLParser.HTMLParser()
3274
3275                 mobj = re.match(self._VALID_URL, url)
3276                 if mobj is None:
3277                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3278                         return
3279                 showName = mobj.group('showname')
3280                 videoId = mobj.group('episode')
3281
3282                 self.report_extraction(showName)
3283                 try:
3284                         webPage = urllib2.urlopen(url).read()
3285                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3286                         self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err))
3287                         return
3288
3289                 descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
3290                 description = htmlParser.unescape(descMatch.group(1))
3291                 imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
3292                 imgUrl = htmlParser.unescape(imgMatch.group(1))
3293                 playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
3294                 playerUrl = htmlParser.unescape(playerUrlMatch.group(1))
3295                 configUrlMatch = re.search('config=(.*)$', playerUrl)
3296                 configUrl = urllib2.unquote(configUrlMatch.group(1))
3297
3298                 self.report_config_download(showName)
3299                 try:
3300                         configJSON = urllib2.urlopen(configUrl).read()
3301                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3302                         self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err))
3303                         return
3304
3305                 # Technically, it's JavaScript, not JSON
3306                 configJSON = configJSON.replace("'", '"')
3307
3308                 try:
3309                         config = json.loads(configJSON)
3310                 except (ValueError,), err:
3311                         self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err))
3312                         return
3313
3314                 playlist = config['playlist']
3315                 videoUrl = playlist[1]['url']
3316
3317                 self._downloader.increment_downloads()
3318                 info = {
3319                         'id': videoId,
3320                         'url': videoUrl,
3321                         'uploader': showName,
3322                         'upload_date': None,
3323                         'title': showName,
3324                         'stitle': _simplify_title(showName),
3325                         'ext': 'flv',
3326                         'format': 'flv',
3327                         'thumbnail': imgUrl,
3328                         'description': description,
3329                         'player_url': playerUrl,
3330                 }
3331
3332                 try:
3333                         self._downloader.process_info(info)
3334                 except UnavailableVideoError, err:
3335                         self._downloader.trouble(u'\nERROR: unable to download ' + videoId)
3336
3337
3338 class CollegeHumorIE(InfoExtractor):
3339         """Information extractor for collegehumor.com"""
3340
3341         _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
3342         IE_NAME = u'collegehumor'
3343
3344         def report_webpage(self, video_id):
3345                 """Report information extraction."""
3346                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3347
3348         def report_extraction(self, video_id):
3349                 """Report information extraction."""
3350                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3351
3352         def _real_extract(self, url):
3353                 htmlParser = HTMLParser.HTMLParser()
3354
3355                 mobj = re.match(self._VALID_URL, url)
3356                 if mobj is None:
3357                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3358                         return
3359                 video_id = mobj.group('videoid')
3360
3361                 self.report_webpage(video_id)
3362                 request = urllib2.Request(url)
3363                 try:
3364                         webpage = urllib2.urlopen(request).read()
3365                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3366                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
3367                         return
3368
3369                 m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage)
3370                 if m is None:
3371                         self._downloader.trouble(u'ERROR: Cannot extract internal video ID')
3372                         return
3373                 internal_video_id = m.group('internalvideoid')
3374
3375                 info = {
3376                         'id': video_id,
3377                         'internal_id': internal_video_id,
3378                 }
3379
3380                 self.report_extraction(video_id)
3381                 xmlUrl = 'http://www.collegehumor.com/moogaloop/video:' + internal_video_id
3382                 try:
3383                         metaXml = urllib2.urlopen(xmlUrl).read()
3384                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3385                         self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err))
3386                         return
3387
3388                 mdoc = xml.etree.ElementTree.fromstring(metaXml)
3389                 try:
3390                         videoNode = mdoc.findall('./video')[0]
3391                         info['description'] = videoNode.findall('./description')[0].text
3392                         info['title'] = videoNode.findall('./caption')[0].text
3393                         info['stitle'] = _simplify_title(info['title'])
3394                         info['url'] = videoNode.findall('./file')[0].text
3395                         info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
3396                         info['ext'] = info['url'].rpartition('.')[2]
3397                         info['format'] = info['ext']
3398                 except IndexError:
3399                         self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
3400                         return
3401
3402                 self._downloader.increment_downloads()
3403
3404                 try:
3405                         self._downloader.process_info(info)
3406                 except UnavailableVideoError, err:
3407                         self._downloader.trouble(u'\nERROR: unable to download video')
3408
3409
3410 class XVideosIE(InfoExtractor):
3411         """Information extractor for xvideos.com"""
3412
3413         _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
3414         IE_NAME = u'xvideos'
3415
3416         def report_webpage(self, video_id):
3417                 """Report information extraction."""
3418                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3419
3420         def report_extraction(self, video_id):
3421                 """Report information extraction."""
3422                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3423
3424         def _real_extract(self, url):
3425                 htmlParser = HTMLParser.HTMLParser()
3426
3427                 mobj = re.match(self._VALID_URL, url)
3428                 if mobj is None:
3429                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3430                         return
3431                 video_id = mobj.group(1).decode('utf-8')
3432
3433                 self.report_webpage(video_id)
3434
3435                 request = urllib2.Request(r'http://www.xvideos.com/video' + video_id)
3436                 try:
3437                         webpage = urllib2.urlopen(request).read()
3438                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3439                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
3440                         return
3441
3442                 self.report_extraction(video_id)
3443
3444
3445                 # Extract video URL
3446                 mobj = re.search(r'flv_url=(.+?)&', webpage)
3447                 if mobj is None:
3448                         self._downloader.trouble(u'ERROR: unable to extract video url')
3449                         return
3450                 video_url = urllib2.unquote(mobj.group(1).decode('utf-8'))
3451
3452
3453                 # Extract title
3454                 mobj = re.search(r'<title>(.*?)\s+-\s+XVID', webpage)
3455                 if mobj is None:
3456                         self._downloader.trouble(u'ERROR: unable to extract video title')
3457                         return
3458                 video_title = mobj.group(1).decode('utf-8')
3459
3460
3461                 # Extract video thumbnail
3462                 mobj = re.search(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]/[a-fA-F0-9]/[a-fA-F0-9]/([a-fA-F0-9.]+jpg)', webpage)
3463                 if mobj is None:
3464                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
3465                         return
3466                 video_thumbnail = mobj.group(1).decode('utf-8')
3467
3468
3469
3470                 self._downloader.increment_downloads()
3471                 info = {
3472                         'id': video_id,
3473                         'url': video_url,
3474                         'uploader': None,
3475                         'upload_date': None,
3476                         'title': video_title,
3477                         'stitle': _simplify_title(video_title),
3478                         'ext': 'flv',
3479                         'format': 'flv',
3480                         'thumbnail': video_thumbnail,
3481                         'description': None,
3482                         'player_url': None,
3483                 }
3484
3485                 try:
3486                         self._downloader.process_info(info)
3487                 except UnavailableVideoError, err:
3488                         self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
3489
3490
3491 class SoundcloudIE(InfoExtractor):
3492         """Information extractor for soundcloud.com
3493            To access the media, the uid of the song and a stream token
3494            must be extracted from the page source and the script must make
3495            a request to media.soundcloud.com/crossdomain.xml. Then
3496            the media can be grabbed by requesting from an url composed
3497            of the stream token and uid
3498          """
3499
3500         _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
3501         IE_NAME = u'soundcloud'
3502
3503         def __init__(self, downloader=None):
3504                 InfoExtractor.__init__(self, downloader)
3505
3506         def report_webpage(self, video_id):
3507                 """Report information extraction."""
3508                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3509
3510         def report_extraction(self, video_id):
3511                 """Report information extraction."""
3512                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3513
3514         def _real_extract(self, url):
3515                 htmlParser = HTMLParser.HTMLParser()
3516
3517                 mobj = re.match(self._VALID_URL, url)
3518                 if mobj is None:
3519                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3520                         return
3521
3522                 # extract uploader (which is in the url)
3523                 uploader = mobj.group(1).decode('utf-8')
3524                 # extract simple title (uploader + slug of song title)
3525                 slug_title =  mobj.group(2).decode('utf-8')
3526                 simple_title = uploader + '-' + slug_title
3527
3528                 self.report_webpage('%s/%s' % (uploader, slug_title))
3529
3530                 request = urllib2.Request('http://soundcloud.com/%s/%s' % (uploader, slug_title))
3531                 try:
3532                         webpage = urllib2.urlopen(request).read()
3533                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3534                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
3535                         return
3536
3537                 self.report_extraction('%s/%s' % (uploader, slug_title))
3538
3539                 # extract uid and stream token that soundcloud hands out for access
3540                 mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', webpage)
3541                 if mobj:
3542                         video_id = mobj.group(1)
3543                         stream_token = mobj.group(2)
3544
3545                 # extract unsimplified title
3546                 mobj = re.search('"title":"(.*?)",', webpage)
3547                 if mobj:
3548                         title = mobj.group(1)
3549
3550                 # construct media url (with uid/token)
3551                 mediaURL = "http://media.soundcloud.com/stream/%s?stream_token=%s"
3552                 mediaURL = mediaURL % (video_id, stream_token)
3553
3554                 # description
3555                 description = u'No description available'
3556                 mobj = re.search('track-description-value"><p>(.*?)</p>', webpage)
3557                 if mobj:
3558                         description = mobj.group(1)
3559                 
3560                 # upload date
3561                 upload_date = None
3562                 mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage)
3563                 if mobj:
3564                         try:
3565                                 upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
3566                         except Exception, e:
3567                                 print str(e)
3568
3569                 # for soundcloud, a request to a cross domain is required for cookies
3570                 request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
3571
3572                 try:
3573                         self._downloader.process_info({
3574                                 'id':           video_id.decode('utf-8'),
3575                                 'url':          mediaURL,
3576                                 'uploader':     uploader.decode('utf-8'),
3577                                 'upload_date':  upload_date,
3578                                 'title':        simple_title.decode('utf-8'),
3579                                 'stitle':       simple_title.decode('utf-8'),
3580                                 'ext':          u'mp3',
3581                                 'format':       u'NA',
3582                                 'player_url':   None,
3583                                 'description': description.decode('utf-8')
3584                         })
3585                 except UnavailableVideoError:
3586                         self._downloader.trouble(u'\nERROR: unable to download video')
3587
3588
3589 class InfoQIE(InfoExtractor):
3590         """Information extractor for infoq.com"""
3591
3592         _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
3593         IE_NAME = u'infoq'
3594
3595         def report_webpage(self, video_id):
3596                 """Report information extraction."""
3597                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3598
3599         def report_extraction(self, video_id):
3600                 """Report information extraction."""
3601                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3602
3603         def _real_extract(self, url):
3604                 htmlParser = HTMLParser.HTMLParser()
3605
3606                 mobj = re.match(self._VALID_URL, url)
3607                 if mobj is None:
3608                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3609                         return
3610
3611                 self.report_webpage(url)
3612
3613                 request = urllib2.Request(url)
3614                 try:
3615                         webpage = urllib2.urlopen(request).read()
3616                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3617                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
3618                         return
3619
3620                 self.report_extraction(url)
3621
3622
3623                 # Extract video URL
3624                 mobj = re.search(r"jsclassref='([^']*)'", webpage)
3625                 if mobj is None:
3626                         self._downloader.trouble(u'ERROR: unable to extract video url')
3627                         return
3628                 video_url = 'rtmpe://video.infoq.com/cfx/st/' + urllib2.unquote(mobj.group(1).decode('base64'))
3629
3630
3631                 # Extract title
3632                 mobj = re.search(r'contentTitle = "(.*?)";', webpage)
3633                 if mobj is None:
3634                         self._downloader.trouble(u'ERROR: unable to extract video title')
3635                         return
3636                 video_title = mobj.group(1).decode('utf-8')
3637
3638                 # Extract description
3639                 video_description = u'No description available.'
3640                 mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', webpage)
3641                 if mobj is not None:
3642                         video_description = mobj.group(1).decode('utf-8')
3643
3644                 video_filename = video_url.split('/')[-1]
3645                 video_id, extension = video_filename.split('.')
3646
3647                 self._downloader.increment_downloads()
3648                 info = {
3649                         'id': video_id,
3650                         'url': video_url,
3651                         'uploader': None,
3652                         'upload_date': None,
3653                         'title': video_title,
3654                         'stitle': _simplify_title(video_title),
3655                         'ext': extension,
3656                         'format': extension, # Extension is always(?) mp4, but seems to be flv
3657                         'thumbnail': None,
3658                         'description': video_description,
3659                         'player_url': None,
3660                 }
3661
3662                 try:
3663                         self._downloader.process_info(info)
3664                 except UnavailableVideoError, err:
3665                         self._downloader.trouble(u'\nERROR: unable to download ' + video_url)
3666
3667 class MixcloudIE(InfoExtractor):
3668         """Information extractor for www.mixcloud.com"""
3669         _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
3670         IE_NAME = u'mixcloud'
3671
3672         def __init__(self, downloader=None):
3673                 InfoExtractor.__init__(self, downloader)
3674
3675         def report_download_json(self, file_id):
3676                 """Report JSON download."""
3677                 self._downloader.to_screen(u'[%s] Downloading json' % self.IE_NAME)
3678
3679         def report_extraction(self, file_id):
3680                 """Report information extraction."""
3681                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))
3682
3683         def get_urls(self, jsonData, fmt, bitrate='best'):
3684                 """Get urls from 'audio_formats' section in json"""
3685                 file_url = None
3686                 try:
3687                         bitrate_list = jsonData[fmt]
3688                         if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
3689                                 bitrate = max(bitrate_list) # select highest
3690
3691                         url_list = jsonData[fmt][bitrate]
3692                 except TypeError: # we have no bitrate info.
3693                         url_list = jsonData[fmt]
3694                                 
3695                 return url_list
3696
3697         def check_urls(self, url_list):
3698                 """Returns 1st active url from list"""
3699                 for url in url_list:
3700                         try:
3701                                 urllib2.urlopen(url)
3702                                 return url
3703                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3704                                 url = None
3705
3706                 return None
3707
3708         def _print_formats(self, formats):
3709                 print 'Available formats:'
3710                 for fmt in formats.keys():
3711                         for b in formats[fmt]:
3712                                 try:
3713                                         ext = formats[fmt][b][0]
3714                                         print '%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])
3715                                 except TypeError: # we have no bitrate info
3716                                         ext = formats[fmt][0]
3717                                         print '%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])
3718                                         break
3719
3720         def _real_extract(self, url):
3721                 mobj = re.match(self._VALID_URL, url)
3722                 if mobj is None:
3723                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3724                         return
3725                 # extract uploader & filename from url
3726                 uploader = mobj.group(1).decode('utf-8')
3727                 file_id = uploader + "-" + mobj.group(2).decode('utf-8')
3728
3729                 # construct API request
3730                 file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json'
3731                 # retrieve .json file with links to files
3732                 request = urllib2.Request(file_url)
3733                 try:
3734                         self.report_download_json(file_url)
3735                         jsonData = urllib2.urlopen(request).read()
3736                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3737                         self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % str(err))
3738                         return
3739
3740                 # parse JSON
3741                 json_data = json.loads(jsonData)
3742                 player_url = json_data['player_swf_url']
3743                 formats = dict(json_data['audio_formats'])
3744
3745                 req_format = self._downloader.params.get('format', None)
3746                 bitrate = None
3747
3748                 if self._downloader.params.get('listformats', None):
3749                         self._print_formats(formats)
3750                         return
3751
3752                 if req_format is None or req_format == 'best':
3753                         for format_param in formats.keys():
3754                                 url_list = self.get_urls(formats, format_param)
3755                                 # check urls
3756                                 file_url = self.check_urls(url_list)
3757                                 if file_url is not None:
3758                                         break # got it!
3759                 else:
3760                         if req_format not in formats.keys():
3761                                 self._downloader.trouble(u'ERROR: format is not available')
3762                                 return
3763
3764                         url_list = self.get_urls(formats, req_format)
3765                         file_url = self.check_urls(url_list)
3766                         format_param = req_format
3767
3768                 # We have audio
3769                 self._downloader.increment_downloads()
3770                 try:
3771                         # Process file information
3772                         self._downloader.process_info({
3773                                 'id': file_id.decode('utf-8'),
3774                                 'url': file_url.decode('utf-8'),
3775                                 'uploader':     uploader.decode('utf-8'),
3776                                 'upload_date': u'NA',
3777                                 'title': json_data['name'],
3778                                 'stitle': _simplify_title(json_data['name']),
3779                                 'ext': file_url.split('.')[-1].decode('utf-8'),
3780                                 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
3781                                 'thumbnail': json_data['thumbnail_url'],
3782                                 'description': json_data['description'],
3783                                 'player_url': player_url.decode('utf-8'),
3784                         })
3785                 except UnavailableVideoError, err:
3786                         self._downloader.trouble(u'ERROR: unable to download file')
3787
class StanfordOpenClassroomIE(InfoExtractor):
	"""Information extractor for Stanford's Open ClassRoom"""

	# Matches three URL shapes: the site root, a course page
	# (?course=...) and a single video page (?course=...&video=...).
	# The named groups drive the three-way dispatch in _real_extract.
	_VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
	IE_NAME = u'stanfordoc'

	def report_download_webpage(self, objid):
		"""Report information extraction."""
		self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, objid))

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))

	def _real_extract(self, url):
		"""Extract a single video, a course playlist, or the whole site root.

		Course and root pages produce 'playlist'-type info dicts and
		recurse into each linked page via self.extract().
		"""
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		if mobj.group('course') and mobj.group('video'): # A specific video
			course = mobj.group('course')
			video = mobj.group('video')
			info = {
				'id': _simplify_title(course + '_' + video),
			}

			self.report_extraction(info['id'])
			# Metadata for each video lives in a per-video XML file next
			# to the media files.
			baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
			xmlUrl = baseUrl + video + '.xml'
			try:
				metaXml = urllib2.urlopen(xmlUrl).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % unicode(err))
				return
			mdoc = xml.etree.ElementTree.fromstring(metaXml)
			try:
				# findall()[0] raises IndexError when the element is missing
				info['title'] = mdoc.findall('./title')[0].text
				info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
			except IndexError:
				self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
				return
			info['stitle'] = _simplify_title(info['title'])
			info['ext'] = info['url'].rpartition('.')[2]
			info['format'] = info['ext']
			self._downloader.increment_downloads()
			try:
				self._downloader.process_info(info)
			except UnavailableVideoError, err:
				self._downloader.trouble(u'\nERROR: unable to download video')
		elif mobj.group('course'): # A course page
			unescapeHTML = HTMLParser.HTMLParser().unescape

			course = mobj.group('course')
			info = {
				'id': _simplify_title(course),
				'type': 'playlist',
			}

			self.report_download_webpage(info['id'])
			try:
				coursepage = urllib2.urlopen(url).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err))
				return

			# Course title; fall back to the simplified id if absent
			m = re.search('<h1>([^<]+)</h1>', coursepage)
			if m:
				info['title'] = unescapeHTML(m.group(1))
			else:
				info['title'] = info['id']
			info['stitle'] = _simplify_title(info['title'])

			m = re.search('<description>([^<]+)</description>', coursepage)
			if m:
				info['description'] = unescapeHTML(m.group(1))

			# Collect the course's video pages and recurse into each one
			links = _orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
			info['list'] = [
				{
					'type': 'reference',
					'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage),
				}
					for vpage in links]

			for entry in info['list']:
				assert entry['type'] == 'reference'
				self.extract(entry['url'])
		else: # Root page
			unescapeHTML = HTMLParser.HTMLParser().unescape

			info = {
				'id': 'Stanford OpenClassroom',
				'type': 'playlist',
			}

			self.report_download_webpage(info['id'])
			rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
			try:
				rootpage = urllib2.urlopen(rootURL).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err))
				return

			info['title'] = info['id']
			info['stitle'] = _simplify_title(info['title'])

			# Collect every course page and recurse into each one
			links = _orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
			info['list'] = [
				{
					'type': 'reference',
					'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage),
				}
					for cpage in links]

			for entry in info['list']:
				assert entry['type'] == 'reference'
				self.extract(entry['url'])
3907 class MTVIE(InfoExtractor):
3908         """Information extractor for MTV.com"""
3909
3910         _VALID_URL = r'^(?P<proto>https?://)?(?:www\.)?mtv\.com/videos/[^/]+/(?P<videoid>[0-9]+)/[^/]+$'
3911         IE_NAME = u'mtv'
3912
3913         def report_webpage(self, video_id):
3914                 """Report information extraction."""
3915                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3916
3917         def report_extraction(self, video_id):
3918                 """Report information extraction."""
3919                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3920
3921         def _real_extract(self, url):
3922                 mobj = re.match(self._VALID_URL, url)
3923                 if mobj is None:
3924                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3925                         return
3926                 if not mobj.group('proto'):
3927                         url = 'http://' + url
3928                 video_id = mobj.group('videoid')
3929                 self.report_webpage(video_id)
3930
3931                 request = urllib2.Request(url)
3932                 try:
3933                         webpage = urllib2.urlopen(request).read()
3934                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3935                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
3936                         return
3937
3938                 mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage)
3939                 if mobj is None:
3940                         self._downloader.trouble(u'ERROR: unable to extract song name')
3941                         return
3942                 song_name = _unescapeHTML(mobj.group(1).decode('iso-8859-1'))
3943                 mobj = re.search(r'<meta name="mtv_an" content="([^"]+)"/>', webpage)
3944                 if mobj is None:
3945                         self._downloader.trouble(u'ERROR: unable to extract performer')
3946                         return
3947                 performer = _unescapeHTML(mobj.group(1).decode('iso-8859-1'))
3948                 video_title = performer + ' - ' + song_name 
3949
3950                 mobj = re.search(r'<meta name="mtvn_uri" content="([^"]+)"/>', webpage)
3951                 if mobj is None:
3952                         self._downloader.trouble(u'ERROR: unable to mtvn_uri')
3953                         return
3954                 mtvn_uri = mobj.group(1)
3955
3956                 mobj = re.search(r'MTVN.Player.defaultPlaylistId = ([0-9]+);', webpage)
3957                 if mobj is None:
3958                         self._downloader.trouble(u'ERROR: unable to extract content id')
3959                         return
3960                 content_id = mobj.group(1)
3961
3962                 videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri
3963                 self.report_extraction(video_id)
3964                 request = urllib2.Request(videogen_url)
3965                 try:
3966                         metadataXml = urllib2.urlopen(request).read()
3967                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3968                         self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % str(err))
3969                         return
3970
3971                 mdoc = xml.etree.ElementTree.fromstring(metadataXml)
3972                 renditions = mdoc.findall('.//rendition')
3973
3974                 # For now, always pick the highest quality.
3975                 rendition = renditions[-1]
3976
3977                 try:
3978                         _,_,ext = rendition.attrib['type'].partition('/')
3979                         format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
3980                         video_url = rendition.find('./src').text
3981                 except KeyError:
3982                         self._downloader.trouble('Invalid rendition field.')
3983                         return
3984
3985                 self._downloader.increment_downloads()
3986                 info = {
3987                         'id': video_id,
3988                         'url': video_url,
3989                         'uploader': performer,
3990                         'title': video_title,
3991                         'stitle': _simplify_title(video_title),
3992                         'ext': ext,
3993                         'format': format,
3994                 }
3995
3996                 try:
3997                         self._downloader.process_info(info)
3998                 except UnavailableVideoError, err:
3999                         self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
4000
4001
class PostProcessor(object):
	"""Base class for postprocessing steps.

	A PostProcessor is attached to a downloader via its
	add_post_processor() method. After every successful download the
	downloader walks its chain of PostProcessors, feeding each step the
	value returned by the previous one (the first step receives the
	downloader's initial argument).

	The chain stops as soon as a step returns None or the last step has
	run. Like InfoExtractor objects, PostProcessors follow a "mutual
	registration" process with their downloader.
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Attach this postprocessor to a downloader."""
		self._downloader = downloader

	def run(self, information):
		"""Process one downloaded file.

		"information" is an InfoExtractor-style dictionary with one
		extra key, "filepath", naming the downloaded file on disk.

		Returning None aborts the remainder of the chain; returning a
		dictionary (possibly this one with fields changed) passes it on
		to the next postprocessor. A PostProcessingError may also be
		raised and will be handled by the calling downloader.
		"""
		return information # default implementation: pass through untouched
4047
class AudioConversionError(Exception):
	"""Raised when ffmpeg fails to convert an extracted audio stream.

	Derives from Exception (not BaseException) so generic
	"except Exception" handlers can see it; existing
	"except AudioConversionError" call sites are unaffected.
	"""

	def __init__(self, message):
		# Initialize the base class too, so str(err) shows the message.
		Exception.__init__(self, message)
		# Keep the .message attribute that callers read directly.
		self.message = message
4051
class FFmpegExtractAudioPP(PostProcessor):
	"""Postprocessor that extracts the audio track of a downloaded video
	into a standalone audio file using ffmpeg/ffprobe."""

	def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
		# preferredcodec: target codec name, or 'best' to keep the source
		# codec losslessly when possible.
		# preferredquality: bitrate passed to ffmpeg's -ab option.
		# keepvideo: if False, the source video file is deleted afterwards.
		PostProcessor.__init__(self, downloader)
		if preferredcodec is None:
			preferredcodec = 'best'
		self._preferredcodec = preferredcodec
		self._preferredquality = preferredquality
		self._keepvideo = keepvideo

	@staticmethod
	def get_audio_codec(path):
		"""Return the audio codec name of the file at path via ffprobe,
		or None if ffprobe is unavailable or no audio stream is found."""
		try:
			cmd = ['ffprobe', '-show_streams', '--', _encodeFilename(path)]
			handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
			output = handle.communicate()[0]
			if handle.wait() != 0:
				return None
		except (IOError, OSError):
			return None
		audio_codec = None
		# ffprobe prints codec_name before codec_type within each stream
		# block, so remember the last codec_name seen and report it once
		# an audio stream is confirmed.
		for line in output.split('\n'):
			if line.startswith('codec_name='):
				audio_codec = line.split('=')[1].strip()
			elif line.strip() == 'codec_type=audio' and audio_codec is not None:
				return audio_codec
		return None

	@staticmethod
	def run_ffmpeg(path, out_path, codec, more_opts):
		"""Run ffmpeg to write the audio of path to out_path.

		codec may be None to let ffmpeg pick. Raises AudioConversionError
		on failure (including ffmpeg not being installed).
		"""
		if codec is None:
			acodec_opts = []
		else:
			acodec_opts = ['-acodec', codec]
		cmd = ['ffmpeg', '-y', '-i', _encodeFilename(path), '-vn'] + acodec_opts + more_opts + ['--', _encodeFilename(out_path)]
		try:
			p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
			stdout,stderr = p.communicate()
		except (IOError, OSError):
			e = sys.exc_info()[1]
			# errno 2 == ENOENT: the ffmpeg binary itself is missing
			if isinstance(e, OSError) and e.errno == 2:
				raise AudioConversionError('ffmpeg not found. Please install ffmpeg.')
			else:
				raise e
		if p.returncode != 0:
			# Surface ffmpeg's last stderr line as the error message
			msg = stderr.strip().split('\n')[-1]
			raise AudioConversionError(msg)

	def run(self, information):
		"""Extract/convert audio from information['filepath'].

		Chooses between lossless stream copy and lossy re-encoding based
		on the preferred codec, then optionally removes the source video.
		Returns the updated information dict, or None on failure.
		"""
		path = information['filepath']

		filecodec = self.get_audio_codec(path)
		if filecodec is None:
			self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
			return None

		more_opts = []
		if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
			if self._preferredcodec == 'm4a' and filecodec == 'aac':
				# Lossless, but in another container
				acodec = 'copy'
				extension = self._preferredcodec
				more_opts = ['-absf', 'aac_adtstoasc']
			elif filecodec in ['aac', 'mp3', 'vorbis']:
				# Lossless if possible
				acodec = 'copy'
				extension = filecodec
				if filecodec == 'aac':
					more_opts = ['-f', 'adts']
				if filecodec == 'vorbis':
					extension = 'ogg'
			else:
				# MP3 otherwise.
				acodec = 'libmp3lame'
				extension = 'mp3'
				more_opts = []
				if self._preferredquality is not None:
					more_opts += ['-ab', self._preferredquality]
		else:
			# We convert the audio (lossy)
			acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
			extension = self._preferredcodec
			more_opts = []
			if self._preferredquality is not None:
				more_opts += ['-ab', self._preferredquality]
			if self._preferredcodec == 'aac':
				more_opts += ['-f', 'adts']
			if self._preferredcodec == 'm4a':
				more_opts += ['-absf', 'aac_adtstoasc']
			if self._preferredcodec == 'vorbis':
				extension = 'ogg'
			if self._preferredcodec == 'wav':
				extension = 'wav'
				more_opts += ['-f', 'wav']

		prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
		new_path = prefix + sep + extension
		self._downloader.to_screen(u'[ffmpeg] Destination: ' + new_path)
		try:
			self.run_ffmpeg(path, new_path, acodec, more_opts)
		except:
			etype,e,tb = sys.exc_info()
			if isinstance(e, AudioConversionError):
				self._downloader.to_stderr(u'ERROR: audio conversion failed: ' + e.message)
			else:
				self._downloader.to_stderr(u'ERROR: error running ffmpeg')
			return None

		# Try to update the date time for extracted audio file.
		if information.get('filetime') is not None:
			try:
				os.utime(_encodeFilename(new_path), (time.time(), information['filetime']))
			except:
				self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')

		if not self._keepvideo:
			try:
				os.remove(_encodeFilename(path))
			except (IOError, OSError):
				self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
				return None

		information['filepath'] = new_path
		return information
4176
4177
4178 def updateSelf(downloader, filename):
4179         ''' Update the program file with the latest version from the repository '''
4180         # Note: downloader only used for options
4181         if not os.access(filename, os.W_OK):
4182                 sys.exit('ERROR: no write permissions on %s' % filename)
4183
4184         downloader.to_screen(u'Updating to latest version...')
4185
4186         try:
4187                 try:
4188                         urlh = urllib.urlopen(UPDATE_URL)
4189                         newcontent = urlh.read()
4190                         
4191                         vmatch = re.search("__version__ = '([^']+)'", newcontent)
4192                         if vmatch is not None and vmatch.group(1) == __version__:
4193                                 downloader.to_screen(u'youtube-dl is up-to-date (' + __version__ + ')')
4194                                 return
4195                 finally:
4196                         urlh.close()
4197         except (IOError, OSError), err:
4198                 sys.exit('ERROR: unable to download latest version')
4199
4200         try:
4201                 outf = open(filename, 'wb')
4202                 try:
4203                         outf.write(newcontent)
4204                 finally:
4205                         outf.close()
4206         except (IOError, OSError), err:
4207                 sys.exit('ERROR: unable to overwrite current version')
4208
4209         downloader.to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.')
4210
def parseOpts():
	"""Build the option parser and parse the effective command line.

	Arguments are assembled from /etc/youtube-dl.conf, the per-user
	configuration file, and sys.argv (in that order, so later sources
	override earlier ones after optparse resolves duplicates).

	Returns a (parser, opts, args) tuple as produced by optparse.
	"""
	def _readOptions(filename_bytes):
		# Read extra command-line arguments from a configuration file;
		# a missing file is not an error.
		try:
			optionf = open(filename_bytes)
		except IOError:
			return [] # silently skip if file is not present
		try:
			res = []
			for l in optionf:
				res += shlex.split(l, comments=True)
		finally:
			optionf.close()
		return res

	def _format_option_string(option):
		''' ('-o', '--option') -> -o, --format METAVAR'''

		opts = []

		if option._short_opts: opts.append(option._short_opts[0])
		if option._long_opts: opts.append(option._long_opts[0])
		if len(opts) > 1: opts.insert(1, ', ')

		if option.takes_value(): opts.append(' %s' % option.metavar)

		return "".join(opts)

	def _find_term_columns():
		# Best-effort terminal width detection: $COLUMNS first, then
		# 'stty size'; returns None when the width cannot be determined.
		columns = os.environ.get('COLUMNS', None)
		if columns:
			return int(columns)

		try:
			sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
			out,err = sp.communicate()
			return int(out.split()[1])
		except:
			pass
		return None

	max_width = 80
	max_help_position = 80

	# No need to wrap help messages if we're on a wide console
	columns = _find_term_columns()
	if columns: max_width = columns

	fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
	fmt.format_option_strings = _format_option_string

	# conflict_handler='resolve' lets later option definitions silently
	# override earlier ones (e.g. -v ends up meaning --verbose, not --version).
	kw = {
		'version'   : __version__,
		'formatter' : fmt,
		'usage' : '%prog [options] url [url...]',
		'conflict_handler' : 'resolve',
	}

	parser = optparse.OptionParser(**kw)

	# option groups
	general        = optparse.OptionGroup(parser, 'General Options')
	selection      = optparse.OptionGroup(parser, 'Video Selection')
	authentication = optparse.OptionGroup(parser, 'Authentication Options')
	video_format   = optparse.OptionGroup(parser, 'Video Format Options')
	postproc       = optparse.OptionGroup(parser, 'Post-processing Options')
	filesystem     = optparse.OptionGroup(parser, 'Filesystem Options')
	verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')

	general.add_option('-h', '--help',
			action='help', help='print this help text and exit')
	general.add_option('-v', '--version',
			action='version', help='print program version and exit')
	general.add_option('-U', '--update',
			action='store_true', dest='update_self', help='update this program to latest version')
	general.add_option('-i', '--ignore-errors',
			action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
	general.add_option('-r', '--rate-limit',
			dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
	general.add_option('-R', '--retries',
			dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
	general.add_option('--dump-user-agent',
			action='store_true', dest='dump_user_agent',
			help='display the current browser identification', default=False)
	general.add_option('--list-extractors',
			action='store_true', dest='list_extractors',
			help='List all supported extractors and the URLs they would handle', default=False)

	selection.add_option('--playlist-start',
			dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
	selection.add_option('--playlist-end',
			dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
	selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
	selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
	selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)

	authentication.add_option('-u', '--username',
			dest='username', metavar='USERNAME', help='account username')
	authentication.add_option('-p', '--password',
			dest='password', metavar='PASSWORD', help='account password')
	authentication.add_option('-n', '--netrc',
			action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)


	video_format.add_option('-f', '--format',
			action='store', dest='format', metavar='FORMAT', help='video format code')
	video_format.add_option('--all-formats',
			action='store_const', dest='format', help='download all available video formats', const='all')
	video_format.add_option('--prefer-free-formats',
			action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested')
	video_format.add_option('--max-quality',
			action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
	video_format.add_option('-F', '--list-formats',
			action='store_true', dest='listformats', help='list all available formats (currently youtube only)')


	verbosity.add_option('-q', '--quiet',
			action='store_true', dest='quiet', help='activates quiet mode', default=False)
	verbosity.add_option('-s', '--simulate',
			action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
	verbosity.add_option('--skip-download',
			action='store_true', dest='skip_download', help='do not download the video', default=False)
	verbosity.add_option('-g', '--get-url',
			action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
	verbosity.add_option('-e', '--get-title',
			action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
	verbosity.add_option('--get-thumbnail',
			action='store_true', dest='getthumbnail',
			help='simulate, quiet but print thumbnail URL', default=False)
	verbosity.add_option('--get-description',
			action='store_true', dest='getdescription',
			help='simulate, quiet but print video description', default=False)
	verbosity.add_option('--get-filename',
			action='store_true', dest='getfilename',
			help='simulate, quiet but print output filename', default=False)
	verbosity.add_option('--get-format',
			action='store_true', dest='getformat',
			help='simulate, quiet but print output format', default=False)
	verbosity.add_option('--no-progress',
			action='store_true', dest='noprogress', help='do not print progress bar', default=False)
	verbosity.add_option('--console-title',
			action='store_true', dest='consoletitle',
			help='display progress in console titlebar', default=False)
	verbosity.add_option('-v', '--verbose',
			action='store_true', dest='verbose', help='print various debugging information', default=False)


	filesystem.add_option('-t', '--title',
			action='store_true', dest='usetitle', help='use title in file name', default=False)
	filesystem.add_option('-l', '--literal',
			action='store_true', dest='useliteral', help='use literal title in file name', default=False)
	filesystem.add_option('-A', '--auto-number',
			action='store_true', dest='autonumber',
			help='number downloaded files starting from 00000', default=False)
	filesystem.add_option('-o', '--output',
			dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), and %% for a literal percent. Use - to output to stdout.')
	filesystem.add_option('-a', '--batch-file',
			dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
	filesystem.add_option('-w', '--no-overwrites',
			action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
	filesystem.add_option('-c', '--continue',
			action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
	filesystem.add_option('--no-continue',
			action='store_false', dest='continue_dl',
			help='do not resume partially downloaded files (restart from beginning)')
	filesystem.add_option('--cookies',
			dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
	filesystem.add_option('--no-part',
			action='store_true', dest='nopart', help='do not use .part files', default=False)
	filesystem.add_option('--no-mtime',
			action='store_false', dest='updatetime',
			help='do not use the Last-modified header to set the file modification time', default=True)
	filesystem.add_option('--write-description',
			action='store_true', dest='writedescription',
			help='write video description to a .description file', default=False)
	filesystem.add_option('--write-info-json',
			action='store_true', dest='writeinfojson',
			help='write video metadata to a .info.json file', default=False)


	postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
			help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
	postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
			help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
	postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
			help='ffmpeg audio bitrate specification, 128k by default')
	postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
			help='keeps the video file on disk after the post-processing; the video is erased by default')


	parser.add_option_group(general)
	parser.add_option_group(selection)
	parser.add_option_group(filesystem)
	parser.add_option_group(verbosity)
	parser.add_option_group(video_format)
	parser.add_option_group(authentication)
	parser.add_option_group(postproc)

	# Per-user config lives in $XDG_CONFIG_HOME/youtube-dl.conf, falling
	# back to ~/.config/youtube-dl.conf when XDG_CONFIG_HOME is unset.
	xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
	if xdg_config_home:
		userConf = os.path.join(xdg_config_home, 'youtube-dl.conf')
	else:
		userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
	argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:]
	opts, args = parser.parse_args(argv)

	return parser, opts, args
4417
def gen_extractors():
	""" Return a list of an instance of every supported extractor.
	The order does matter; the first extractor matched is the one handling the URL.
	"""
	# A few extractors double as the backend for their search/playlist
	# variants, so they are instantiated once and shared.
	yt = YoutubeIE()
	goog = GoogleIE()
	yah = YahooIE()

	extractors = [
		YoutubePlaylistIE(yt),
		YoutubeUserIE(yt),
		YoutubeSearchIE(yt),
		yt,
		MetacafeIE(yt),
		DailymotionIE(),
		goog,
		GoogleSearchIE(goog),
		PhotobucketIE(),
		yah,
		YahooSearchIE(yah),
		DepositFilesIE(),
		FacebookIE(),
		BlipTVIE(),
		VimeoIE(),
		MyVideoIE(),
		ComedyCentralIE(),
		EscapistIE(),
		CollegeHumorIE(),
		XVideosIE(),
		SoundcloudIE(),
		InfoQIE(),
		MixcloudIE(),
		StanfordOpenClassroomIE(),
		MTVIE(),
	]
	# The generic extractor goes last, after every more specific one.
	extractors.append(GenericIE())
	return extractors
4454
def _real_main():
	"""Program core: parse the options, configure the global urllib2
	opener and the FileDownloader, then process every requested URL.

	Always leaves through sys.exit() or parser.error(); it does not
	return normally.
	"""
	parser, opts, args = parseOpts()

	# Open appropriate CookieJar
	if opts.cookiefile is None:
		jar = cookielib.CookieJar()
	else:
		try:
			jar = cookielib.MozillaCookieJar(opts.cookiefile)
			# Only load the file if it already exists and is readable;
			# a brand-new cookie file is created on save instead.
			if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
				jar.load()
		except (IOError, OSError), err:
			sys.exit(u'ERROR: unable to open cookie file')

	# Dump user agent
	if opts.dump_user_agent:
		print std_headers['User-Agent']
		sys.exit(0)

	# Batch file verification
	batchurls = []
	if opts.batchfile is not None:
		try:
			if opts.batchfile == '-':
				batchfd = sys.stdin
			else:
				batchfd = open(opts.batchfile, 'r')
			batchurls = batchfd.readlines()
			batchurls = [x.strip() for x in batchurls]
			# Lines starting with '#', '/' or ';' are treated as comments.
			batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
		except IOError:
			sys.exit(u'ERROR: batch file could not be read')
	all_urls = batchurls + args

	# General configuration
	cookie_processor = urllib2.HTTPCookieProcessor(jar)
	proxy_handler = urllib2.ProxyHandler()
	opener = urllib2.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
	urllib2.install_opener(opener)
	socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

	if opts.verbose:
		print(u'[debug] Proxy map: ' + str(proxy_handler.proxies))

	extractors = gen_extractors()

	# --list-extractors: print each extractor and which of the given URLs
	# it would handle, then exit without downloading anything.
	if opts.list_extractors:
		for ie in extractors:
			print(ie.IE_NAME)
			matchedUrls = filter(lambda url: ie.suitable(url), all_urls)
			all_urls = filter(lambda url: url not in matchedUrls, all_urls)
			for mu in matchedUrls:
				print(u'  ' + mu)
		sys.exit(0)

	# Conflicting, missing and erroneous options
	if opts.usenetrc and (opts.username is not None or opts.password is not None):
		parser.error(u'using .netrc conflicts with giving username/password')
	if opts.password is not None and opts.username is None:
		parser.error(u'account username missing')
	if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
		parser.error(u'using output template conflicts with using title, literal title or auto number')
	if opts.usetitle and opts.useliteral:
		parser.error(u'using title conflicts with using literal title')
	if opts.username is not None and opts.password is None:
		opts.password = getpass.getpass(u'Type account password and press return:')
	if opts.ratelimit is not None:
		numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
		if numeric_limit is None:
			parser.error(u'invalid rate limit specified')
		opts.ratelimit = numeric_limit
	if opts.retries is not None:
		try:
			opts.retries = long(opts.retries)
		except (TypeError, ValueError), err:
			parser.error(u'invalid retry count specified')
	try:
		opts.playliststart = int(opts.playliststart)
		if opts.playliststart <= 0:
			raise ValueError(u'Playlist start must be positive')
	except (TypeError, ValueError), err:
		parser.error(u'invalid playlist start number specified')
	try:
		opts.playlistend = int(opts.playlistend)
		# -1 means "until the last entry"; any other value must be a valid
		# position not before the playlist start.
		if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
			raise ValueError(u'Playlist end must be greater than playlist start')
	except (TypeError, ValueError), err:
		parser.error(u'invalid playlist end number specified')
	if opts.extractaudio:
		if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']:
			parser.error(u'invalid audio format specified')

	# File downloader
	fd = FileDownloader({
		'usenetrc': opts.usenetrc,
		'username': opts.username,
		'password': opts.password,
		# Any of the "get X" modes implies quiet + simulation.
		'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
		'forceurl': opts.geturl,
		'forcetitle': opts.gettitle,
		'forcethumbnail': opts.getthumbnail,
		'forcedescription': opts.getdescription,
		'forcefilename': opts.getfilename,
		'forceformat': opts.getformat,
		'simulate': opts.simulate,
		'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
		'format': opts.format,
		'format_limit': opts.format_limit,
		'listformats': opts.listformats,
		# Output template: explicit -o wins, otherwise derive a template
		# from the title/literal/autonumber/format flags.
		'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
			or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
			or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
			or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
			or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
			or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
			or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
			or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
			or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
			or u'%(id)s.%(ext)s'),
		'ignoreerrors': opts.ignoreerrors,
		'ratelimit': opts.ratelimit,
		'nooverwrites': opts.nooverwrites,
		'retries': opts.retries,
		'continuedl': opts.continue_dl,
		'noprogress': opts.noprogress,
		'playliststart': opts.playliststart,
		'playlistend': opts.playlistend,
		'logtostderr': opts.outtmpl == '-',
		'consoletitle': opts.consoletitle,
		'nopart': opts.nopart,
		'updatetime': opts.updatetime,
		'writedescription': opts.writedescription,
		'writeinfojson': opts.writeinfojson,
		'matchtitle': opts.matchtitle,
		'rejecttitle': opts.rejecttitle,
		'max_downloads': opts.max_downloads,
		'prefer_free_formats': opts.prefer_free_formats,
		})
	for extractor in extractors:
		fd.add_info_extractor(extractor)

	# PostProcessors
	if opts.extractaudio:
		fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))

	# Update version
	if opts.update_self:
		updateSelf(fd, sys.argv[0])

	# Maybe do nothing
	if len(all_urls) < 1:
		if not opts.update_self:
			parser.error(u'you must provide at least one URL')
		else:
			sys.exit()

	try:
		retcode = fd.download(all_urls)
	except MaxDownloadsReached:
		fd.to_screen(u'--max-download limit reached, aborting.')
		retcode = 101

	# Dump cookie jar if requested
	if opts.cookiefile is not None:
		try:
			jar.save()
		except (IOError, OSError), err:
			sys.exit(u'ERROR: unable to save cookie jar')

	sys.exit(retcode)
4625
def main():
	"""Entry point: run _real_main() and turn the known failure
	exceptions into process exit codes / messages."""
	try:
		_real_main()
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')
	except DownloadError:
		sys.exit(1)
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
4635
# Allow running this module directly as a script.
if __name__ == '__main__':
	main()
4638
4639 # vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: