Facebook info extractor
[youtube-dl.git] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # Author: Vasyl' Vavrychuk
7 # Author: Witold Baryluk
# Author: Paweł Paprota
9 # License: Public domain code
10 import cookielib
11 import ctypes
12 import datetime
13 import email.utils
14 import gzip
15 import htmlentitydefs
16 import httplib
17 import locale
18 import math
19 import netrc
20 import os
21 import os.path
22 import re
23 import socket
24 import string
25 import StringIO
26 import subprocess
27 import sys
28 import time
29 import urllib
30 import urllib2
31 import zlib
32
33 # parse_qs was moved from the cgi module to the urlparse module recently.
34 try:
35         from urlparse import parse_qs
36 except ImportError:
37         from cgi import parse_qs
38
# Standard headers attached to every HTTP request (see YoutubeDLHandler);
# a browser-like User-Agent avoids rejection by some servers.
std_headers = {
	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b10) Gecko/20100101 Firefox/4.0b10',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
	'Accept-Encoding': 'gzip, deflate',
	'Accept-Language': 'en-us,en;q=0.5',
}
46
47 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
48
def preferredencoding():
	"""Get preferred encoding.

	Returns the best encoding scheme for the system, based on
	locale.getpreferredencoding() and some further tweaks.
	"""
	try:
		pref = locale.getpreferredencoding()
		# Validate the reported codec by actually encoding with it;
		# some platforms report names Python cannot use.
		u'TEST'.encode(pref)
	except Exception:
		pref = 'UTF-8'
	return pref
64
def htmlentity_transform(matchobj):
	"""Transforms an HTML entity to a Unicode character.

	This function receives a match object and is intended to be used with
	the re.sub() function.
	"""
	entity = matchobj.group(1)

	# Known non-numeric HTML entity
	if entity in htmlentitydefs.name2codepoint:
		return unichr(htmlentitydefs.name2codepoint[entity])

	# Numeric character reference: decimal (&#65;) or hexadecimal (&#x41;).
	# Hex digits a-f must be accepted explicitly; plain \d would truncate
	# or reject references such as &#xFF;.
	mobj = re.match(u'(?u)#(x[0-9a-fA-F]+|[0-9]+)', entity)
	if mobj is not None:
		numstr = mobj.group(1)
		if numstr.startswith(u'x'):
			base = 16
			numstr = u'0%s' % numstr
		else:
			base = 10
		return unichr(long(numstr, base))

	# Unknown entity in name, return its literal representation
	return (u'&%s;' % entity)
90
def sanitize_title(utitle):
	"""Sanitizes a video title so it could be used as part of a filename."""
	# Decode HTML entities first, then make sure no path separator survives.
	decoded = re.sub(u'(?u)&(.+?);', htmlentity_transform, utitle)
	return decoded.replace(unicode(os.sep), u'%')
95
def sanitize_open(filename, open_mode):
	"""Try to open the given filename, and slightly tweak it if this fails.

	Attempts to open the given filename. If this fails, it tries to change
	the filename slightly, step by step, until it's either able to open it
	or it fails and raises a final exception, like the standard open()
	function.

	It returns the tuple (stream, definitive_file_name).
	"""
	try:
		if filename == u'-':
			# '-' means standard output; on Windows stdout must be put
			# into binary mode or video data would be mangled.
			if sys.platform == 'win32':
				import msvcrt
				msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
			return (sys.stdout, filename)
		stream = open(filename, open_mode)
		return (stream, filename)
	except (IOError, OSError), err:
		# In case of error, try to remove win32 forbidden chars
		filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)

		# An exception here should be caught in the caller
		stream = open(filename, open_mode)
		return (stream, filename)
121
def timeconvert(timestr):
	"""Convert RFC 2822 defined time string into system timestamp"""
	parsed = email.utils.parsedate_tz(timestr)
	if parsed is None:
		# The string did not look like an RFC 2822 date.
		return None
	return email.utils.mktime_tz(parsed)
129
class DownloadError(Exception):
	"""Download Error exception.

	Raised by FileDownloader objects that are not configured to continue
	on errors; carries the corresponding error message.
	"""
138
class SameFileError(Exception):
	"""Same File exception.

	Raised by FileDownloader objects when they detect that multiple files
	would have to be written to the same path on disk.
	"""
146
class PostProcessingError(Exception):
	"""Post Processing exception.

	May be raised by a PostProcessor's .run() method to signal a failure
	in the postprocessing task.
	"""
154
class UnavailableVideoError(Exception):
	"""Unavailable Format exception.

	Raised when a video is requested in a format that is not available
	for that video.
	"""
162
class ContentTooShortError(Exception):
	"""Content Too Short exception.

	Raised by FileDownloader objects when a downloaded file is smaller
	than the size the server announced, which indicates the connection
	was probably interrupted.
	"""
	# Both counters are in bytes.
	downloaded = None
	expected = None

	def __init__(self, downloaded, expected):
		self.expected = expected
		self.downloaded = downloaded
177
class YoutubeDLHandler(urllib2.HTTPHandler):
	"""Handler for HTTP requests and responses.

	This class, when installed with an OpenerDirector, automatically adds
	the standard headers to every HTTP request and handles gzipped and
	deflated responses from web servers. If compression is to be avoided in
	a particular request, the original request in the program code only has
	to include the HTTP header "Youtubedl-No-Compression", which will be
	removed before making the real request.

	Part of this code was copied from:

	  http://techknack.net/python-urllib2-handlers/

	Andrew Rowls, the author of that code, agreed to release it to the
	public domain.
	"""

	@staticmethod
	def deflate(data):
		# Try a raw deflate stream first (negative wbits = no zlib header),
		# since some servers send bare deflate; fall back to zlib-wrapped.
		try:
			return zlib.decompress(data, -zlib.MAX_WBITS)
		except zlib.error:
			return zlib.decompress(data)

	@staticmethod
	def addinfourl_wrapper(stream, headers, url, code):
		# Newer Pythons accept 'code' in the addinfourl constructor;
		# emulate that on older versions by setting the attribute.
		if hasattr(urllib2.addinfourl, 'getcode'):
			return urllib2.addinfourl(stream, headers, url, code)
		ret = urllib2.addinfourl(stream, headers, url)
		ret.code = code
		return ret

	def http_request(self, req):
		# Force the standard headers, replacing any same-named header
		# already present on the request.
		for h in std_headers:
			if h in req.headers:
				del req.headers[h]
			req.add_header(h, std_headers[h])
		# urllib2 normalizes header capitalization, so the marker header
		# shows up here as 'Youtubedl-no-compression'.
		if 'Youtubedl-no-compression' in req.headers:
			if 'Accept-encoding' in req.headers:
				del req.headers['Accept-encoding']
			del req.headers['Youtubedl-no-compression']
		return req

	def http_response(self, req, resp):
		# Wrap the response body in a decompressing stream when the
		# server applied gzip or deflate content-encoding.
		old_resp = resp
		# gzip
		if resp.headers.get('Content-encoding', '') == 'gzip':
			gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
			resp.msg = old_resp.msg
		# deflate
		if resp.headers.get('Content-encoding', '') == 'deflate':
			gz = StringIO.StringIO(self.deflate(resp.read()))
			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
			resp.msg = old_resp.msg
		return resp
235
236 class FileDownloader(object):
237         """File Downloader class.
238
239         File downloader objects are the ones responsible of downloading the
240         actual video file and writing it to disk if the user has requested
241         it, among some other tasks. In most cases there should be one per
242         program. As, given a video URL, the downloader doesn't know how to
243         extract all the needed information, task that InfoExtractors do, it
244         has to pass the URL to one of them.
245
246         For this, file downloader objects have a method that allows
247         InfoExtractors to be registered in a given order. When it is passed
248         a URL, the file downloader handles it to the first InfoExtractor it
249         finds that reports being able to handle it. The InfoExtractor extracts
250         all the information about the video or videos the URL refers to, and
251         asks the FileDownloader to process the video information, possibly
252         downloading the video.
253
254         File downloaders accept a lot of parameters. In order not to saturate
255         the object constructor with arguments, it receives a dictionary of
256         options instead. These options are available through the params
257         attribute for the InfoExtractors to use. The FileDownloader also
258         registers itself as the downloader in charge for the InfoExtractors
259         that are added to it, so this is a "mutual registration".
260
261         Available options:
262
263         username:         Username for authentication purposes.
264         password:         Password for authentication purposes.
265         usenetrc:         Use netrc for authentication instead.
266         quiet:            Do not print messages to stdout.
267         forceurl:         Force printing final URL.
268         forcetitle:       Force printing title.
269         forcethumbnail:   Force printing thumbnail URL.
270         forcedescription: Force printing description.
271         forcefilename:    Force printing final filename.
272         simulate:         Do not download the video files.
273         format:           Video format code.
274         format_limit:     Highest quality format to try.
275         outtmpl:          Template for output names.
276         ignoreerrors:     Do not stop on download errors.
277         ratelimit:        Download speed limit, in bytes/sec.
278         nooverwrites:     Prevent overwriting files.
279         retries:          Number of times to retry for HTTP error 5xx
280         continuedl:       Try to continue downloads if possible.
281         noprogress:       Do not print the progress bar.
282         playliststart:    Playlist item to start at.
283         playlistend:      Playlist item to end at.
284         logtostderr:      Log messages to stderr instead of stdout.
285         consoletitle:     Display progress in console window's titlebar.
286         nopart:           Do not use temporary .part files.
287         updatetime:       Use the Last-modified header to set output file timestamps.
288         """
289
	# Class-level defaults; the real values are set per instance in __init__.
	params = None             # options dictionary governing behaviour
	_ies = []                 # registered InfoExtractor objects, in order
	_pps = []                 # chain of registered PostProcessors
	_download_retcode = None  # value returned by download()
	_num_downloads = None     # ordinal used by %(autonumber)s
	_screen_file = None       # stream used by to_screen (stdout or stderr)
296
297         def __init__(self, params):
298                 """Create a FileDownloader object with the given options."""
299                 self._ies = []
300                 self._pps = []
301                 self._download_retcode = 0
302                 self._num_downloads = 0
303                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
304                 self.params = params
305
306         @staticmethod
307         def pmkdir(filename):
308                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
309                 components = filename.split(os.sep)
310                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
311                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
312                 for dir in aggregate:
313                         if not os.path.exists(dir):
314                                 os.mkdir(dir)
315
316         @staticmethod
317         def format_bytes(bytes):
318                 if bytes is None:
319                         return 'N/A'
320                 if type(bytes) is str:
321                         bytes = float(bytes)
322                 if bytes == 0.0:
323                         exponent = 0
324                 else:
325                         exponent = long(math.log(bytes, 1024.0))
326                 suffix = 'bkMGTPEZY'[exponent]
327                 converted = float(bytes) / float(1024**exponent)
328                 return '%.2f%s' % (converted, suffix)
329
330         @staticmethod
331         def calc_percent(byte_counter, data_len):
332                 if data_len is None:
333                         return '---.-%'
334                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
335
336         @staticmethod
337         def calc_eta(start, now, total, current):
338                 if total is None:
339                         return '--:--'
340                 dif = now - start
341                 if current == 0 or dif < 0.001: # One millisecond
342                         return '--:--'
343                 rate = float(current) / dif
344                 eta = long((float(total) - float(current)) / rate)
345                 (eta_mins, eta_secs) = divmod(eta, 60)
346                 if eta_mins > 99:
347                         return '--:--'
348                 return '%02d:%02d' % (eta_mins, eta_secs)
349
350         @staticmethod
351         def calc_speed(start, now, bytes):
352                 dif = now - start
353                 if bytes == 0 or dif < 0.001: # One millisecond
354                         return '%10s' % '---b/s'
355                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
356
357         @staticmethod
358         def best_block_size(elapsed_time, bytes):
359                 new_min = max(bytes / 2.0, 1.0)
360                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
361                 if elapsed_time < 0.001:
362                         return long(new_max)
363                 rate = bytes / elapsed_time
364                 if rate > new_max:
365                         return long(new_max)
366                 if rate < new_min:
367                         return long(new_min)
368                 return long(rate)
369
370         @staticmethod
371         def parse_bytes(bytestr):
372                 """Parse a string indicating a byte quantity into a long integer."""
373                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
374                 if matchobj is None:
375                         return None
376                 number = float(matchobj.group(1))
377                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
378                 return long(round(number * multiplier))
379
	def add_info_extractor(self, ie):
		"""Add an InfoExtractor object to the end of the list."""
		self._ies.append(ie)
		# Mutual registration: the extractor needs a back-reference to
		# hand extracted information to this downloader.
		ie.set_downloader(self)
384
	def add_post_processor(self, pp):
		"""Add a PostProcessor object to the end of the chain."""
		self._pps.append(pp)
		# Mutual registration, mirroring add_info_extractor.
		pp.set_downloader(self)
389
	def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
		"""Print message to stdout if not in quiet mode.

		skip_eol suppresses the trailing newline (used for \r progress
		lines); ignore_encoding_errors swallows UnicodeEncodeError so an
		unprintable message cannot abort the caller.
		"""
		try:
			if not self.params.get('quiet', False):
				# The trailing comma on the print statement suppresses the
				# automatic newline; the terminator controls it explicitly.
				terminator = [u'\n', u''][skip_eol]
				print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
			self._screen_file.flush()
		except (UnicodeEncodeError), err:
			if not ignore_encoding_errors:
				raise
400
	def to_stderr(self, message):
		"""Print message to stderr, encoded for the current locale."""
		print >>sys.stderr, message.encode(preferredencoding())
404
	def to_cons_title(self, message):
		"""Set console/terminal window title to message."""
		if not self.params.get('consoletitle', False):
			return
		if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
			# c_wchar_p() might not be necessary if `message` is
			# already of type unicode()
			ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
		elif 'TERM' in os.environ:
			# xterm-compatible escape sequence: OSC 0 sets the window title.
			sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
415
	def fixed_template(self):
		"""Checks if the output template is fixed.

		A template is "fixed" when it contains no %(field)s placeholders,
		i.e. every download would produce the same filename.
		"""
		return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
419
	def trouble(self, message=None):
		"""Determine action to take when a download problem appears.

		Depending on if the downloader has been configured to ignore
		download errors or not, this method may throw an exception or
		not when errors are found, after printing the message.
		"""
		if message is not None:
			self.to_stderr(message)
		if not self.params.get('ignoreerrors', False):
			raise DownloadError(message)
		# Only reached when errors are ignored: remember a non-zero
		# return code for download().
		self._download_retcode = 1
432
433         def slow_down(self, start_time, byte_counter):
434                 """Sleep if the download speed is over the rate limit."""
435                 rate_limit = self.params.get('ratelimit', None)
436                 if rate_limit is None or byte_counter == 0:
437                         return
438                 now = time.time()
439                 elapsed = now - start_time
440                 if elapsed <= 0.0:
441                         return
442                 speed = float(byte_counter) / elapsed
443                 if speed > rate_limit:
444                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
445
446         def temp_name(self, filename):
447                 """Returns a temporary filename for the given filename."""
448                 if self.params.get('nopart', False) or filename == u'-' or \
449                                 (os.path.exists(filename) and not os.path.isfile(filename)):
450                         return filename
451                 return filename + u'.part'
452
453         def undo_temp_name(self, filename):
454                 if filename.endswith(u'.part'):
455                         return filename[:-len(u'.part')]
456                 return filename
457
458         def try_rename(self, old_filename, new_filename):
459                 try:
460                         if old_filename == new_filename:
461                                 return
462                         os.rename(old_filename, new_filename)
463                 except (IOError, OSError), err:
464                         self.trouble(u'ERROR: unable to rename file')
465         
466         def try_utime(self, filename, last_modified_hdr):
467                 """Try to set the last-modified time of the given file."""
468                 if last_modified_hdr is None:
469                         return
470                 if not os.path.isfile(filename):
471                         return
472                 timestr = last_modified_hdr
473                 if timestr is None:
474                         return
475                 filetime = timeconvert(timestr)
476                 if filetime is None:
477                         return
478                 try:
479                         os.utime(filename,(time.time(), filetime))
480                 except:
481                         pass
482
	def report_destination(self, filename):
		"""Report destination filename."""
		# Encoding errors are ignored so an exotic filename cannot abort
		# the download just because it cannot be printed.
		self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
486
	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
		"""Report download progress on screen and in the console title."""
		if self.params.get('noprogress', False):
			return
		# \r redraws the same console line instead of scrolling.
		self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
				(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
		self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
				(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
495
	def report_resuming_byte(self, resume_len):
		"""Report attempt to resume at given byte."""
		self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
499
	def report_retry(self, count, retries):
		"""Report retry in case of HTTP error 5xx"""
		self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
503
	def report_file_already_downloaded(self, file_name):
		"""Report file has already been fully downloaded."""
		try:
			self.to_screen(u'[download] %s has already been downloaded' % file_name)
		except (UnicodeEncodeError), err:
			# Fall back to a message without the (unprintable) filename.
			self.to_screen(u'[download] The file has already been downloaded')
510
	def report_unable_to_resume(self):
		"""Report it was impossible to resume download."""
		self.to_screen(u'[download] Unable to resume')
514
	def report_finish(self):
		"""Report download finished."""
		if self.params.get('noprogress', False):
			self.to_screen(u'[download] Download completed')
		else:
			# The progress line was drawn with \r; emit the final newline.
			self.to_screen(u'')
521
	def increment_downloads(self):
		"""Increment the ordinal that assigns a number to each file."""
		# Feeds the %(autonumber)s field used by prepare_filename().
		self._num_downloads += 1
525
	def prepare_filename(self, info_dict):
		"""Generate the output filename.

		Expands the 'outtmpl' template with the video information plus the
		synthetic fields 'epoch' (current Unix time) and 'autonumber'.
		Returns None (after reporting trouble) when expansion fails.
		"""
		try:
			template_dict = dict(info_dict)
			template_dict['epoch'] = unicode(long(time.time()))
			template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
			filename = self.params['outtmpl'] % template_dict
			return filename
		except (ValueError, KeyError), err:
			self.trouble(u'ERROR: invalid system charset or erroneous output template')
			return None
537
	def process_info(self, info_dict):
		"""Process a single dictionary returned by an InfoExtractor.

		Handles forced printings, simulation mode, overwrite protection,
		directory creation, the actual download and postprocessing.
		"""
		filename = self.prepare_filename(info_dict)
		# Do nothing else if in simulate mode
		if self.params.get('simulate', False):
			# Forced printings
			if self.params.get('forcetitle', False):
				print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forceurl', False):
				print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
				print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcedescription', False) and 'description' in info_dict:
				print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcefilename', False) and filename is not None:
				print filename.encode(preferredencoding(), 'xmlcharrefreplace')

			return

		# prepare_filename already reported trouble; just bail out here.
		if filename is None:
			return
		if self.params.get('nooverwrites', False) and os.path.exists(filename):
			self.to_stderr(u'WARNING: file exists and will be skipped')
			return

		try:
			self.pmkdir(filename)
		except (OSError, IOError), err:
			self.trouble(u'ERROR: unable to create directories: %s' % str(err))
			return

		try:
			# The URL is encoded to a byte string because urllib2 cannot
			# reliably handle unicode URLs.
			success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
		except (OSError, IOError), err:
			# Local I/O errors mean the video data could not be saved.
			raise UnavailableVideoError
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self.trouble(u'ERROR: unable to download video data: %s' % str(err))
			return
		except (ContentTooShortError, ), err:
			self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
			return

		if success:
			try:
				self.post_process(filename, info_dict)
			except (PostProcessingError), err:
				self.trouble(u'ERROR: postprocessing: %s' % str(err))
				return
586
	def download(self, url_list):
		"""Download a given list of URLs.

		Returns the accumulated return code (0 on success, 1 if any
		error was ignored along the way).
		"""
		# A fixed (placeholder-free) template would write every URL to
		# the same file, which only works for a single URL.
		if len(url_list) > 1 and self.fixed_template():
			raise SameFileError(self.params['outtmpl'])

		for url in url_list:
			suitable_found = False
			for ie in self._ies:
				# Go to next InfoExtractor if not suitable
				if not ie.suitable(url):
					continue

				# Suitable InfoExtractor found
				suitable_found = True

				# Extract information from URL and process it
				ie.extract(url)

				# Suitable InfoExtractor had been found; go to next URL
				break

			if not suitable_found:
				self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)

		return self._download_retcode
612
	def post_process(self, filename, ie_info):
		"""Run the postprocessing chain on the given file.

		Each PostProcessor receives the info dictionary (with 'filepath'
		added) and may return an updated dictionary, or None to stop the
		chain.
		"""
		info = dict(ie_info)
		info['filepath'] = filename
		for pp in self._pps:
			info = pp.run(info)
			if info is None:
				break
621
	def _download_with_rtmpdump(self, filename, url, player_url):
		"""Download an rtmp:// URL by driving the external rtmpdump tool.

		Returns True on success, False otherwise (after reporting trouble).
		"""
		self.report_destination(filename)
		tmpfilename = self.temp_name(filename)

		# Check for rtmpdump first
		try:
			subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
		except (OSError, IOError):
			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
			return False

		# Download using rtmpdump. rtmpdump returns exit code 2 when
		# the connection was interrumpted and resuming appears to be
		# possible. This is part of rtmpdump's normal usage, AFAIK.
		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
		retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
		while retval == 2 or retval == 1:
			prevsize = os.path.getsize(tmpfilename)
			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
			time.sleep(5.0) # This seems to be needed
			# Resume with -e; -k 1 appears needed after exit code 1.
			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
			cursize = os.path.getsize(tmpfilename)
			# No progress between two attempts: give up instead of looping.
			if prevsize == cursize and retval == 1:
				break
		if retval == 0:
			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
			self.try_rename(tmpfilename, filename)
			return True
		else:
			self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
			return False
653
	def _do_download(self, filename, url, player_url):
		"""Download url into filename over HTTP (rtmp URLs go via rtmpdump).

		Honors the 'continuedl', 'nopart', 'retries' and 'updatetime'
		downloader parameters as well as the rate limit (slow_down).
		Returns True on success (including "already fully downloaded"),
		False on failures already reported through self.trouble().
		Raises ContentTooShortError when fewer bytes than the announced
		Content-Length were received.  player_url is only used by the
		rtmpdump path.
		"""
		# Check file already present
		if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
			self.report_file_already_downloaded(filename)
			return True

		# Attempt to download using rtmpdump
		if url.startswith('rtmp'):
			return self._download_with_rtmpdump(filename, url, player_url)

		tmpfilename = self.temp_name(filename)
		stream = None	# opened lazily, once the first data block arrives
		open_mode = 'wb'

		# Do not include the Accept-Encoding header
		headers = {'Youtubedl-no-compression': 'True'}
		basic_request = urllib2.Request(url, None, headers)
		request = urllib2.Request(url, None, headers)

		# Establish possible resume length
		if os.path.isfile(tmpfilename):
			resume_len = os.path.getsize(tmpfilename)
		else:
			resume_len = 0

		# Request parameters in case of being able to resume
		if self.params.get('continuedl', False) and resume_len != 0:
			self.report_resuming_byte(resume_len)
			request.add_header('Range','bytes=%d-' % resume_len)
			open_mode = 'ab'

		count = 0
		retries = self.params.get('retries', 0)
		while count <= retries:
			# Establish connection
			try:
				data = urllib2.urlopen(request)
				break
			except (urllib2.HTTPError, ), err:
				if (err.code < 500 or err.code >= 600) and err.code != 416:
					# Unexpected HTTP error: only 5xx (server) errors and
					# 416 are retried/handled; everything else propagates.
					raise
				elif err.code == 416:
					# Unable to resume (requested range not satisfiable)
					try:
						# Open the connection again without the range header
						data = urllib2.urlopen(basic_request)
						content_length = data.info()['Content-Length']
					except (urllib2.HTTPError, ), err:
						if err.code < 500 or err.code >= 600:
							raise
					else:
						# Examine the reported length
						if (content_length is not None and
						    (resume_len - 100 < long(content_length) < resume_len + 100)):
							# The file had already been fully downloaded.
							# Explanation to the above condition: in issue #175 it was revealed that
							# YouTube sometimes adds or removes a few bytes from the end of the file,
							# changing the file size slightly and causing problems for some users. So
							# I decided to implement a suggested change and consider the file
							# completely downloaded if the file size differs less than 100 bytes from
							# the one in the hard drive.
							self.report_file_already_downloaded(filename)
							self.try_rename(tmpfilename, filename)
							return True
						else:
							# The length does not match, we start the download over
							self.report_unable_to_resume()
							open_mode = 'wb'
							break
			# Retry
			count += 1
			if count <= retries:
				self.report_retry(count, retries)

		if count > retries:
			self.trouble(u'ERROR: giving up after %s retries' % retries)
			return False

		# Expected total size includes the bytes already on disk when resuming.
		data_len = data.info().get('Content-length', None)
		if data_len is not None:
			data_len = long(data_len) + resume_len
		data_len_str = self.format_bytes(data_len)
		byte_counter = 0 + resume_len
		block_size = 1024
		start = time.time()
		while True:
			# Download and write
			before = time.time()
			data_block = data.read(block_size)
			after = time.time()
			if len(data_block) == 0:
				break
			byte_counter += len(data_block)

			# Open file just in time
			if stream is None:
				try:
					(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
					filename = self.undo_temp_name(tmpfilename)
					self.report_destination(filename)
				except (OSError, IOError), err:
					self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
					return False
			try:
				stream.write(data_block)
			except (IOError, OSError), err:
				self.trouble(u'\nERROR: unable to write data: %s' % str(err))
				return False
			# Adapt the block size to the observed transfer speed.
			block_size = self.best_block_size(after - before, len(data_block))

			# Progress message
			percent_str = self.calc_percent(byte_counter, data_len)
			eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
			speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
			self.report_progress(percent_str, data_len_str, speed_str, eta_str)

			# Apply rate limit
			self.slow_down(start, byte_counter - resume_len)

		# NOTE(review): if the server returns an empty body, stream is still
		# None here and .close() would raise AttributeError — confirm whether
		# a zero-byte response is possible in practice.
		stream.close()
		self.report_finish()
		if data_len is not None and byte_counter != data_len:
			raise ContentTooShortError(byte_counter, long(data_len))
		self.try_rename(tmpfilename, filename)

		# Update file modification time
		if self.params.get('updatetime', True):
			self.try_utime(filename, data.info().get('last-modified', None))

		return True
785
class InfoExtractor(object):
	"""Information Extractor class.

	An information extractor turns a URL into one or more dictionaries
	of video metadata (real video URL, title, simplified title, uploader
	and so on), which are then handed to the FileDownloader.  The
	FileDownloader acts on that information, typically by downloading
	the video to the file system.  Every dictionary must carry these
	fields:

	id:             Video identifier.
	url:            Final video URL.
	uploader:       Nickname of the video uploader.
	title:          Literal title.
	stitle:         Simplified title.
	ext:            Video filename extension.
	format:         Video format.
	player_url:     SWF Player URL (may be None).

	The following fields are optional; they exist mainly so youtube-dl
	can serve as the backend of a video search front-end (such as the
	one in youtube2mp3) and are only read by the respective forced
	printing functions:

	thumbnail:      Full URL to a video thumbnail image.
	description:    One-line video description.

	Concrete extractors override _real_initialize(), _real_extract()
	and the static suitable() method, and are normally instantiated and
	registered with the main downloader.
	"""

	# Class-level defaults; __init__/set_downloader shadow them per instance.
	_ready = False
	_downloader = None

	def __init__(self, downloader=None):
		"""Constructor. Receives an optional downloader."""
		self._ready = False
		self.set_downloader(downloader)

	@staticmethod
	def suitable(url):
		"""Receives a URL and returns True if suitable for this IE."""
		return False

	def set_downloader(self, downloader):
		"""Sets the downloader for this IE."""
		self._downloader = downloader

	def initialize(self):
		"""Initializes an instance (authentication, etc)."""
		if self._ready:
			return
		self._real_initialize()
		self._ready = True

	def extract(self, url):
		"""Extracts URL information and returns it in list of dicts."""
		self.initialize()
		return self._real_extract(url)

	def _real_initialize(self):
		"""Real initialization process. Redefine in subclasses."""
		pass

	def _real_extract(self, url):
		"""Real extraction process. Redefine in subclasses."""
		pass
856
class YoutubeIE(InfoExtractor):
	"""Information extractor for youtube.com."""

	# Group 1 matches the URL prefix (watch page, embed, youtu.be, ...);
	# group 2 captures the video id.  The (?(1).+)? conditional only allows
	# trailing text when a recognized prefix was present.
	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	_NETRC_MACHINE = 'youtube'
	# Listed in order of quality
	_available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
	# Maps format (itag) codes to filename extensions; codes not listed
	# here fall back to 'flv' in _real_extract.
	_video_extensions = {
		'13': '3gp',
		'17': 'mp4',
		'18': 'mp4',
		'22': 'mp4',
		'37': 'mp4',
		'38': 'video', # You actually don't know if this will be MOV, AVI or whatever
		'43': 'webm',
		'45': 'webm',
	}

	@staticmethod
	def suitable(url):
		"""Receives a URL and returns True if suitable for this IE."""
		return (re.match(YoutubeIE._VALID_URL, url) is not None)

	def report_lang(self):
		"""Report attempt to set language."""
		self._downloader.to_screen(u'[youtube] Setting language')

	def report_login(self):
		"""Report attempt to log in."""
		self._downloader.to_screen(u'[youtube] Logging in')

	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[youtube] Confirming age')

	def report_video_webpage_download(self, video_id):
		"""Report attempt to download video webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)

	def report_video_info_webpage_download(self, video_id):
		"""Report attempt to download video info webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)

	def report_information_extraction(self, video_id):
		"""Report attempt to extract video information."""
		self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)

	def report_unavailable_format(self, video_id, format):
		"""Report extracted video URL."""
		self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))

	def report_rtmp_download(self):
		"""Indicate the download will use the RTMP protocol."""
		self._downloader.to_screen(u'[youtube] RTMP download detected')

	def _real_initialize(self):
		"""Set the language cookie, optionally log in (explicit
		username/password or .netrc) and confirm age."""
		if self._downloader is None:
			return

		username = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					username = info[0]
					password = info[2]
				else:
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		# Set language
		request = urllib2.Request(self._LANG_URL)
		try:
			self.report_lang()
			urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
			return

		# No authentication to be performed
		if username is None:
			return

		# Log in
		login_form = {
				'current_form': 'loginForm',
				'next':         '/',
				'action_login': 'Log In',
				'username':     username,
				'password':     password,
				}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			# If the response still contains the login form, the
			# credentials were rejected.
			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return

		# Confirm age
		age_form = {
				'next_url':             '/',
				'action_confirm':       'Confirm',
				}
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
		try:
			self.report_age_confirmation()
			age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return

	def _real_extract(self, url):
		"""Fetch the watch page and get_video_info data, then pass each
		selected format to the downloader via process_info()."""
		# Extract video id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return
		video_id = mobj.group(2)

		# Get video webpage
		self.report_video_webpage_download(video_id)
		request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id)
		try:
			video_webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
			return

		# Attempt to extract SWF player URL
		mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
		if mobj is not None:
			# Undo the JavaScript escaping (\/ -> / etc.).
			player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
		else:
			player_url = None

		# Get video info
		# Several 'el' values are tried because some videos only answer
		# on specific ones; the first response containing a token wins.
		self.report_video_info_webpage_download(video_id)
		for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
			video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
					   % (video_id, el_type))
			request = urllib2.Request(video_info_url)
			try:
				video_info_webpage = urllib2.urlopen(request).read()
				video_info = parse_qs(video_info_webpage)
				if 'token' in video_info:
					break
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
				return
		if 'token' not in video_info:
			if 'reason' in video_info:
				self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
			else:
				self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
			return

		# Start extracting information
		self.report_information_extraction(video_id)

		# uploader
		if 'author' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = urllib.unquote_plus(video_info['author'][0])

		# title
		if 'title' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract video title')
			return
		video_title = urllib.unquote_plus(video_info['title'][0])
		video_title = video_title.decode('utf-8')
		video_title = sanitize_title(video_title)

		# simplified title: collapse every run of non-alphanumeric
		# characters into a single underscore.
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
		simple_title = simple_title.strip(ur'_')

		# thumbnail image
		if 'thumbnail_url' not in video_info:
			self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
			video_thumbnail = ''
		else:	# don't panic if we can't find it
			video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

		# upload date
		upload_date = u'NA'
		mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
		if mobj is not None:
			upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
			format_expressions = ['%d %B %Y', '%B %d %Y']
			# Try each known date layout; parse failures are ignored so a
			# successful earlier conversion (or the raw string) survives.
			for expression in format_expressions:
				try:
					upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
				except:
					pass

		# description
		video_description = 'No description available.'
		if self._downloader.params.get('forcedescription', False):
			mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
			if mobj is not None:
				video_description = mobj.group(1)

		# token
		video_token = urllib.unquote_plus(video_info['token'][0])

		# Decide which formats to download
		req_format = self._downloader.params.get('format', None)

		if 'fmt_url_map' in video_info:
			# fmt_url_map is a comma-separated list of "itag|url" pairs.
			url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
			format_limit = self._downloader.params.get('format_limit', None)
			if format_limit is not None and format_limit in self._available_formats:
				format_list = self._available_formats[self._available_formats.index(format_limit):]
			else:
				format_list = self._available_formats
			existing_formats = [x for x in format_list if x in url_map]
			if len(existing_formats) == 0:
				self._downloader.trouble(u'ERROR: no known formats available for video')
				return
			if req_format is None:
				video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
			elif req_format == '-1':
				video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
			else:
				# Specific format
				if req_format not in url_map:
					self._downloader.trouble(u'ERROR: requested format not available')
					return
				video_url_list = [(req_format, url_map[req_format])] # Specific format

		elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
			self.report_rtmp_download()
			video_url_list = [(None, video_info['conn'][0])]

		else:
			self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
			return

		for format_param, video_real_url in video_url_list:
			# At this point we have a new video
			self._downloader.increment_downloads()

			# Extension
			video_extension = self._video_extensions.get(format_param, 'flv')

			# Find the video URL in fmt_url_map or conn paramters
			try:
				# Process video information
				self._downloader.process_info({
					'id':		video_id.decode('utf-8'),
					'url':		video_real_url.decode('utf-8'),
					'uploader':	video_uploader.decode('utf-8'),
					'upload_date':	upload_date,
					'title':	video_title,
					'stitle':	simple_title,
					'ext':		video_extension.decode('utf-8'),
					'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),
					'thumbnail':	video_thumbnail.decode('utf-8'),
					'description':	video_description.decode('utf-8'),
					'player_url':	player_url,
				})
			except UnavailableVideoError, err:
				self._downloader.trouble(u'\nERROR: unable to download video')
1136
1137
1138 class MetacafeIE(InfoExtractor):
1139         """Information Extractor for metacafe.com."""
1140
1141         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
1142         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
1143         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
1144         _youtube_ie = None
1145
	def __init__(self, youtube_ie, downloader=None):
		"""Constructor. Keeps a YoutubeIE so that yt-prefixed Metacafe
		ids can be delegated to the YouTube extractor."""
		InfoExtractor.__init__(self, downloader)
		self._youtube_ie = youtube_ie
1149
	@staticmethod
	def suitable(url):
		"""Receives a URL and returns True if suitable for this IE."""
		return (re.match(MetacafeIE._VALID_URL, url) is not None)
1153
	def report_disclaimer(self):
		"""Report that the family-filter disclaimer page is being retrieved."""
		self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
1157
	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[metacafe] Confirming age')
1161
	def report_download_webpage(self, video_id):
		"""Report webpage download for the given video id."""
		self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
1165
	def report_extraction(self, video_id):
		"""Report information extraction for the given video id."""
		self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
1169
	def _real_initialize(self):
		"""Fetch the disclaimer page, then POST filters=0 to the filter
		endpoint (apparently disabling the family filter for the session)."""
		# Retrieve disclaimer
		request = urllib2.Request(self._DISCLAIMER)
		try:
			self.report_disclaimer()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
			return

		# Confirm age
		disclaimer_form = {
			'filters': '0',
			'submit': "Continue - I'm over 18",
			}
		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
		try:
			self.report_age_confirmation()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return
1192
1193         def _real_extract(self, url):
1194                 # Extract id and simplified title from URL
1195                 mobj = re.match(self._VALID_URL, url)
1196                 if mobj is None:
1197                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1198                         return
1199
1200                 video_id = mobj.group(1)
1201
1202                 # Check if video comes from YouTube
1203                 mobj2 = re.match(r'^yt-(.*)$', video_id)
1204                 if mobj2 is not None:
1205                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1206                         return
1207
1208                 # At this point we have a new video
1209                 self._downloader.increment_downloads()
1210
1211                 simple_title = mobj.group(2).decode('utf-8')
1212
1213                 # Retrieve video webpage to extract further information
1214                 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1215                 try:
1216                         self.report_download_webpage(video_id)
1217                         webpage = urllib2.urlopen(request).read()
1218                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1219                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1220                         return
1221
1222                 # Extract URL, uploader and title from webpage
1223                 self.report_extraction(video_id)
1224                 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
1225                 if mobj is not None:
1226                         mediaURL = urllib.unquote(mobj.group(1))
1227                         video_extension = mediaURL[-3:]
1228
1229                         # Extract gdaKey if available
1230                         mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1231                         if mobj is None:
1232                                 video_url = mediaURL
1233                         else:
1234                                 gdaKey = mobj.group(1)
1235                                 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
1236                 else:
1237                         mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
1238                         if mobj is None:
1239                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1240                                 return
1241                         vardict = parse_qs(mobj.group(1))
1242                         if 'mediaData' not in vardict:
1243                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1244                                 return
1245                         mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
1246                         if mobj is None:
1247                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1248                                 return
1249                         mediaURL = mobj.group(1).replace('\\/', '/')
1250                         video_extension = mediaURL[-3:]
1251                         video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
1252
1253                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
1254                 if mobj is None:
1255                         self._downloader.trouble(u'ERROR: unable to extract title')
1256                         return
1257                 video_title = mobj.group(1).decode('utf-8')
1258                 video_title = sanitize_title(video_title)
1259
1260                 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
1261                 if mobj is None:
1262                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1263                         return
1264                 video_uploader = mobj.group(1)
1265
1266                 try:
1267                         # Process video information
1268                         self._downloader.process_info({
1269                                 'id':           video_id.decode('utf-8'),
1270                                 'url':          video_url.decode('utf-8'),
1271                                 'uploader':     video_uploader.decode('utf-8'),
1272                                 'upload_date':  u'NA',
1273                                 'title':        video_title,
1274                                 'stitle':       simple_title,
1275                                 'ext':          video_extension.decode('utf-8'),
1276                                 'format':       u'NA',
1277                                 'player_url':   None,
1278                         })
1279                 except UnavailableVideoError:
1280                         self._downloader.trouble(u'\nERROR: unable to download video')
1281
1282
1283 class DailymotionIE(InfoExtractor):
1284         """Information Extractor for Dailymotion"""
1285
1286         _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1287
1288         def __init__(self, downloader=None):
1289                 InfoExtractor.__init__(self, downloader)
1290
1291         @staticmethod
1292         def suitable(url):
1293                 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1294
1295         def report_download_webpage(self, video_id):
1296                 """Report webpage download."""
1297                 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
1298
1299         def report_extraction(self, video_id):
1300                 """Report information extraction."""
1301                 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
1302
1303         def _real_initialize(self):
1304                 return
1305
1306         def _real_extract(self, url):
1307                 # Extract id and simplified title from URL
1308                 mobj = re.match(self._VALID_URL, url)
1309                 if mobj is None:
1310                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1311                         return
1312
1313                 # At this point we have a new video
1314                 self._downloader.increment_downloads()
1315                 video_id = mobj.group(1)
1316
1317                 simple_title = mobj.group(2).decode('utf-8')
1318                 video_extension = 'flv'
1319
1320                 # Retrieve video webpage to extract further information
1321                 request = urllib2.Request(url)
1322                 try:
1323                         self.report_download_webpage(video_id)
1324                         webpage = urllib2.urlopen(request).read()
1325                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1326                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1327                         return
1328
1329                 # Extract URL, uploader and title from webpage
1330                 self.report_extraction(video_id)
1331                 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1332                 if mobj is None:
1333                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1334                         return
1335                 mediaURL = urllib.unquote(mobj.group(1))
1336
1337                 # if needed add http://www.dailymotion.com/ if relative URL
1338
1339                 video_url = mediaURL
1340
1341                 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1342                 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1343                 if mobj is None:
1344                         self._downloader.trouble(u'ERROR: unable to extract title')
1345                         return
1346                 video_title = mobj.group(1).decode('utf-8')
1347                 video_title = sanitize_title(video_title)
1348
1349                 mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
1350                 if mobj is None:
1351                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1352                         return
1353                 video_uploader = mobj.group(1)
1354
1355                 try:
1356                         # Process video information
1357                         self._downloader.process_info({
1358                                 'id':           video_id.decode('utf-8'),
1359                                 'url':          video_url.decode('utf-8'),
1360                                 'uploader':     video_uploader.decode('utf-8'),
1361                                 'upload_date':  u'NA',
1362                                 'title':        video_title,
1363                                 'stitle':       simple_title,
1364                                 'ext':          video_extension.decode('utf-8'),
1365                                 'format':       u'NA',
1366                                 'player_url':   None,
1367                         })
1368                 except UnavailableVideoError:
1369                         self._downloader.trouble(u'\nERROR: unable to download video')
1370
class GoogleIE(InfoExtractor):
	"""Information extractor for video.google.com."""

	# Group 1: the docid query parameter (video id).
	_VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'

	def __init__(self, downloader=None):
		InfoExtractor.__init__(self, downloader)

	@staticmethod
	def suitable(url):
		"""Return True if this extractor can handle the given URL."""
		return (re.match(GoogleIE._VALID_URL, url) is not None)

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)

	def _real_initialize(self):
		# No login or cookies needed for Google Video.
		return

	def _real_extract(self, url):
		"""Scrape the videoplay page for the media URL and metadata."""
		# Extract id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
			return

		# At this point we have a new video
		self._downloader.increment_downloads()
		video_id = mobj.group(1)

		video_extension = 'mp4'

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader, and title from webpage
		self.report_extraction(video_id)
		# Prefer the direct mp4 download URL; fall back to the escaped
		# flash videoUrl (and an flv extension) if it is absent.
		mobj = re.search(r"download_url:'([^']+)'", webpage)
		if mobj is None:
			video_extension = 'flv'
			mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract media URL')
			return
		mediaURL = urllib.unquote(mobj.group(1))
		# Undo the JavaScript hex escapes for '=' and '&'.
		mediaURL = mediaURL.replace('\\x3d', '\x3d')
		mediaURL = mediaURL.replace('\\x26', '\x26')

		video_url = mediaURL

		mobj = re.search(r'<title>(.*)</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)
		# Collapse runs of non-simple characters into underscores.
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

		# Extract video description
		# NOTE(review): video_description is extracted (and its absence
		# aborts the download) but it is never passed to process_info
		# below — confirm whether it should be included or dropped.
		mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract video description')
			return
		video_description = mobj.group(1).decode('utf-8')
		if not video_description:
			video_description = 'No description available.'

		# Extract video thumbnail
		if self._downloader.params.get('forcethumbnail', False):
			# The thumbnail only appears on the search results page,
			# so run a site-restricted search for this video id.
			request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
			try:
				webpage = urllib2.urlopen(request).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
				return
			mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
				return
			video_thumbnail = mobj.group(1)
		else:	# we need something to pass to process_info
			video_thumbnail = ''


		try:
			# Process video information
			self._downloader.process_info({
				'id':		video_id.decode('utf-8'),
				'url':		video_url.decode('utf-8'),
				'uploader':	u'NA',
				'upload_date':	u'NA',
				'title':	video_title,
				'stitle':	simple_title,
				'ext':		video_extension.decode('utf-8'),
				'format':	u'NA',
				'player_url':	None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'\nERROR: unable to download video')
1480
1481
1482 class PhotobucketIE(InfoExtractor):
1483         """Information extractor for photobucket.com."""
1484
1485         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1486
1487         def __init__(self, downloader=None):
1488                 InfoExtractor.__init__(self, downloader)
1489
1490         @staticmethod
1491         def suitable(url):
1492                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1493
1494         def report_download_webpage(self, video_id):
1495                 """Report webpage download."""
1496                 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1497
1498         def report_extraction(self, video_id):
1499                 """Report information extraction."""
1500                 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1501
1502         def _real_initialize(self):
1503                 return
1504
1505         def _real_extract(self, url):
1506                 # Extract id from URL
1507                 mobj = re.match(self._VALID_URL, url)
1508                 if mobj is None:
1509                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1510                         return
1511
1512                 # At this point we have a new video
1513                 self._downloader.increment_downloads()
1514                 video_id = mobj.group(1)
1515
1516                 video_extension = 'flv'
1517
1518                 # Retrieve video webpage to extract further information
1519                 request = urllib2.Request(url)
1520                 try:
1521                         self.report_download_webpage(video_id)
1522                         webpage = urllib2.urlopen(request).read()
1523                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1524                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1525                         return
1526
1527                 # Extract URL, uploader, and title from webpage
1528                 self.report_extraction(video_id)
1529                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1530                 if mobj is None:
1531                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1532                         return
1533                 mediaURL = urllib.unquote(mobj.group(1))
1534
1535                 video_url = mediaURL
1536
1537                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1538                 if mobj is None:
1539                         self._downloader.trouble(u'ERROR: unable to extract title')
1540                         return
1541                 video_title = mobj.group(1).decode('utf-8')
1542                 video_title = sanitize_title(video_title)
1543                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1544
1545                 video_uploader = mobj.group(2).decode('utf-8')
1546
1547                 try:
1548                         # Process video information
1549                         self._downloader.process_info({
1550                                 'id':           video_id.decode('utf-8'),
1551                                 'url':          video_url.decode('utf-8'),
1552                                 'uploader':     video_uploader,
1553                                 'upload_date':  u'NA',
1554                                 'title':        video_title,
1555                                 'stitle':       simple_title,
1556                                 'ext':          video_extension.decode('utf-8'),
1557                                 'format':       u'NA',
1558                                 'player_url':   None,
1559                         })
1560                 except UnavailableVideoError:
1561                         self._downloader.trouble(u'\nERROR: unable to download video')
1562
1563
1564 class YahooIE(InfoExtractor):
1565         """Information extractor for video.yahoo.com."""
1566
1567         # _VALID_URL matches all Yahoo! Video URLs
1568         # _VPAGE_URL matches only the extractable '/watch/' URLs
1569         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1570         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1571
1572         def __init__(self, downloader=None):
1573                 InfoExtractor.__init__(self, downloader)
1574
1575         @staticmethod
1576         def suitable(url):
1577                 return (re.match(YahooIE._VALID_URL, url) is not None)
1578
1579         def report_download_webpage(self, video_id):
1580                 """Report webpage download."""
1581                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1582
1583         def report_extraction(self, video_id):
1584                 """Report information extraction."""
1585                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1586
1587         def _real_initialize(self):
1588                 return
1589
1590         def _real_extract(self, url, new_video=True):
1591                 # Extract ID from URL
1592                 mobj = re.match(self._VALID_URL, url)
1593                 if mobj is None:
1594                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1595                         return
1596
1597                 # At this point we have a new video
1598                 self._downloader.increment_downloads()
1599                 video_id = mobj.group(2)
1600                 video_extension = 'flv'
1601
1602                 # Rewrite valid but non-extractable URLs as
1603                 # extractable English language /watch/ URLs
1604                 if re.match(self._VPAGE_URL, url) is None:
1605                         request = urllib2.Request(url)
1606                         try:
1607                                 webpage = urllib2.urlopen(request).read()
1608                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1609                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1610                                 return
1611
1612                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1613                         if mobj is None:
1614                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1615                                 return
1616                         yahoo_id = mobj.group(1)
1617
1618                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1619                         if mobj is None:
1620                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1621                                 return
1622                         yahoo_vid = mobj.group(1)
1623
1624                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1625                         return self._real_extract(url, new_video=False)
1626
1627                 # Retrieve video webpage to extract further information
1628                 request = urllib2.Request(url)
1629                 try:
1630                         self.report_download_webpage(video_id)
1631                         webpage = urllib2.urlopen(request).read()
1632                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1633                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1634                         return
1635
1636                 # Extract uploader and title from webpage
1637                 self.report_extraction(video_id)
1638                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1639                 if mobj is None:
1640                         self._downloader.trouble(u'ERROR: unable to extract video title')
1641                         return
1642                 video_title = mobj.group(1).decode('utf-8')
1643                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1644
1645                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1646                 if mobj is None:
1647                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1648                         return
1649                 video_uploader = mobj.group(1).decode('utf-8')
1650
1651                 # Extract video thumbnail
1652                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1653                 if mobj is None:
1654                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1655                         return
1656                 video_thumbnail = mobj.group(1).decode('utf-8')
1657
1658                 # Extract video description
1659                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1660                 if mobj is None:
1661                         self._downloader.trouble(u'ERROR: unable to extract video description')
1662                         return
1663                 video_description = mobj.group(1).decode('utf-8')
1664                 if not video_description: video_description = 'No description available.'
1665
1666                 # Extract video height and width
1667                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1668                 if mobj is None:
1669                         self._downloader.trouble(u'ERROR: unable to extract video height')
1670                         return
1671                 yv_video_height = mobj.group(1)
1672
1673                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1674                 if mobj is None:
1675                         self._downloader.trouble(u'ERROR: unable to extract video width')
1676                         return
1677                 yv_video_width = mobj.group(1)
1678
1679                 # Retrieve video playlist to extract media URL
1680                 # I'm not completely sure what all these options are, but we
1681                 # seem to need most of them, otherwise the server sends a 401.
1682                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1683                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1684                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1685                                           '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1686                                           '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1687                 try:
1688                         self.report_download_webpage(video_id)
1689                         webpage = urllib2.urlopen(request).read()
1690                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1691                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1692                         return
1693
1694                 # Extract media URL from playlist XML
1695                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1696                 if mobj is None:
1697                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1698                         return
1699                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1700                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1701
1702                 try:
1703                         # Process video information
1704                         self._downloader.process_info({
1705                                 'id':           video_id.decode('utf-8'),
1706                                 'url':          video_url,
1707                                 'uploader':     video_uploader,
1708                                 'upload_date':  u'NA',
1709                                 'title':        video_title,
1710                                 'stitle':       simple_title,
1711                                 'ext':          video_extension.decode('utf-8'),
1712                                 'thumbnail':    video_thumbnail.decode('utf-8'),
1713                                 'description':  video_description,
1714                                 'thumbnail':    video_thumbnail,
1715                                 'description':  video_description,
1716                                 'player_url':   None,
1717                         })
1718                 except UnavailableVideoError:
1719                         self._downloader.trouble(u'\nERROR: unable to download video')
1720
1721
1722 class GenericIE(InfoExtractor):
1723         """Generic last-resort information extractor."""
1724
1725         def __init__(self, downloader=None):
1726                 InfoExtractor.__init__(self, downloader)
1727
1728         @staticmethod
1729         def suitable(url):
1730                 return True
1731
1732         def report_download_webpage(self, video_id):
1733                 """Report webpage download."""
1734                 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1735                 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1736
1737         def report_extraction(self, video_id):
1738                 """Report information extraction."""
1739                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1740
1741         def _real_initialize(self):
1742                 return
1743
1744         def _real_extract(self, url):
1745                 # At this point we have a new video
1746                 self._downloader.increment_downloads()
1747
1748                 video_id = url.split('/')[-1]
1749                 request = urllib2.Request(url)
1750                 try:
1751                         self.report_download_webpage(video_id)
1752                         webpage = urllib2.urlopen(request).read()
1753                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1754                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1755                         return
1756                 except ValueError, err:
1757                         # since this is the last-resort InfoExtractor, if
1758                         # this error is thrown, it'll be thrown here
1759                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1760                         return
1761
1762                 self.report_extraction(video_id)
1763                 # Start with something easy: JW Player in SWFObject
1764                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1765                 if mobj is None:
1766                         # Broaden the search a little bit
1767                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1768                 if mobj is None:
1769                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1770                         return
1771
1772                 # It's possible that one of the regexes
1773                 # matched, but returned an empty group:
1774                 if mobj.group(1) is None:
1775                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1776                         return
1777
1778                 video_url = urllib.unquote(mobj.group(1))
1779                 video_id  = os.path.basename(video_url)
1780
1781                 # here's a fun little line of code for you:
1782                 video_extension = os.path.splitext(video_id)[1][1:]
1783                 video_id        = os.path.splitext(video_id)[0]
1784
1785                 # it's tempting to parse this further, but you would
1786                 # have to take into account all the variations like
1787                 #   Video Title - Site Name
1788                 #   Site Name | Video Title
1789                 #   Video Title - Tagline | Site Name
1790                 # and so on and so forth; it's just not practical
1791                 mobj = re.search(r'<title>(.*)</title>', webpage)
1792                 if mobj is None:
1793                         self._downloader.trouble(u'ERROR: unable to extract title')
1794                         return
1795                 video_title = mobj.group(1).decode('utf-8')
1796                 video_title = sanitize_title(video_title)
1797                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1798
1799                 # video uploader is domain name
1800                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1801                 if mobj is None:
1802                         self._downloader.trouble(u'ERROR: unable to extract title')
1803                         return
1804                 video_uploader = mobj.group(1).decode('utf-8')
1805
1806                 try:
1807                         # Process video information
1808                         self._downloader.process_info({
1809                                 'id':           video_id.decode('utf-8'),
1810                                 'url':          video_url.decode('utf-8'),
1811                                 'uploader':     video_uploader,
1812                                 'upload_date':  u'NA',
1813                                 'title':        video_title,
1814                                 'stitle':       simple_title,
1815                                 'ext':          video_extension.decode('utf-8'),
1816                                 'format':       u'NA',
1817                                 'player_url':   None,
1818                         })
1819                 except UnavailableVideoError, err:
1820                         self._downloader.trouble(u'\nERROR: unable to download video')
1821
1822
1823 class YoutubeSearchIE(InfoExtractor):
1824         """Information Extractor for YouTube search queries."""
1825         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1826         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1827         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1828         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1829         _youtube_ie = None
1830         _max_youtube_results = 1000
1831
1832         def __init__(self, youtube_ie, downloader=None):
1833                 InfoExtractor.__init__(self, downloader)
1834                 self._youtube_ie = youtube_ie
1835
1836         @staticmethod
1837         def suitable(url):
1838                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1839
1840         def report_download_page(self, query, pagenum):
1841                 """Report attempt to download playlist page with given number."""
1842                 query = query.decode(preferredencoding())
1843                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1844
1845         def _real_initialize(self):
1846                 self._youtube_ie.initialize()
1847
1848         def _real_extract(self, query):
1849                 mobj = re.match(self._VALID_QUERY, query)
1850                 if mobj is None:
1851                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1852                         return
1853
1854                 prefix, query = query.split(':')
1855                 prefix = prefix[8:]
1856                 query  = query.encode('utf-8')
1857                 if prefix == '':
1858                         self._download_n_results(query, 1)
1859                         return
1860                 elif prefix == 'all':
1861                         self._download_n_results(query, self._max_youtube_results)
1862                         return
1863                 else:
1864                         try:
1865                                 n = long(prefix)
1866                                 if n <= 0:
1867                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1868                                         return
1869                                 elif n > self._max_youtube_results:
1870                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
1871                                         n = self._max_youtube_results
1872                                 self._download_n_results(query, n)
1873                                 return
1874                         except ValueError: # parsing prefix as integer fails
1875                                 self._download_n_results(query, 1)
1876                                 return
1877
1878         def _download_n_results(self, query, n):
1879                 """Downloads a specified number of results for a query"""
1880
1881                 video_ids = []
1882                 already_seen = set()
1883                 pagenum = 1
1884
1885                 while True:
1886                         self.report_download_page(query, pagenum)
1887                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1888                         request = urllib2.Request(result_url)
1889                         try:
1890                                 page = urllib2.urlopen(request).read()
1891                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1892                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1893                                 return
1894
1895                         # Extract video identifiers
1896                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1897                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1898                                 if video_id not in already_seen:
1899                                         video_ids.append(video_id)
1900                                         already_seen.add(video_id)
1901                                         if len(video_ids) == n:
1902                                                 # Specified n videos reached
1903                                                 for id in video_ids:
1904                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1905                                                 return
1906
1907                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1908                                 for id in video_ids:
1909                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1910                                 return
1911
1912                         pagenum = pagenum + 1
1913
1914 class GoogleSearchIE(InfoExtractor):
1915         """Information Extractor for Google Video search queries."""
1916         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1917         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1918         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1919         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1920         _google_ie = None
1921         _max_google_results = 1000
1922
1923         def __init__(self, google_ie, downloader=None):
1924                 InfoExtractor.__init__(self, downloader)
1925                 self._google_ie = google_ie
1926
1927         @staticmethod
1928         def suitable(url):
1929                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1930
1931         def report_download_page(self, query, pagenum):
1932                 """Report attempt to download playlist page with given number."""
1933                 query = query.decode(preferredencoding())
1934                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1935
1936         def _real_initialize(self):
1937                 self._google_ie.initialize()
1938
1939         def _real_extract(self, query):
1940                 mobj = re.match(self._VALID_QUERY, query)
1941                 if mobj is None:
1942                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1943                         return
1944
1945                 prefix, query = query.split(':')
1946                 prefix = prefix[8:]
1947                 query  = query.encode('utf-8')
1948                 if prefix == '':
1949                         self._download_n_results(query, 1)
1950                         return
1951                 elif prefix == 'all':
1952                         self._download_n_results(query, self._max_google_results)
1953                         return
1954                 else:
1955                         try:
1956                                 n = long(prefix)
1957                                 if n <= 0:
1958                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1959                                         return
1960                                 elif n > self._max_google_results:
1961                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n))
1962                                         n = self._max_google_results
1963                                 self._download_n_results(query, n)
1964                                 return
1965                         except ValueError: # parsing prefix as integer fails
1966                                 self._download_n_results(query, 1)
1967                                 return
1968
1969         def _download_n_results(self, query, n):
1970                 """Downloads a specified number of results for a query"""
1971
1972                 video_ids = []
1973                 already_seen = set()
1974                 pagenum = 1
1975
1976                 while True:
1977                         self.report_download_page(query, pagenum)
1978                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1979                         request = urllib2.Request(result_url)
1980                         try:
1981                                 page = urllib2.urlopen(request).read()
1982                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1983                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1984                                 return
1985
1986                         # Extract video identifiers
1987                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1988                                 video_id = mobj.group(1)
1989                                 if video_id not in already_seen:
1990                                         video_ids.append(video_id)
1991                                         already_seen.add(video_id)
1992                                         if len(video_ids) == n:
1993                                                 # Specified n videos reached
1994                                                 for id in video_ids:
1995                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1996                                                 return
1997
1998                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1999                                 for id in video_ids:
2000                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2001                                 return
2002
2003                         pagenum = pagenum + 1
2004
2005 class YahooSearchIE(InfoExtractor):
2006         """Information Extractor for Yahoo! Video search queries."""
2007         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
2008         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
2009         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
2010         _MORE_PAGES_INDICATOR = r'\s*Next'
2011         _yahoo_ie = None
2012         _max_yahoo_results = 1000
2013
2014         def __init__(self, yahoo_ie, downloader=None):
2015                 InfoExtractor.__init__(self, downloader)
2016                 self._yahoo_ie = yahoo_ie
2017
2018         @staticmethod
2019         def suitable(url):
2020                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
2021
2022         def report_download_page(self, query, pagenum):
2023                 """Report attempt to download playlist page with given number."""
2024                 query = query.decode(preferredencoding())
2025                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
2026
2027         def _real_initialize(self):
2028                 self._yahoo_ie.initialize()
2029
2030         def _real_extract(self, query):
2031                 mobj = re.match(self._VALID_QUERY, query)
2032                 if mobj is None:
2033                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2034                         return
2035
2036                 prefix, query = query.split(':')
2037                 prefix = prefix[8:]
2038                 query  = query.encode('utf-8')
2039                 if prefix == '':
2040                         self._download_n_results(query, 1)
2041                         return
2042                 elif prefix == 'all':
2043                         self._download_n_results(query, self._max_yahoo_results)
2044                         return
2045                 else:
2046                         try:
2047                                 n = long(prefix)
2048                                 if n <= 0:
2049                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2050                                         return
2051                                 elif n > self._max_yahoo_results:
2052                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n))
2053                                         n = self._max_yahoo_results
2054                                 self._download_n_results(query, n)
2055                                 return
2056                         except ValueError: # parsing prefix as integer fails
2057                                 self._download_n_results(query, 1)
2058                                 return
2059
2060         def _download_n_results(self, query, n):
2061                 """Downloads a specified number of results for a query"""
2062
2063                 video_ids = []
2064                 already_seen = set()
2065                 pagenum = 1
2066
2067                 while True:
2068                         self.report_download_page(query, pagenum)
2069                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2070                         request = urllib2.Request(result_url)
2071                         try:
2072                                 page = urllib2.urlopen(request).read()
2073                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2074                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2075                                 return
2076
2077                         # Extract video identifiers
2078                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2079                                 video_id = mobj.group(1)
2080                                 if video_id not in already_seen:
2081                                         video_ids.append(video_id)
2082                                         already_seen.add(video_id)
2083                                         if len(video_ids) == n:
2084                                                 # Specified n videos reached
2085                                                 for id in video_ids:
2086                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2087                                                 return
2088
2089                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2090                                 for id in video_ids:
2091                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2092                                 return
2093
2094                         pagenum = pagenum + 1
2095
2096 class YoutubePlaylistIE(InfoExtractor):
2097         """Information Extractor for YouTube playlists."""
2098
2099         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
2100         _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
2101         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2102         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2103         _youtube_ie = None
2104
2105         def __init__(self, youtube_ie, downloader=None):
2106                 InfoExtractor.__init__(self, downloader)
2107                 self._youtube_ie = youtube_ie
2108
2109         @staticmethod
2110         def suitable(url):
2111                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
2112
2113         def report_download_page(self, playlist_id, pagenum):
2114                 """Report attempt to download playlist page with given number."""
2115                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2116
2117         def _real_initialize(self):
2118                 self._youtube_ie.initialize()
2119
2120         def _real_extract(self, url):
2121                 # Extract playlist id
2122                 mobj = re.match(self._VALID_URL, url)
2123                 if mobj is None:
2124                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2125                         return
2126
2127                 # Single video case
2128                 if mobj.group(3) is not None:
2129                         self._youtube_ie.extract(mobj.group(3))
2130                         return
2131
2132                 # Download playlist pages
2133                 # prefix is 'p' as default for playlists but there are other types that need extra care
2134                 playlist_prefix = mobj.group(1)
2135                 if playlist_prefix == 'a':
2136                         playlist_access = 'artist'
2137                 else:
2138                         playlist_prefix = 'p'
2139                         playlist_access = 'view_play_list'
2140                 playlist_id = mobj.group(2)
2141                 video_ids = []
2142                 pagenum = 1
2143
2144                 while True:
2145                         self.report_download_page(playlist_id, pagenum)
2146                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
2147                         try:
2148                                 page = urllib2.urlopen(request).read()
2149                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2150                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2151                                 return
2152
2153                         # Extract video identifiers
2154                         ids_in_page = []
2155                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2156                                 if mobj.group(1) not in ids_in_page:
2157                                         ids_in_page.append(mobj.group(1))
2158                         video_ids.extend(ids_in_page)
2159
2160                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2161                                 break
2162                         pagenum = pagenum + 1
2163
2164                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2165                 playlistend = self._downloader.params.get('playlistend', -1)
2166                 video_ids = video_ids[playliststart:playlistend]
2167
2168                 for id in video_ids:
2169                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2170                 return
2171
2172 class YoutubeUserIE(InfoExtractor):
2173         """Information Extractor for YouTube users."""
2174
2175         _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
2176         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2177         _GDATA_PAGE_SIZE = 50
2178         _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2179         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2180         _youtube_ie = None
2181
2182         def __init__(self, youtube_ie, downloader=None):
2183                 InfoExtractor.__init__(self, downloader)
2184                 self._youtube_ie = youtube_ie
2185
2186         @staticmethod
2187         def suitable(url):
2188                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2189
2190         def report_download_page(self, username, start_index):
2191                 """Report attempt to download user page."""
2192                 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
2193                                            (username, start_index, start_index + self._GDATA_PAGE_SIZE))
2194
2195         def _real_initialize(self):
2196                 self._youtube_ie.initialize()
2197
2198         def _real_extract(self, url):
2199                 # Extract username
2200                 mobj = re.match(self._VALID_URL, url)
2201                 if mobj is None:
2202                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2203                         return
2204
2205                 username = mobj.group(1)
2206
2207                 # Download video ids using YouTube Data API. Result size per
2208                 # query is limited (currently to 50 videos) so we need to query
2209                 # page by page until there are no video ids - it means we got
2210                 # all of them.
2211
2212                 video_ids = []
2213                 pagenum = 0
2214
2215                 while True:
2216                         start_index = pagenum * self._GDATA_PAGE_SIZE + 1
2217                         self.report_download_page(username, start_index)
2218
2219                         request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
2220
2221                         try:
2222                                 page = urllib2.urlopen(request).read()
2223                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2224                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2225                                 return
2226
2227                         # Extract video identifiers
2228                         ids_in_page = []
2229
2230                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2231                                 if mobj.group(1) not in ids_in_page:
2232                                         ids_in_page.append(mobj.group(1))
2233
2234                         video_ids.extend(ids_in_page)
2235
2236                         # A little optimization - if current page is not
2237                         # "full", ie. does not contain PAGE_SIZE video ids then
2238                         # we can assume that this page is the last one - there
2239                         # are no more ids on further pages - no need to query
2240                         # again.
2241
2242                         if len(ids_in_page) < self._GDATA_PAGE_SIZE:
2243                                 break
2244
2245                         pagenum += 1
2246
2247                 all_ids_count = len(video_ids)
2248                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2249                 playlistend = self._downloader.params.get('playlistend', -1)
2250
2251                 if playlistend == -1:
2252                         video_ids = video_ids[playliststart:]
2253                 else:
2254                         video_ids = video_ids[playliststart:playlistend]
2255                         
2256                 self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
2257                                            (username, all_ids_count, len(video_ids)))
2258
2259                 for video_id in video_ids:
2260                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2261
2262
2263 class DepositFilesIE(InfoExtractor):
2264         """Information extractor for depositfiles.com"""
2265
2266         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
2267
2268         def __init__(self, downloader=None):
2269                 InfoExtractor.__init__(self, downloader)
2270
2271         @staticmethod
2272         def suitable(url):
2273                 return (re.match(DepositFilesIE._VALID_URL, url) is not None)
2274
2275         def report_download_webpage(self, file_id):
2276                 """Report webpage download."""
2277                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2278
2279         def report_extraction(self, file_id):
2280                 """Report information extraction."""
2281                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2282
2283         def _real_initialize(self):
2284                 return
2285
2286         def _real_extract(self, url):
2287                 # At this point we have a new file
2288                 self._downloader.increment_downloads()
2289
2290                 file_id = url.split('/')[-1]
2291                 # Rebuild url in english locale
2292                 url = 'http://depositfiles.com/en/files/' + file_id
2293
2294                 # Retrieve file webpage with 'Free download' button pressed
2295                 free_download_indication = { 'gateway_result' : '1' }
2296                 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
2297                 try:
2298                         self.report_download_webpage(file_id)
2299                         webpage = urllib2.urlopen(request).read()
2300                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2301                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2302                         return
2303
2304                 # Search for the real file URL
2305                 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2306                 if (mobj is None) or (mobj.group(1) is None):
2307                         # Try to figure out reason of the error.
2308                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2309                         if (mobj is not None) and (mobj.group(1) is not None):
2310                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2311                                 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2312                         else:
2313                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2314                         return
2315
2316                 file_url = mobj.group(1)
2317                 file_extension = os.path.splitext(file_url)[1][1:]
2318
2319                 # Search for file title
2320                 mobj = re.search(r'<b title="(.*?)">', webpage)
2321                 if mobj is None:
2322                         self._downloader.trouble(u'ERROR: unable to extract title')
2323                         return
2324                 file_title = mobj.group(1).decode('utf-8')
2325
2326                 try:
2327                         # Process file information
2328                         self._downloader.process_info({
2329                                 'id':           file_id.decode('utf-8'),
2330                                 'url':          file_url.decode('utf-8'),
2331                                 'uploader':     u'NA',
2332                                 'upload_date':  u'NA',
2333                                 'title':        file_title,
2334                                 'stitle':       file_title,
2335                                 'ext':          file_extension.decode('utf-8'),
2336                                 'format':       u'NA',
2337                                 'player_url':   None,
2338                         })
2339                 except UnavailableVideoError, err:
2340                         self._downloader.trouble(u'ERROR: unable to download file')
2341
2342 class FacebookIE(InfoExtractor):
2343         """Information Extractor for Facebook"""
2344
2345         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2346         _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2347         _NETRC_MACHINE = 'facebook'
2348         _available_formats = ['highqual', 'lowqual']
2349         _video_extensions = {
2350                 'highqual': 'mp4',
2351                 'lowqual': 'mp4',
2352         }
2353
	def __init__(self, downloader=None):
		# Delegate common setup to the shared InfoExtractor base class.
		InfoExtractor.__init__(self, downloader)
2356
2357         @staticmethod
2358         def suitable(url):
2359                 return (re.match(FacebookIE._VALID_URL, url) is not None)
2360
	def _reporter(self, message):
		"""Add header and report message."""
		# All status output from this extractor goes through here so the
		# '[facebook]' prefix stays consistent.
		self._downloader.to_screen(u'[facebook] %s' % message)
2364
	def report_login(self):
		"""Report attempt to log in."""
		self._reporter(u'Logging in')
2368
	def report_video_webpage_download(self, video_id):
		"""Report attempt to download video webpage."""
		self._reporter(u'%s: Downloading video webpage' % video_id)
2372
	def report_information_extraction(self, video_id):
		"""Report attempt to extract video information."""
		self._reporter(u'%s: Extracting video information' % video_id)
2376
2377         def _parse_page(self, video_webpage):
2378                 """Extract video information from page"""
2379                 # General data
2380                 data = {'title': r'class="video_title datawrap">(.*?)</',
2381                         'description': r'<div class="datawrap">(.*?)</div>',
2382                         'owner': r'\("video_owner_name", "(.*?)"\)',
2383                         'upload_date': r'data-date="(.*?)"',
2384                         'thumbnail':  r'\("thumb_url", "(?P<THUMB>.*?)"\)',
2385                         }
2386                 video_info = {}
2387                 for piece in data.keys():
2388                         mobj = re.search(data[piece], video_webpage)
2389                         if mobj is not None:
2390                                 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2391
2392                 # Video urls
2393                 video_urls = {}
2394                 for fmt in self._available_formats:
2395                         mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
2396                         if mobj is not None:
2397                                 # URL is in a Javascript segment inside an escaped Unicode format within
2398                                 # the generally utf-8 page
2399                                 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2400                 video_info['video_urls'] = video_urls
2401
2402                 return video_info
2403
	def _real_initialize(self):
		"""Log in to Facebook if credentials are available.

		Credentials come from the downloader params ('username' /
		'password') or, failing that, from the user's .netrc entry for
		the 'facebook' machine. Without credentials this is a no-op.
		"""
		if self._downloader is None:
			return

		useremail = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			useremail = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					useremail = info[0]
					password = info[2]
				else:
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				# .netrc problems are non-fatal: warn and continue anonymously.
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		# No credentials at all: proceed without logging in.
		if useremail is None:
			return

		# Log in
		login_form = {
			'email': useremail,
			'pass': password,
			'login': 'Log+In'
			}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			# The login form being echoed back signals a failed login
			# (bad credentials or rate limiting).
			if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return
2447
2448         def _real_extract(self, url):
2449                 mobj = re.match(self._VALID_URL, url)
2450                 if mobj is None:
2451                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2452                         return
2453                 video_id = mobj.group('ID')
2454
2455                 # Get video webpage
2456                 self.report_video_webpage_download(video_id)
2457                 request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
2458                 try:
2459                         page = urllib2.urlopen(request)
2460                         video_webpage = page.read()
2461                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2462                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
2463                         return
2464
2465                 # Start extracting information
2466                 self.report_information_extraction(video_id)
2467
2468                 # Extract information
2469                 video_info = self._parse_page(video_webpage)
2470
2471                 # uploader
2472                 if 'owner' not in video_info:
2473                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
2474                         return
2475                 video_uploader = video_info['owner']
2476
2477                 # title
2478                 if 'title' not in video_info:
2479                         self._downloader.trouble(u'ERROR: unable to extract video title')
2480                         return
2481                 video_title = video_info['title']
2482                 video_title = video_title.decode('utf-8')
2483                 video_title = sanitize_title(video_title)
2484
2485                 # simplified title
2486                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2487                 simple_title = simple_title.strip(ur'_')
2488
2489                 # thumbnail image
2490                 if 'thumbnail' not in video_info:
2491                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
2492                         video_thumbnail = ''
2493                 else:
2494                         video_thumbnail = video_info['thumbnail']
2495
2496                 # upload date
2497                 upload_date = u'NA'
2498                 if 'upload_date' in video_info:
2499                         upload_time = video_info['upload_date']
2500                         timetuple = email.utils.parsedate_tz(upload_time)
2501                         if timetuple is not None:
2502                                 try:
2503                                         upload_date = time.strftime('%Y%m%d', timetuple[0:9])
2504                                 except:
2505                                         pass
2506
2507                 # description
2508                 video_description = 'No description available.'
2509                 if (self._downloader.params.get('forcedescription', False) and
2510                     'description' in video_info):
2511                         video_description = video_info['description']
2512
2513                 url_map = video_info['video_urls']
2514                 if len(url_map.keys()) > 0:
2515                         # Decide which formats to download
2516                         req_format = self._downloader.params.get('format', None)
2517                         format_limit = self._downloader.params.get('format_limit', None)
2518
2519                         if format_limit is not None and format_limit in self._available_formats:
2520                                 format_list = self._available_formats[self._available_formats.index(format_limit):]
2521                         else:
2522                                 format_list = self._available_formats
2523                         existing_formats = [x for x in format_list if x in url_map]
2524                         if len(existing_formats) == 0:
2525                                 self._downloader.trouble(u'ERROR: no known formats available for video')
2526                                 return
2527                         if req_format is None:
2528                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
2529                         elif req_format == '-1':
2530                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
2531                         else:
2532                                 # Specific format
2533                                 if req_format not in url_map:
2534                                         self._downloader.trouble(u'ERROR: requested format not available')
2535                                         return
2536                                 video_url_list = [(req_format, url_map[req_format])] # Specific format
2537
2538                 for format_param, video_real_url in video_url_list:
2539
2540                         # At this point we have a new video
2541                         self._downloader.increment_downloads()
2542
2543                         # Extension
2544                         video_extension = self._video_extensions.get(format_param, 'mp4')
2545
2546                         # Find the video URL in fmt_url_map or conn paramters
2547                         try:
2548                                 # Process video information
2549                                 self._downloader.process_info({
2550                                         'id':           video_id.decode('utf-8'),
2551                                         'url':          video_real_url.decode('utf-8'),
2552                                         'uploader':     video_uploader.decode('utf-8'),
2553                                         'upload_date':  upload_date,
2554                                         'title':        video_title,
2555                                         'stitle':       simple_title,
2556                                         'ext':          video_extension.decode('utf-8'),
2557                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
2558                                         'thumbnail':    video_thumbnail.decode('utf-8'),
2559                                         'description':  video_description.decode('utf-8'),
2560                                         'player_url':   None,
2561                                 })
2562                         except UnavailableVideoError, err:
2563                                 self._downloader.trouble(u'\nERROR: unable to download video')
2564
class PostProcessor(object):
	"""Base class for all post processors.

	A PostProcessor is attached to a FileDownloader through the
	downloader's add_post_processor() method.  Once a download finishes
	successfully, the downloader walks its chain of post processors and
	invokes run() on each one, feeding the value returned by one
	processor as the argument of the next.  A processor that returns
	None terminates the chain; otherwise the chain runs to its end.

	Post processors and downloaders reference each other ("mutual
	registration"), just like InfoExtractor objects do.
	"""

	# Downloader this post processor is attached to; set at construction
	# time or later via set_downloader().
	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Attach this post processor to the given downloader."""
		self._downloader = downloader

	def run(self, information):
		"""Process one finished download.

		"information" is an InfoExtractor-style dictionary carrying one
		additional key, "filepath", which names the downloaded file.

		Returning None stops the postprocessing chain; returning a
		dictionary (possibly this one, with some fields changed) passes
		it on to the next post processor.  A PostProcessingError raised
		here is handled by the calling downloader.
		"""
		return information # default implementation: pass everything through
2610
### MAIN PROGRAM ###
if __name__ == '__main__':
	try:
		# Modules needed only when running the main program
		import getpass
		import optparse

		# Function to update the program file with the latest version from the repository.
		def update_self(downloader, filename):
			"""Overwrite this script file with the latest stable release.

			'downloader' is only used for screen output; 'filename' is the
			path of the running script (normally sys.argv[0]).  Exits the
			process on any failure.
			"""
			# Note: downloader only used for options
			if not os.access(filename, os.W_OK):
				sys.exit('ERROR: no write permissions on %s' % filename)

			downloader.to_screen('Updating to latest stable version...')
			try:
				# LATEST_VERSION names the release revision; the script is
				# then fetched from the raw URL at that revision.
				latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
				latest_version = urllib.urlopen(latest_url).read().strip()
				prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
				newcontent = urllib.urlopen(prog_url).read()
			except (IOError, OSError), err:
				sys.exit('ERROR: unable to download latest version')
			try:
				stream = open(filename, 'w')
				stream.write(newcontent)
				stream.close()
			except (IOError, OSError), err:
				sys.exit('ERROR: unable to overwrite current version')
			downloader.to_screen('Updated to version %s' % latest_version)
2639
		# Parse command line
		parser = optparse.OptionParser(
			usage='Usage: %prog [options] url...',
			version='2011.01.30',
			conflict_handler='resolve',
		)

		# General options (help/version are redefined so that the
		# conflict_handler above can override optparse's defaults).
		parser.add_option('-h', '--help',
				action='help', help='print this help text and exit')
		parser.add_option('-v', '--version',
				action='version', help='print program version and exit')
		parser.add_option('-U', '--update',
				action='store_true', dest='update_self', help='update this program to latest stable version')
		parser.add_option('-i', '--ignore-errors',
				action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
		parser.add_option('-r', '--rate-limit',
				dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
		parser.add_option('-R', '--retries',
				dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
		parser.add_option('--playlist-start',
				dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
		parser.add_option('--playlist-end',
				dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
		parser.add_option('--dump-user-agent',
				action='store_true', dest='dump_user_agent',
				help='display the current browser identification', default=False)

		# Login credentials (mutually exclusive with --netrc; checked below)
		authentication = optparse.OptionGroup(parser, 'Authentication Options')
		authentication.add_option('-u', '--username',
				dest='username', metavar='USERNAME', help='account username')
		authentication.add_option('-p', '--password',
				dest='password', metavar='PASSWORD', help='account password')
		authentication.add_option('-n', '--netrc',
				action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
		parser.add_option_group(authentication)

		# Format selection ('-1' is the sentinel meaning "all formats")
		video_format = optparse.OptionGroup(parser, 'Video Format Options')
		video_format.add_option('-f', '--format',
				action='store', dest='format', metavar='FORMAT', help='video format code')
		video_format.add_option('--all-formats',
				action='store_const', dest='format', help='download all available video formats', const='-1')
		video_format.add_option('--max-quality',
				action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
		parser.add_option_group(video_format)

		# Output verbosity; the --get-* options imply quiet simulation
		verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
		verbosity.add_option('-q', '--quiet',
				action='store_true', dest='quiet', help='activates quiet mode', default=False)
		verbosity.add_option('-s', '--simulate',
				action='store_true', dest='simulate', help='do not download video', default=False)
		verbosity.add_option('-g', '--get-url',
				action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
		verbosity.add_option('-e', '--get-title',
				action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
		verbosity.add_option('--get-thumbnail',
				action='store_true', dest='getthumbnail',
				help='simulate, quiet but print thumbnail URL', default=False)
		verbosity.add_option('--get-description',
				action='store_true', dest='getdescription',
				help='simulate, quiet but print video description', default=False)
		verbosity.add_option('--get-filename',
				action='store_true', dest='getfilename',
				help='simulate, quiet but print output filename', default=False)
		verbosity.add_option('--no-progress',
				action='store_true', dest='noprogress', help='do not print progress bar', default=False)
		verbosity.add_option('--console-title',
				action='store_true', dest='consoletitle',
				help='display progress in console titlebar', default=False)
		parser.add_option_group(verbosity)

		# Output file naming and resume behaviour
		filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
		filesystem.add_option('-t', '--title',
				action='store_true', dest='usetitle', help='use title in file name', default=False)
		filesystem.add_option('-l', '--literal',
				action='store_true', dest='useliteral', help='use literal title in file name', default=False)
		filesystem.add_option('-A', '--auto-number',
				action='store_true', dest='autonumber',
				help='number downloaded files starting from 00000', default=False)
		filesystem.add_option('-o', '--output',
				dest='outtmpl', metavar='TEMPLATE', help='output filename template')
		filesystem.add_option('-a', '--batch-file',
				dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
		filesystem.add_option('-w', '--no-overwrites',
				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
		filesystem.add_option('-c', '--continue',
				action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
		filesystem.add_option('--cookies',
				dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
		filesystem.add_option('--no-part',
				action='store_true', dest='nopart', help='do not use .part files', default=False)
		filesystem.add_option('--no-mtime',
				action='store_false', dest='updatetime',
				help='do not use the Last-modified header to set the file modification time', default=True)
		parser.add_option_group(filesystem)

		(opts, args) = parser.parse_args()
2736
		# Open appropriate CookieJar: in-memory only, or a Mozilla-format
		# file jar when --cookies was given (loaded now, saved on exit).
		if opts.cookiefile is None:
			jar = cookielib.CookieJar()
		else:
			try:
				jar = cookielib.MozillaCookieJar(opts.cookiefile)
				# Only load if the file already exists and is readable; a
				# missing file just means a fresh jar that will be saved later.
				if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
					jar.load()
			except (IOError, OSError), err:
				sys.exit(u'ERROR: unable to open cookie file')

		# Dump user agent
		if opts.dump_user_agent:
			print std_headers['User-Agent']
			sys.exit(0)

		# General configuration: install a global urllib2 opener that uses
		# the cookie jar, honours proxy settings and handles gzip/deflate.
		cookie_processor = urllib2.HTTPCookieProcessor(jar)
		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()))
		socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

		# Batch file verification: one URL per line, blank lines and lines
		# starting with '#', '/' or ';' are treated as comments.
		batchurls = []
		if opts.batchfile is not None:
			try:
				if opts.batchfile == '-':
					batchfd = sys.stdin
				else:
					batchfd = open(opts.batchfile, 'r')
				batchurls = batchfd.readlines()
				batchurls = [x.strip() for x in batchurls]
				batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
			except IOError:
				sys.exit(u'ERROR: batch file could not be read')
		all_urls = batchurls + args

		# Conflicting, missing and erroneous options
		if opts.usenetrc and (opts.username is not None or opts.password is not None):
			parser.error(u'using .netrc conflicts with giving username/password')
		if opts.password is not None and opts.username is None:
			parser.error(u'account username missing')
		if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
			parser.error(u'using output template conflicts with using title, literal title or auto number')
		if opts.usetitle and opts.useliteral:
			parser.error(u'using title conflicts with using literal title')
		# Username without password: prompt interactively (never echoes).
		if opts.username is not None and opts.password is None:
			opts.password = getpass.getpass(u'Type account password and press return:')
		if opts.ratelimit is not None:
			numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
			if numeric_limit is None:
				parser.error(u'invalid rate limit specified')
			opts.ratelimit = numeric_limit
		if opts.retries is not None:
			try:
				opts.retries = long(opts.retries)
			except (TypeError, ValueError), err:
				parser.error(u'invalid retry count specified')
		# Playlist bounds: start is 1-based and positive; end is -1 ("last")
		# or a positive number not smaller than start.
		try:
			opts.playliststart = long(opts.playliststart)
			if opts.playliststart <= 0:
				raise ValueError
		except (TypeError, ValueError), err:
			parser.error(u'invalid playlist start number specified')
		try:
			opts.playlistend = long(opts.playlistend)
			if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
				raise ValueError
		except (TypeError, ValueError), err:
			parser.error(u'invalid playlist end number specified')
2806
		# Information extractors.  Several IEs delegate individual results
		# to another IE passed at construction time (e.g. the playlist and
		# search IEs hand each found video to the plain YouTube IE).
		youtube_ie = YoutubeIE()
		metacafe_ie = MetacafeIE(youtube_ie)
		dailymotion_ie = DailymotionIE()
		youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
		youtube_user_ie = YoutubeUserIE(youtube_ie)
		youtube_search_ie = YoutubeSearchIE(youtube_ie)
		google_ie = GoogleIE()
		google_search_ie = GoogleSearchIE(google_ie)
		photobucket_ie = PhotobucketIE()
		yahoo_ie = YahooIE()
		yahoo_search_ie = YahooSearchIE(yahoo_ie)
		deposit_files_ie = DepositFilesIE()
		facebook_ie = FacebookIE()
		generic_ie = GenericIE()

		# File downloader: translate parsed options into the params dict
		# the FileDownloader expects.  Any --get-* option implies both
		# quiet and simulate mode.
		fd = FileDownloader({
			'usenetrc': opts.usenetrc,
			'username': opts.username,
			'password': opts.password,
			'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
			'forceurl': opts.geturl,
			'forcetitle': opts.gettitle,
			'forcethumbnail': opts.getthumbnail,
			'forcedescription': opts.getdescription,
			'forcefilename': opts.getfilename,
			'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
			'format': opts.format,
			'format_limit': opts.format_limit,
			# Output template: an explicit -o wins; otherwise the first
			# matching 'or' branch below picks a default template based on
			# the --all-formats / --title / --literal / --auto-number flags.
			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
				or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
				or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
				or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
				or u'%(id)s.%(ext)s'),
			'ignoreerrors': opts.ignoreerrors,
			'ratelimit': opts.ratelimit,
			'nooverwrites': opts.nooverwrites,
			'retries': opts.retries,
			'continuedl': opts.continue_dl,
			'noprogress': opts.noprogress,
			'playliststart': opts.playliststart,
			'playlistend': opts.playlistend,
			# Writing the video to stdout ('-o -') would clash with screen
			# output, so route messages to stderr in that case.
			'logtostderr': opts.outtmpl == '-',
			'consoletitle': opts.consoletitle,
			'nopart': opts.nopart,
			'updatetime': opts.updatetime,
			})
		# Registration order matters: IEs are tried in this order, so the
		# more specific ones (search, playlist, user) come before plain ones.
		fd.add_info_extractor(youtube_search_ie)
		fd.add_info_extractor(youtube_pl_ie)
		fd.add_info_extractor(youtube_user_ie)
		fd.add_info_extractor(metacafe_ie)
		fd.add_info_extractor(dailymotion_ie)
		fd.add_info_extractor(youtube_ie)
		fd.add_info_extractor(google_ie)
		fd.add_info_extractor(google_search_ie)
		fd.add_info_extractor(photobucket_ie)
		fd.add_info_extractor(yahoo_ie)
		fd.add_info_extractor(yahoo_search_ie)
		fd.add_info_extractor(deposit_files_ie)
		fd.add_info_extractor(facebook_ie)

		# This must come last since it's the
		# fallback if none of the others work
		fd.add_info_extractor(generic_ie)
2877
		# Update version
		if opts.update_self:
			update_self(fd, sys.argv[0])

		# Maybe do nothing: -U alone (no URLs) is a valid invocation.
		if len(all_urls) < 1:
			if not opts.update_self:
				parser.error(u'you must provide at least one URL')
			else:
				sys.exit()
		# download() returns the process exit code (nonzero on any failure).
		retcode = fd.download(all_urls)

		# Dump cookie jar if requested
		if opts.cookiefile is not None:
			try:
				jar.save()
			except (IOError, OSError), err:
				sys.exit(u'ERROR: unable to save cookie jar')

		sys.exit(retcode)

	# Top-level error handling for the whole program run.
	except DownloadError:
		sys.exit(1)
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')