Bump version number
[youtube-dl.git] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # License: Public domain code
6 import htmlentitydefs
7 import httplib
8 import locale
9 import math
10 import netrc
11 import os
12 import os.path
13 import re
14 import socket
15 import string
16 import subprocess
17 import sys
18 import time
19 import urllib
20 import urllib2
21 import urlparse
22
# Default HTTP headers sent with every request. They imitate a desktop
# Firefox browser so sites serve the same content a regular user would get.
std_headers = {
	'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
	'Accept-Language': 'en-us,en;q=0.5',
}

# Characters considered safe for "simplified" titles: ASCII letters and
# digits. NOTE: str.decode() is Python 2 only — this produces a unicode string.
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
31
def preferredencoding():
	"""Get preferred encoding.

	Returns the best encoding scheme for the system, based on
	locale.getpreferredencoding() and some further tweaks.

	The name returned is validated by actually encoding a small
	string with it; if the locale reports an unusable or unknown
	codec, 'UTF-8' is used as a safe fallback.
	"""
	# The original implementation built a one-shot generator and called
	# its (Python 2 only) .next() method; a plain try/return is simpler
	# and behaves identically.
	try:
		pref = locale.getpreferredencoding()
		u'TEST'.encode(pref)
	except Exception:
		pref = 'UTF-8'
	return pref
47
class DownloadError(Exception):
	"""Download Error exception.

	This exception may be thrown by FileDownloader objects if they are not
	configured to continue on errors. They will contain the appropriate
	error message.

	Raised by FileDownloader.trouble() unless the 'ignoreerrors'
	option is set.
	"""
	pass
56
class SameFileError(Exception):
	"""Same File exception.

	This exception will be thrown by FileDownloader objects if they detect
	multiple files would have to be downloaded to the same file on disk.

	Raised by FileDownloader.download() when more than one URL is given
	together with a fixed (non-templated) output filename.
	"""
	pass
64
class PostProcessingError(Exception):
	"""Post Processing exception.

	This exception may be raised by PostProcessor's .run() method to
	indicate an error in the postprocessing task. It is caught and
	reported by FileDownloader.process_info().
	"""
	pass
72
class UnavailableFormatError(Exception):
	"""Unavailable Format exception.

	This exception will be thrown when a video is requested
	in a format that is not available for that video.

	Raised by FileDownloader.process_info() when verifying the video URL
	or downloading the data fails with an OS/HTTP-level error.
	"""
	pass
80
class ContentTooShortError(Exception):
	"""Content Too Short exception.

	This exception may be raised by FileDownloader objects when a file they
	download is too small for what the server announced first, indicating
	the connection was probably interrupted.

	Attributes (both in bytes):
	downloaded -- number of bytes actually received
	expected   -- number of bytes announced by the server
	"""
	# Both in bytes
	downloaded = None
	expected = None

	def __init__(self, downloaded, expected):
		# Call the base constructor so str(err) and err.args carry a
		# useful message instead of being empty.
		Exception.__init__(self, 'Content too short: %s bytes downloaded, %s expected' % (downloaded, expected))
		self.downloaded = downloaded
		self.expected = expected
95
96 class FileDownloader(object):
97         """File Downloader class.
98
99         File downloader objects are the ones responsible of downloading the
100         actual video file and writing it to disk if the user has requested
101         it, among some other tasks. In most cases there should be one per
102         program. As, given a video URL, the downloader doesn't know how to
103         extract all the needed information, task that InfoExtractors do, it
104         has to pass the URL to one of them.
105
106         For this, file downloader objects have a method that allows
107         InfoExtractors to be registered in a given order. When it is passed
108         a URL, the file downloader handles it to the first InfoExtractor it
109         finds that reports being able to handle it. The InfoExtractor extracts
110         all the information about the video or videos the URL refers to, and
111         asks the FileDownloader to process the video information, possibly
112         downloading the video.
113
114         File downloaders accept a lot of parameters. In order not to saturate
115         the object constructor with arguments, it receives a dictionary of
116         options instead. These options are available through the params
117         attribute for the InfoExtractors to use. The FileDownloader also
118         registers itself as the downloader in charge for the InfoExtractors
119         that are added to it, so this is a "mutual registration".
120
121         Available options:
122
123         username:       Username for authentication purposes.
124         password:       Password for authentication purposes.
125         usenetrc:       Use netrc for authentication instead.
126         quiet:          Do not print messages to stdout.
127         forceurl:       Force printing final URL.
128         forcetitle:     Force printing title.
129         simulate:       Do not download the video files.
130         format:         Video format code.
131         outtmpl:        Template for output names.
132         ignoreerrors:   Do not stop on download errors.
133         ratelimit:      Download speed limit, in bytes/sec.
134         nooverwrites:   Prevent overwriting files.
135         continuedl:     Try to continue downloads if possible.
136         """
137
138         params = None
139         _ies = []
140         _pps = []
141         _download_retcode = None
142
143         def __init__(self, params):
144                 """Create a FileDownloader object with the given options."""
145                 self._ies = []
146                 self._pps = []
147                 self._download_retcode = 0
148                 self.params = params
149         
150         @staticmethod
151         def pmkdir(filename):
152                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
153                 components = filename.split(os.sep)
154                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
155                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
156                 for dir in aggregate:
157                         if not os.path.exists(dir):
158                                 os.mkdir(dir)
159         
160         @staticmethod
161         def format_bytes(bytes):
162                 if bytes is None:
163                         return 'N/A'
164                 if type(bytes) is str:
165                         bytes = float(bytes)
166                 if bytes == 0.0:
167                         exponent = 0
168                 else:
169                         exponent = long(math.log(bytes, 1024.0))
170                 suffix = 'bkMGTPEZY'[exponent]
171                 converted = float(bytes) / float(1024**exponent)
172                 return '%.2f%s' % (converted, suffix)
173
174         @staticmethod
175         def calc_percent(byte_counter, data_len):
176                 if data_len is None:
177                         return '---.-%'
178                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
179
180         @staticmethod
181         def calc_eta(start, now, total, current):
182                 if total is None:
183                         return '--:--'
184                 dif = now - start
185                 if current == 0 or dif < 0.001: # One millisecond
186                         return '--:--'
187                 rate = float(current) / dif
188                 eta = long((float(total) - float(current)) / rate)
189                 (eta_mins, eta_secs) = divmod(eta, 60)
190                 if eta_mins > 99:
191                         return '--:--'
192                 return '%02d:%02d' % (eta_mins, eta_secs)
193
194         @staticmethod
195         def calc_speed(start, now, bytes):
196                 dif = now - start
197                 if bytes == 0 or dif < 0.001: # One millisecond
198                         return '%10s' % '---b/s'
199                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
200
201         @staticmethod
202         def best_block_size(elapsed_time, bytes):
203                 new_min = max(bytes / 2.0, 1.0)
204                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
205                 if elapsed_time < 0.001:
206                         return long(new_max)
207                 rate = bytes / elapsed_time
208                 if rate > new_max:
209                         return long(new_max)
210                 if rate < new_min:
211                         return long(new_min)
212                 return long(rate)
213
214         @staticmethod
215         def parse_bytes(bytestr):
216                 """Parse a string indicating a byte quantity into a long integer."""
217                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
218                 if matchobj is None:
219                         return None
220                 number = float(matchobj.group(1))
221                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
222                 return long(round(number * multiplier))
223
224         @staticmethod
225         def verify_url(url):
226                 """Verify a URL is valid and data could be downloaded. Return real data URL."""
227                 request = urllib2.Request(url, None, std_headers)
228                 data = urllib2.urlopen(request)
229                 data.read(1)
230                 url = data.geturl()
231                 data.close()
232                 return url
233
234         def add_info_extractor(self, ie):
235                 """Add an InfoExtractor object to the end of the list."""
236                 self._ies.append(ie)
237                 ie.set_downloader(self)
238         
239         def add_post_processor(self, pp):
240                 """Add a PostProcessor object to the end of the chain."""
241                 self._pps.append(pp)
242                 pp.set_downloader(self)
243         
244         def to_stdout(self, message, skip_eol=False):
245                 """Print message to stdout if not in quiet mode."""
246                 if not self.params.get('quiet', False):
247                         print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
248                         sys.stdout.flush()
249         
250         def to_stderr(self, message):
251                 """Print message to stderr."""
252                 print >>sys.stderr, message.encode(preferredencoding())
253         
254         def fixed_template(self):
255                 """Checks if the output template is fixed."""
256                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
257
258         def trouble(self, message=None):
259                 """Determine action to take when a download problem appears.
260
261                 Depending on if the downloader has been configured to ignore
262                 download errors or not, this method may throw an exception or
263                 not when errors are found, after printing the message.
264                 """
265                 if message is not None:
266                         self.to_stderr(message)
267                 if not self.params.get('ignoreerrors', False):
268                         raise DownloadError(message)
269                 self._download_retcode = 1
270
271         def slow_down(self, start_time, byte_counter):
272                 """Sleep if the download speed is over the rate limit."""
273                 rate_limit = self.params.get('ratelimit', None)
274                 if rate_limit is None or byte_counter == 0:
275                         return
276                 now = time.time()
277                 elapsed = now - start_time
278                 if elapsed <= 0.0:
279                         return
280                 speed = float(byte_counter) / elapsed
281                 if speed > rate_limit:
282                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
283
284         def report_destination(self, filename):
285                 """Report destination filename."""
286                 self.to_stdout(u'[download] Destination: %s' % filename)
287         
288         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
289                 """Report download progress."""
290                 self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
291                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
292
293         def report_resuming_byte(self, resume_len):
294                 """Report attemtp to resume at given byte."""
295                 self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
296         
297         def report_file_already_downloaded(self, file_name):
298                 """Report file has already been fully downloaded."""
299                 self.to_stdout(u'[download] %s has already been downloaded' % file_name)
300         
301         def report_unable_to_resume(self):
302                 """Report it was impossible to resume download."""
303                 self.to_stdout(u'[download] Unable to resume')
304         
305         def report_finish(self):
306                 """Report download finished."""
307                 self.to_stdout(u'')
308
309         def process_info(self, info_dict):
310                 """Process a single dictionary returned by an InfoExtractor."""
311                 # Do nothing else if in simulate mode
312                 if self.params.get('simulate', False):
313                         try:
314                                 info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
315                         except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
316                                 raise UnavailableFormatError
317
318                         # Forced printings
319                         if self.params.get('forcetitle', False):
320                                 print info_dict['title'].encode(preferredencoding())
321                         if self.params.get('forceurl', False):
322                                 print info_dict['url'].encode(preferredencoding())
323
324                         return
325                         
326                 try:
327                         template_dict = dict(info_dict)
328                         template_dict['epoch'] = unicode(long(time.time()))
329                         filename = self.params['outtmpl'] % template_dict
330                 except (ValueError, KeyError), err:
331                         self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
332                 if self.params.get('nooverwrites', False) and os.path.exists(filename):
333                         self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
334                         return
335
336                 try:
337                         self.pmkdir(filename)
338                 except (OSError, IOError), err:
339                         self.trouble('ERROR: unable to create directories: %s' % str(err))
340                         return
341
342                 try:
343                         success = self._do_download(filename, info_dict['url'].encode('utf-8'))
344                 except (OSError, IOError), err:
345                         raise UnavailableFormatError
346                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
347                         self.trouble('ERROR: unable to download video data: %s' % str(err))
348                         return
349                 except (ContentTooShortError, ), err:
350                         self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
351                         return
352
353                 if success:
354                         try:
355                                 self.post_process(filename, info_dict)
356                         except (PostProcessingError), err:
357                                 self.trouble('ERROR: postprocessing: %s' % str(err))
358                                 return
359
360         def download(self, url_list):
361                 """Download a given list of URLs."""
362                 if len(url_list) > 1 and self.fixed_template():
363                         raise SameFileError(self.params['outtmpl'])
364
365                 for url in url_list:
366                         suitable_found = False
367                         for ie in self._ies:
368                                 # Go to next InfoExtractor if not suitable
369                                 if not ie.suitable(url):
370                                         continue
371
372                                 # Suitable InfoExtractor found
373                                 suitable_found = True
374
375                                 # Extract information from URL and process it
376                                 ie.extract(url)
377
378                                 # Suitable InfoExtractor had been found; go to next URL
379                                 break
380
381                         if not suitable_found:
382                                 self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
383
384                 return self._download_retcode
385
386         def post_process(self, filename, ie_info):
387                 """Run the postprocessing chain on the given file."""
388                 info = dict(ie_info)
389                 info['filepath'] = filename
390                 for pp in self._pps:
391                         info = pp.run(info)
392                         if info is None:
393                                 break
394         
395         def _download_with_rtmpdump(self, filename, url):
396                 self.report_destination(filename)
397
398                 # Check for rtmpdump first
399                 try:
400                         subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
401                 except (OSError, IOError):
402                         self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
403                         return False
404
405                 # Download using rtmpdump. rtmpdump returns exit code 2 when
406                 # the connection was interrumpted and resuming appears to be
407                 # possible. This is part of rtmpdump's normal usage, AFAIK.
408                 retval = subprocess.call(['rtmpdump', '-q', '-r', url, '-o', filename] + [[], ['-e']][self.params.get('continuedl', False)])
409                 while retval == 2:
410                         self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename), skip_eol=True)
411                         time.sleep(2.0) # This seems to be needed
412                         retval = subprocess.call(['rtmpdump', '-q', '-e', '-r', url, '-o', filename])
413                 if retval == 0:
414                         self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
415                         return True
416                 else:
417                         self.trouble('ERROR: rtmpdump exited with code %d' % retval)
418                         return False
419
420         def _do_download(self, filename, url):
421                 # Attempt to download using rtmpdump
422                 if url.startswith('rtmp'):
423                         return self._download_with_rtmpdump(filename, url)
424
425                 stream = None
426                 open_mode = 'wb'
427                 basic_request = urllib2.Request(url, None, std_headers)
428                 request = urllib2.Request(url, None, std_headers)
429
430                 # Establish possible resume length
431                 if os.path.isfile(filename):
432                         resume_len = os.path.getsize(filename)
433                 else:
434                         resume_len = 0
435
436                 # Request parameters in case of being able to resume
437                 if self.params.get('continuedl', False) and resume_len != 0:
438                         self.report_resuming_byte(resume_len)
439                         request.add_header('Range','bytes=%d-' % resume_len)
440                         open_mode = 'ab'
441
442                 # Establish connection
443                 try:
444                         data = urllib2.urlopen(request)
445                 except (urllib2.HTTPError, ), err:
446                         if err.code != 416: #  416 is 'Requested range not satisfiable'
447                                 raise
448                         # Unable to resume
449                         data = urllib2.urlopen(basic_request)
450                         content_length = data.info()['Content-Length']
451
452                         if content_length is not None and long(content_length) == resume_len:
453                                 # Because the file had already been fully downloaded
454                                 self.report_file_already_downloaded(filename)
455                                 return True
456                         else:
457                                 # Because the server didn't let us
458                                 self.report_unable_to_resume()
459                                 open_mode = 'wb'
460
461                 data_len = data.info().get('Content-length', None)
462                 data_len_str = self.format_bytes(data_len)
463                 byte_counter = 0
464                 block_size = 1024
465                 start = time.time()
466                 while True:
467                         # Download and write
468                         before = time.time()
469                         data_block = data.read(block_size)
470                         after = time.time()
471                         data_block_len = len(data_block)
472                         if data_block_len == 0:
473                                 break
474                         byte_counter += data_block_len
475
476                         # Open file just in time
477                         if stream is None:
478                                 try:
479                                         stream = open(filename, open_mode)
480                                         self.report_destination(filename)
481                                 except (OSError, IOError), err:
482                                         self.trouble('ERROR: unable to open for writing: %s' % str(err))
483                                         return False
484                         stream.write(data_block)
485                         block_size = self.best_block_size(after - before, data_block_len)
486
487                         # Progress message
488                         percent_str = self.calc_percent(byte_counter, data_len)
489                         eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
490                         speed_str = self.calc_speed(start, time.time(), byte_counter)
491                         self.report_progress(percent_str, data_len_str, speed_str, eta_str)
492
493                         # Apply rate limit
494                         self.slow_down(start, byte_counter)
495
496                 self.report_finish()
497                 if data_len is not None and str(byte_counter) != data_len:
498                         raise ContentTooShortError(byte_counter, long(data_len))
499                 return True
500
class InfoExtractor(object):
	"""Base class for information extractors.

	An information extractor takes a URL and produces, for each video the
	URL refers to, a dictionary of metadata which it hands over to its
	FileDownloader. Each dictionary must provide these fields:

	id:             Video identifier.
	url:            Final video URL.
	uploader:       Nickname of the video uploader.
	title:          Literal title.
	stitle:         Simplified title.
	ext:            Video filename extension.

	Concrete extractors override _real_initialize() and _real_extract(),
	plus the suitable() static method, and are normally instantiated and
	registered with the main downloader.
	"""

	# One-time initialization flag and the downloader in charge.
	_ready = False
	_downloader = None

	def __init__(self, downloader=None):
		"""Create the extractor, optionally attaching a downloader."""
		self._ready = False
		self.set_downloader(downloader)

	@staticmethod
	def suitable(url):
		"""Return True when this IE can handle the given URL (base: never)."""
		return False

	def initialize(self):
		"""Perform one-time setup (authentication, etc.) on first use."""
		if self._ready:
			return
		self._real_initialize()
		self._ready = True

	def extract(self, url):
		"""Initialize if needed, then extract and return the URL's information."""
		self.initialize()
		return self._real_extract(url)

	def set_downloader(self, downloader):
		"""Attach the FileDownloader this IE reports to."""
		self._downloader = downloader

	def _real_initialize(self):
		"""Actual setup work; subclasses override this."""
		pass

	def _real_extract(self, url):
		"""Actual extraction work; subclasses override this."""
		pass
561
562 class YoutubeIE(InfoExtractor):
563         """Information extractor for youtube.com."""
564
565         _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
566         _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
567         _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
568         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
569         _NETRC_MACHINE = 'youtube'
570         _available_formats = ['37', '22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
571         _video_extensions = {
572                 '13': '3gp',
573                 '17': 'mp4',
574                 '18': 'mp4',
575                 '22': 'mp4',
576                 '37': 'mp4',
577         }
578
	@staticmethod
	def suitable(url):
		"""Return True if url matches the recognized YouTube URL patterns."""
		return (re.match(YoutubeIE._VALID_URL, url) is not None)
582
583         @staticmethod
584         def htmlentity_transform(matchobj):
585                 """Transforms an HTML entity to a Unicode character."""
586                 entity = matchobj.group(1)
587
588                 # Known non-numeric HTML entity
589                 if entity in htmlentitydefs.name2codepoint:
590                         return unichr(htmlentitydefs.name2codepoint[entity])
591
592                 # Unicode character
593                 mobj = re.match(ur'(?u)#(x?\d+)', entity)
594                 if mobj is not None:
595                         numstr = mobj.group(1)
596                         if numstr.startswith(u'x'):
597                                 base = 16
598                                 numstr = u'0%s' % numstr
599                         else:
600                                 base = 10
601                         return unichr(long(numstr, base))
602
603                 # Unknown entity in name, return its literal representation
604                 return (u'&%s;' % entity)
605
606         def report_lang(self):
607                 """Report attempt to set language."""
608                 self._downloader.to_stdout(u'[youtube] Setting language')
609
610         def report_login(self):
611                 """Report attempt to log in."""
612                 self._downloader.to_stdout(u'[youtube] Logging in')
613         
614         def report_age_confirmation(self):
615                 """Report attempt to confirm age."""
616                 self._downloader.to_stdout(u'[youtube] Confirming age')
617         
618         def report_video_info_webpage_download(self, video_id):
619                 """Report attempt to download video info webpage."""
620                 self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
621         
622         def report_information_extraction(self, video_id):
623                 """Report attempt to extract video information."""
624                 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
625         
	def report_unavailable_format(self, video_id, format):
		"""Report that the requested video format is not available."""
		self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
629         
630         def report_rtmp_download(self):
631                 """Indicate the download will use the RTMP protocol."""
632                 self._downloader.to_stdout(u'[youtube] RTMP download detected')
633         
	def _real_initialize(self):
		"""Prepare the YouTube session: set language, log in, confirm age.

		Credentials come from the downloader params ('username'/'password')
		or, with 'usenetrc', from the user's .netrc file. Language and
		login failures only emit warnings and return; an age-confirmation
		failure is reported through trouble(). Does nothing when no
		downloader is attached.
		"""
		if self._downloader is None:
			return

		username = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					# netrc authenticators() returns (login, account, password)
					username = info[0]
					password = info[2]
				else:
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				# Best-effort: a broken/missing .netrc aborts initialization
				# with a warning instead of crashing the download.
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		# Set language
		# NOTE(review): fetching _LANG_URL is done only for its cookie side
		# effect (the response body is discarded) — presumably it pins the
		# interface language so later page scraping is predictable; confirm.
		request = urllib2.Request(self._LANG_URL, None, std_headers)
		try:
			self.report_lang()
			urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
			return

		# No authentication to be performed
		if username is None:
			return

		# Log in
		login_form = {
				'current_form': 'loginForm',
				'next':         '/',
				'action_login': 'Log In',
				'username':     username,
				'password':     password,
				}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			# If the login form is still present in the response, the
			# credentials were rejected.
			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return

		# Confirm age
		age_form = {
				'next_url':             '/',
				'action_confirm':       'Confirm',
				}
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
		try:
			self.report_age_confirmation()
			age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return

703         def _real_extract(self, url):
704                 # Extract video id from URL
705                 mobj = re.match(self._VALID_URL, url)
706                 if mobj is None:
707                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
708                         return
709                 video_id = mobj.group(2)
710
711                 # Downloader parameters
712                 best_quality = False
713                 format_param = None
714                 quality_index = 0
715                 if self._downloader is not None:
716                         params = self._downloader.params
717                         format_param = params.get('format', None)
718                         if format_param == '0':
719                                 format_param = self._available_formats[quality_index]
720                                 best_quality = True
721
722                 while True:
723                         # Extension
724                         video_extension = self._video_extensions.get(format_param, 'flv')
725
726                         # Get video info
727                         video_info_url = 'http://www.youtube.com/get_video_info?&video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en' % video_id
728                         request = urllib2.Request(video_info_url, None, std_headers)
729                         try:
730                                 self.report_video_info_webpage_download(video_id)
731                                 video_info_webpage = urllib2.urlopen(request).read()
732                                 video_info = urlparse.parse_qs(video_info_webpage)
733                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
734                                 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
735                                 return
736                         self.report_information_extraction(video_id)
737
738                         # "t" param
739                         if 'token' not in video_info:
740                                 # Attempt to see if YouTube has issued an error message
741                                 if 'reason' not in video_info:
742                                         self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
743                                         stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
744                                         stream.write(video_info_webpage)
745                                         stream.close()
746                                 else:
747                                         reason = urllib.unquote_plus(video_info['reason'][0])
748                                         self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
749                                 return
750                         token = urllib.unquote_plus(video_info['token'][0])
751                         video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
752                         if format_param is not None:
753                                 video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
754
755                         # Check possible RTMP download
756                         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
757                                 self.report_rtmp_download()
758                                 video_real_url = video_info['conn'][0]
759
760                         # uploader
761                         if 'author' not in video_info:
762                                 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
763                                 return
764                         video_uploader = urllib.unquote_plus(video_info['author'][0])
765
766                         # title
767                         if 'title' not in video_info:
768                                 self._downloader.trouble(u'ERROR: unable to extract video title')
769                                 return
770                         video_title = urllib.unquote_plus(video_info['title'][0])
771                         video_title = video_title.decode('utf-8')
772                         video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
773                         video_title = video_title.replace(os.sep, u'%')
774
775                         # simplified title
776                         simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
777                         simple_title = simple_title.strip(ur'_')
778
779                         try:
780                                 # Process video information
781                                 self._downloader.process_info({
782                                         'id':           video_id.decode('utf-8'),
783                                         'url':          video_real_url.decode('utf-8'),
784                                         'uploader':     video_uploader.decode('utf-8'),
785                                         'title':        video_title,
786                                         'stitle':       simple_title,
787                                         'ext':          video_extension.decode('utf-8'),
788                                 })
789
790                                 return
791
792                         except UnavailableFormatError, err:
793                                 if best_quality:
794                                         if quality_index == len(self._available_formats) - 1:
795                                                 # I don't ever expect this to happen
796                                                 self._downloader.trouble(u'ERROR: no known formats available for video')
797                                                 return
798                                         else:
799                                                 self.report_unavailable_format(video_id, format_param)
800                                                 quality_index += 1
801                                                 format_param = self._available_formats[quality_index]
802                                                 continue
803                                 else: 
804                                         self._downloader.trouble('ERROR: format not available for video')
805                                         return
806
807
808 class MetacafeIE(InfoExtractor):
809         """Information Extractor for metacafe.com."""
810
811         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
812         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
813         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
814         _youtube_ie = None
815
816         def __init__(self, youtube_ie, downloader=None):
817                 InfoExtractor.__init__(self, downloader)
818                 self._youtube_ie = youtube_ie
819
820         @staticmethod
821         def suitable(url):
822                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
823
824         def report_disclaimer(self):
825                 """Report disclaimer retrieval."""
826                 self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
827
828         def report_age_confirmation(self):
829                 """Report attempt to confirm age."""
830                 self._downloader.to_stdout(u'[metacafe] Confirming age')
831         
832         def report_download_webpage(self, video_id):
833                 """Report webpage download."""
834                 self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
835         
836         def report_extraction(self, video_id):
837                 """Report information extraction."""
838                 self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
839
840         def _real_initialize(self):
841                 # Retrieve disclaimer
842                 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
843                 try:
844                         self.report_disclaimer()
845                         disclaimer = urllib2.urlopen(request).read()
846                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
847                         self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
848                         return
849
850                 # Confirm age
851                 disclaimer_form = {
852                         'filters': '0',
853                         'submit': "Continue - I'm over 18",
854                         }
855                 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
856                 try:
857                         self.report_age_confirmation()
858                         disclaimer = urllib2.urlopen(request).read()
859                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
860                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
861                         return
862         
863         def _real_extract(self, url):
864                 # Extract id and simplified title from URL
865                 mobj = re.match(self._VALID_URL, url)
866                 if mobj is None:
867                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
868                         return
869
870                 video_id = mobj.group(1)
871
872                 # Check if video comes from YouTube
873                 mobj2 = re.match(r'^yt-(.*)$', video_id)
874                 if mobj2 is not None:
875                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
876                         return
877
878                 simple_title = mobj.group(2).decode('utf-8')
879                 video_extension = 'flv'
880
881                 # Retrieve video webpage to extract further information
882                 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
883                 try:
884                         self.report_download_webpage(video_id)
885                         webpage = urllib2.urlopen(request).read()
886                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
887                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
888                         return
889
890                 # Extract URL, uploader and title from webpage
891                 self.report_extraction(video_id)
892                 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
893                 if mobj is None:
894                         self._downloader.trouble(u'ERROR: unable to extract media URL')
895                         return
896                 mediaURL = urllib.unquote(mobj.group(1))
897
898                 #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
899                 #if mobj is None:
900                 #       self._downloader.trouble(u'ERROR: unable to extract gdaKey')
901                 #       return
902                 #gdaKey = mobj.group(1)
903                 #
904                 #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
905
906                 video_url = mediaURL
907
908                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
909                 if mobj is None:
910                         self._downloader.trouble(u'ERROR: unable to extract title')
911                         return
912                 video_title = mobj.group(1).decode('utf-8')
913
914                 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
915                 if mobj is None:
916                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
917                         return
918                 video_uploader = mobj.group(1)
919
920                 try:
921                         # Process video information
922                         self._downloader.process_info({
923                                 'id':           video_id.decode('utf-8'),
924                                 'url':          video_url.decode('utf-8'),
925                                 'uploader':     video_uploader.decode('utf-8'),
926                                 'title':        video_title,
927                                 'stitle':       simple_title,
928                                 'ext':          video_extension.decode('utf-8'),
929                         })
930                 except UnavailableFormatError:
931                         self._downloader.trouble(u'ERROR: format not available for video')
932
933
934 class YoutubeSearchIE(InfoExtractor):
935         """Information Extractor for YouTube search queries."""
936         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
937         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
938         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
939         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
940         _youtube_ie = None
941         _max_youtube_results = 1000
942
943         def __init__(self, youtube_ie, downloader=None):
944                 InfoExtractor.__init__(self, downloader)
945                 self._youtube_ie = youtube_ie
946         
947         @staticmethod
948         def suitable(url):
949                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
950
951         def report_download_page(self, query, pagenum):
952                 """Report attempt to download playlist page with given number."""
953                 self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
954
955         def _real_initialize(self):
956                 self._youtube_ie.initialize()
957         
958         def _real_extract(self, query):
959                 mobj = re.match(self._VALID_QUERY, query)
960                 if mobj is None:
961                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
962                         return
963
964                 prefix, query = query.split(':')
965                 prefix = prefix[8:]
966                 if prefix == '':
967                         self._download_n_results(query, 1)
968                         return
969                 elif prefix == 'all':
970                         self._download_n_results(query, self._max_youtube_results)
971                         return
972                 else:
973                         try:
974                                 n = long(prefix)
975                                 if n <= 0:
976                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
977                                         return
978                                 elif n > self._max_youtube_results:
979                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
980                                         n = self._max_youtube_results
981                                 self._download_n_results(query, n)
982                                 return
983                         except ValueError: # parsing prefix as integer fails
984                                 self._download_n_results(query, 1)
985                                 return
986
987         def _download_n_results(self, query, n):
988                 """Downloads a specified number of results for a query"""
989
990                 video_ids = []
991                 already_seen = set()
992                 pagenum = 1
993
994                 while True:
995                         self.report_download_page(query, pagenum)
996                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
997                         request = urllib2.Request(result_url, None, std_headers)
998                         try:
999                                 page = urllib2.urlopen(request).read()
1000                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1001                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1002                                 return
1003
1004                         # Extract video identifiers
1005                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1006                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1007                                 if video_id not in already_seen:
1008                                         video_ids.append(video_id)
1009                                         already_seen.add(video_id)
1010                                         if len(video_ids) == n:
1011                                                 # Specified n videos reached
1012                                                 for id in video_ids:
1013                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1014                                                 return
1015
1016                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1017                                 for id in video_ids:
1018                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1019                                 return
1020
1021                         pagenum = pagenum + 1
1022
1023 class YoutubePlaylistIE(InfoExtractor):
1024         """Information Extractor for YouTube playlists."""
1025
1026         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*'
1027         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1028         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1029         _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s'
1030         _youtube_ie = None
1031
1032         def __init__(self, youtube_ie, downloader=None):
1033                 InfoExtractor.__init__(self, downloader)
1034                 self._youtube_ie = youtube_ie
1035         
1036         @staticmethod
1037         def suitable(url):
1038                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1039
1040         def report_download_page(self, playlist_id, pagenum):
1041                 """Report attempt to download playlist page with given number."""
1042                 self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1043
1044         def _real_initialize(self):
1045                 self._youtube_ie.initialize()
1046         
1047         def _real_extract(self, url):
1048                 # Extract playlist id
1049                 mobj = re.match(self._VALID_URL, url)
1050                 if mobj is None:
1051                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1052                         return
1053
1054                 # Download playlist pages
1055                 playlist_id = mobj.group(1)
1056                 video_ids = []
1057                 pagenum = 1
1058
1059                 while True:
1060                         self.report_download_page(playlist_id, pagenum)
1061                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1062                         try:
1063                                 page = urllib2.urlopen(request).read()
1064                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1065                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1066                                 return
1067
1068                         # Extract video identifiers
1069                         ids_in_page = []
1070                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1071                                 if mobj.group(1) not in ids_in_page:
1072                                         ids_in_page.append(mobj.group(1))
1073                         video_ids.extend(ids_in_page)
1074
1075                         if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page:
1076                                 break
1077                         pagenum = pagenum + 1
1078
1079                 for id in video_ids:
1080                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1081                 return
1082
1083 class YoutubeUserIE(InfoExtractor):
1084         """Information Extractor for YouTube users."""
1085
1086         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
1087         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1088         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
1089         _youtube_ie = None
1090
1091         def __init__(self, youtube_ie, downloader=None):
1092                 InfoExtractor.__init__(self, downloader)
1093                 self._youtube_ie = youtube_ie
1094         
1095         @staticmethod
1096         def suitable(url):
1097                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
1098
1099         def report_download_page(self, username):
1100                 """Report attempt to download user page."""
1101                 self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))
1102
1103         def _real_initialize(self):
1104                 self._youtube_ie.initialize()
1105         
1106         def _real_extract(self, url):
1107                 # Extract username
1108                 mobj = re.match(self._VALID_URL, url)
1109                 if mobj is None:
1110                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1111                         return
1112
1113                 # Download user page
1114                 username = mobj.group(1)
1115                 video_ids = []
1116                 pagenum = 1
1117
1118                 self.report_download_page(username)
1119                 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
1120                 try:
1121                         page = urllib2.urlopen(request).read()
1122                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1123                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1124                         return
1125
1126                 # Extract video identifiers
1127                 ids_in_page = []
1128
1129                 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1130                         if mobj.group(1) not in ids_in_page:
1131                                 ids_in_page.append(mobj.group(1))
1132                 video_ids.extend(ids_in_page)
1133
1134                 for id in video_ids:
1135                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1136                 return
1137
class PostProcessor(object):
	"""Base class for post-download processing steps.

	Instances are registered on a FileDownloader through its
	add_post_processor() method. After each successful download the
	downloader walks its chain of PostProcessors, calling run() on each
	one: first with an initial info dictionary, then with whatever the
	previous processor returned. The chain stops as soon as a processor
	returns None or the end of the chain is reached.

	Like InfoExtractor objects, PostProcessors keep a back-reference to
	the downloader they belong to ("mutual registration").
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Attach this post processor to the given downloader."""
		self._downloader = downloader

	def run(self, information):
		"""Process one finished download.

		``information`` is an InfoExtractor-style dictionary with an
		extra "filepath" key naming the downloaded file. Returning None
		halts the postprocessing chain; returning a dictionary (possibly
		this one, after modification) passes it to the next processor.
		Implementations may raise PostProcessingError to signal failure
		to the calling downloader.

		This base implementation passes the information through untouched.
		"""
		return information

1183         
1184 ### MAIN PROGRAM ###
1185 if __name__ == '__main__':
1186         try:
1187                 # Modules needed only when running the main program
1188                 import getpass
1189                 import optparse
1190
1191                 # Function to update the program file with the latest version from bitbucket.org
1192                 def update_self(downloader, filename):
1193                         # Note: downloader only used for options
1194                         if not os.access (filename, os.W_OK):
1195                                 sys.exit('ERROR: no write permissions on %s' % filename)
1196
1197                         downloader.to_stdout('Updating to latest stable version...')
1198                         latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
1199                         latest_version = urllib.urlopen(latest_url).read().strip()
1200                         prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
1201                         newcontent = urllib.urlopen(prog_url).read()
1202                         stream = open(filename, 'w')
1203                         stream.write(newcontent)
1204                         stream.close()
1205                         downloader.to_stdout('Updated to version %s' % latest_version)
1206
1207                 # General configuration
1208                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
1209                 urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
1210                 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
1211
1212                 # Parse command line
1213                 parser = optparse.OptionParser(
1214                         usage='Usage: %prog [options] url...',
1215                         version='2010.01.05',
1216                         conflict_handler='resolve',
1217                 )
1218
1219                 parser.add_option('-h', '--help',
1220                                 action='help', help='print this help text and exit')
1221                 parser.add_option('-v', '--version',
1222                                 action='version', help='print program version and exit')
1223                 parser.add_option('-U', '--update',
1224                                 action='store_true', dest='update_self', help='update this program to latest stable version')
1225                 parser.add_option('-i', '--ignore-errors',
1226                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
1227                 parser.add_option('-r', '--rate-limit',
1228                                 dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
1229
1230                 authentication = optparse.OptionGroup(parser, 'Authentication Options')
1231                 authentication.add_option('-u', '--username',
1232                                 dest='username', metavar='UN', help='account username')
1233                 authentication.add_option('-p', '--password',
1234                                 dest='password', metavar='PW', help='account password')
1235                 authentication.add_option('-n', '--netrc',
1236                                 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
1237                 parser.add_option_group(authentication)
1238
1239                 video_format = optparse.OptionGroup(parser, 'Video Format Options')
1240                 video_format.add_option('-f', '--format',
1241                                 action='store', dest='format', metavar='FMT', help='video format code')
1242                 video_format.add_option('-b', '--best-quality',
1243                                 action='store_const', dest='format', help='download the best quality video possible', const='0')
1244                 video_format.add_option('-m', '--mobile-version',
1245                                 action='store_const', dest='format', help='alias for -f 17', const='17')
1246                 video_format.add_option('-d', '--high-def',
1247                                 action='store_const', dest='format', help='alias for -f 22', const='22')
1248                 parser.add_option_group(video_format)
1249
1250                 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
1251                 verbosity.add_option('-q', '--quiet',
1252                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
1253                 verbosity.add_option('-s', '--simulate',
1254                                 action='store_true', dest='simulate', help='do not download video', default=False)
1255                 verbosity.add_option('-g', '--get-url',
1256                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
1257                 verbosity.add_option('-e', '--get-title',
1258                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
1259                 parser.add_option_group(verbosity)
1260
1261                 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
1262                 filesystem.add_option('-t', '--title',
1263                                 action='store_true', dest='usetitle', help='use title in file name', default=False)
1264                 filesystem.add_option('-l', '--literal',
1265                                 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
1266                 filesystem.add_option('-o', '--output',
1267                                 dest='outtmpl', metavar='TPL', help='output filename template')
1268                 filesystem.add_option('-a', '--batch-file',
1269                                 dest='batchfile', metavar='F', help='file containing URLs to download')
1270                 filesystem.add_option('-w', '--no-overwrites',
1271                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
1272                 filesystem.add_option('-c', '--continue',
1273                                 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
1274                 parser.add_option_group(filesystem)
1275
1276                 (opts, args) = parser.parse_args()
1277         
1278                 # Batch file verification
1279                 batchurls = []
1280                 if opts.batchfile is not None:
1281                         try:
1282                                 batchurls = open(opts.batchfile, 'r').readlines()
1283                                 batchurls = [x.strip() for x in batchurls]
1284                                 batchurls = [x for x in batchurls if len(x) > 0]
1285                         except IOError:
1286                                 sys.exit(u'ERROR: batch file could not be read')
1287                 all_urls = batchurls + args
1288
		# Conflicting, missing and erroneous options.
		# Each parser.error() call prints the message and exits the program.
		# .netrc and explicit credentials are mutually exclusive.
		if opts.usenetrc and (opts.username is not None or opts.password is not None):
			parser.error(u'using .netrc conflicts with giving username/password')
		# A password on its own is useless; a username must accompany it.
		if opts.password is not None and opts.username is None:
			parser.error(u'account username missing')
		# -o fully determines the filename, so -t/-l would be silently ignored.
		if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
			parser.error(u'using output template conflicts with using title or literal title')
		if opts.usetitle and opts.useliteral:
			parser.error(u'using title conflicts with using literal title')
		# Username given but no password: prompt interactively (no echo).
		if opts.username is not None and opts.password is None:
			opts.password = getpass.getpass(u'Type account password and press return:')
		# Replace the textual rate limit (e.g. "50k", "44.6m") with its
		# numeric byte value; parse_bytes returns None on malformed input.
		if opts.ratelimit is not None:
			numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
			if numeric_limit is None:
				parser.error(u'invalid rate limit specified')
			opts.ratelimit = numeric_limit
1305
1306                 # Information extractors
1307                 youtube_ie = YoutubeIE()
1308                 metacafe_ie = MetacafeIE(youtube_ie)
1309                 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
1310                 youtube_user_ie = YoutubeUserIE(youtube_ie)
1311                 youtube_search_ie = YoutubeSearchIE(youtube_ie)
1312
1313                 # File downloader
1314                 fd = FileDownloader({
1315                         'usenetrc': opts.usenetrc,
1316                         'username': opts.username,
1317                         'password': opts.password,
1318                         'quiet': (opts.quiet or opts.geturl or opts.gettitle),
1319                         'forceurl': opts.geturl,
1320                         'forcetitle': opts.gettitle,
1321                         'simulate': (opts.simulate or opts.geturl or opts.gettitle),
1322                         'format': opts.format,
1323                         'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
1324                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
1325                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
1326                                 or u'%(id)s.%(ext)s'),
1327                         'ignoreerrors': opts.ignoreerrors,
1328                         'ratelimit': opts.ratelimit,
1329                         'nooverwrites': opts.nooverwrites,
1330                         'continuedl': opts.continue_dl,
1331                         })
1332                 fd.add_info_extractor(youtube_search_ie)
1333                 fd.add_info_extractor(youtube_pl_ie)
1334                 fd.add_info_extractor(youtube_user_ie)
1335                 fd.add_info_extractor(metacafe_ie)
1336                 fd.add_info_extractor(youtube_ie)
1337
1338                 # Update version
1339                 if opts.update_self:
1340                         update_self(fd, sys.argv[0])
1341
1342                 # Maybe do nothing
1343                 if len(all_urls) < 1:
1344                         if not opts.update_self:
1345                                 parser.error(u'you must provide at least one URL')
1346                         else:
1347                                 sys.exit()
1348                 retcode = fd.download(all_urls)
1349                 sys.exit(retcode)
1350
	except DownloadError:
		# Presumably the failure was already reported downstream (DownloadError
		# "will contain the appropriate error message" per its docstring -- TODO
		# confirm it is printed before raising); just signal failure via status 1.
		sys.exit(1)
	except SameFileError:
		# Two or more URLs would resolve to the same output filename.
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		# Ctrl-C: exit with a short message instead of a traceback.
		sys.exit(u'\nERROR: Interrupted by user')