X-Git-Url: https://git.jankratochvil.net/?a=blobdiff_plain;f=youtube_dl%2F__init__.py;h=4c9c237d91d0086077e54d763117c508460eda75;hb=09fbc6c952ddb42ff042c2911576102dc8e78e23;hp=2404e2359ced09d34dcdb06245998de7e9458cf7;hpb=c92e184f751a3b58b5a6fbf090f4882932e5bd4b;p=youtube-dl.git

diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 2404e23..4c9c237 100755
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -18,7 +18,7 @@ __authors__  = (
 	)
 
 __license__ = 'Public Domain'
-__version__ = '2011.12.18'
+__version__ = '2012.01.08b'
 
 UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
 
@@ -259,14 +259,14 @@ def sanitize_open(filename, open_mode):
 				import msvcrt
 				msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 			return (sys.stdout, filename)
-		stream = open(filename, open_mode)
+		stream = open(_encodeFilename(filename), open_mode)
 		return (stream, filename)
 	except (IOError, OSError), err:
 		# In case of error, try to remove win32 forbidden chars
 		filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
 
 		# An exception here should be caught in the caller
-		stream = open(filename, open_mode)
+		stream = open(_encodeFilename(filename), open_mode)
 		return (stream, filename)
 
 
@@ -290,6 +290,30 @@ def _orderedSet(iterable):
 			res.append(el)
 	return res
 
+def _unescapeHTML(s):
+	"""
+	@param s a string (of type unicode)
+	"""
+	assert type(s) == type(u'')
+
+	htmlParser = HTMLParser.HTMLParser()
+	return htmlParser.unescape(s)
+
+def _encodeFilename(s):
+	"""
+	@param s The name of the file (of type unicode)
+	"""
+
+	assert type(s) == type(u'')
+
+	if sys.platform == 'win32' and sys.getwindowsversion().major >= 5:
+		# Pass u'' directly to use Unicode APIs on Windows 2000 and up
+		# (Detecting Windows NT 4 is tricky because 'major >= 4' would
+		# match Windows 9x series as well. Besides, NT 4 is obsolete.)
+		return s
+	else:
+		return s.encode(sys.getfilesystemencoding(), 'ignore')
+
 class DownloadError(Exception):
 	"""Download Error exception.
 
@@ -554,16 +578,17 @@ class FileDownloader(object):
 		self._pps.append(pp)
 		pp.set_downloader(self)
 
-	def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
+	def to_screen(self, message, skip_eol=False):
 		"""Print message to stdout if not in quiet mode."""
-		try:
-			if not self.params.get('quiet', False):
-				terminator = [u'\n', u''][skip_eol]
-				print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
+		assert type(message) == type(u'')
+		if not self.params.get('quiet', False):
+			terminator = [u'\n', u''][skip_eol]
+			output = message + terminator
+
+			if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
+				output = output.encode(preferredencoding(), 'ignore')
+			self._screen_file.write(output)
 			self._screen_file.flush()
-		except (UnicodeEncodeError), err:
-			if not ignore_encoding_errors:
-				raise
 
 	def to_stderr(self, message):
 		"""Print message to stderr."""
@@ -613,7 +638,7 @@ class FileDownloader(object):
 	def temp_name(self, filename):
 		"""Returns a temporary filename for the given filename."""
 		if self.params.get('nopart', False) or filename == u'-' or \
-				(os.path.exists(filename) and not os.path.isfile(filename)):
+				(os.path.exists(_encodeFilename(filename)) and not os.path.isfile(_encodeFilename(filename))):
 			return filename
 		return filename + u'.part'
 
@@ -626,7 +651,7 @@ class FileDownloader(object):
 		try:
 			if old_filename == new_filename:
 				return
-			os.rename(old_filename, new_filename)
+			os.rename(_encodeFilename(old_filename), _encodeFilename(new_filename))
 		except (IOError, OSError), err:
 			self.trouble(u'ERROR: unable to rename file')
 
@@ -634,7 +659,7 @@ class FileDownloader(object):
 		"""Try to set the last-modified time of the given file."""
 		if last_modified_hdr is None:
 			return
-		if not os.path.isfile(filename):
+		if not os.path.isfile(_encodeFilename(filename)):
 			return
 		timestr = last_modified_hdr
 		if timestr is None:
@@ -650,15 +675,15 @@ class FileDownloader(object):
 
 	def report_writedescription(self, descfn):
 		""" Report that the description file is being written """
-		self.to_screen(u'[info] Writing video description to: %s' % descfn, ignore_encoding_errors=True)
+		self.to_screen(u'[info] Writing video description to: ' + descfn)
 
 	def report_writeinfojson(self, infofn):
 		""" Report that the metadata file has been written """
-		self.to_screen(u'[info] Video description metadata as JSON to: %s' % infofn, ignore_encoding_errors=True)
+		self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 
 	def report_destination(self, filename):
 		"""Report destination filename."""
-		self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
+		self.to_screen(u'[download] Destination: ' + filename)
 
 	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 		"""Report download progress."""
@@ -760,8 +785,8 @@ class FileDownloader(object):
 			return
 
 		try:
-			dn = os.path.dirname(filename)
-			if dn != '' and not os.path.exists(dn):
+			dn = os.path.dirname(_encodeFilename(filename))
+			if dn != '' and not os.path.exists(dn): # dn is already encoded
 				os.makedirs(dn)
 		except (OSError, IOError), err:
 			self.trouble(u'ERROR: unable to create directory ' + unicode(err))
@@ -769,9 +794,9 @@ class FileDownloader(object):
 
 		if self.params.get('writedescription', False):
 			try:
-				descfn = filename + '.description'
+				descfn = filename + u'.description'
 				self.report_writedescription(descfn)
-				descfile = open(descfn, 'wb')
+				descfile = open(_encodeFilename(descfn), 'wb')
 				try:
 					descfile.write(info_dict['description'].encode('utf-8'))
 				finally:
@@ -781,7 +806,7 @@ class FileDownloader(object):
 				return
 
 		if self.params.get('writeinfojson', False):
-			infofn = filename + '.info.json'
+			infofn = filename + u'.info.json'
 			self.report_writeinfojson(infofn)
 			try:
 				json.dump
@@ -789,7 +814,7 @@ class FileDownloader(object):
 				self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
 				return
 			try:
-				infof = open(infofn, 'wb')
+				infof = open(_encodeFilename(infofn), 'wb')
 				try:
 					json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
 					json.dump(json_info_dict, infof)
@@ -800,7 +825,7 @@ class FileDownloader(object):
 				return
 
 		if not self.params.get('skip_download', False):
-			if self.params.get('nooverwrites', False) and os.path.exists(filename):
+			if self.params.get('nooverwrites', False) and os.path.exists(_encodeFilename(filename)):
 				success = True
 			else:
 				try:
@@ -873,11 +898,11 @@ class FileDownloader(object):
 		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
 		retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
 		while retval == 2 or retval == 1:
-			prevsize = os.path.getsize(tmpfilename)
+			prevsize = os.path.getsize(_encodeFilename(tmpfilename))
 			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 			time.sleep(5.0) # This seems to be needed
 			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
-			cursize = os.path.getsize(tmpfilename)
+			cursize = os.path.getsize(_encodeFilename(tmpfilename))
 			if prevsize == cursize and retval == 1:
 				break
 			 # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
@@ -886,7 +911,7 @@ class FileDownloader(object):
 				retval = 0
 				break
 		if retval == 0:
-			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
+			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(_encodeFilename(tmpfilename)))
 			self.try_rename(tmpfilename, filename)
 			return True
 		else:
@@ -898,7 +923,7 @@ class FileDownloader(object):
 		player_url = info_dict.get('player_url', None)
 
 		# Check file already present
-		if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
+		if self.params.get('continuedl', False) and os.path.isfile(_encodeFilename(filename)) and not self.params.get('nopart', False):
 			self.report_file_already_downloaded(filename)
 			return True
 
@@ -915,8 +940,8 @@ class FileDownloader(object):
 		request = urllib2.Request(url, None, headers)
 
 		# Establish possible resume length
-		if os.path.isfile(tmpfilename):
-			resume_len = os.path.getsize(tmpfilename)
+		if os.path.isfile(_encodeFilename(tmpfilename)):
+			resume_len = os.path.getsize(_encodeFilename(tmpfilename))
 		else:
 			resume_len = 0
 
@@ -1590,8 +1615,6 @@ class DailymotionIE(InfoExtractor):
 		self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
 
 	def _real_extract(self, url):
-		htmlParser = HTMLParser.HTMLParser()
-		
 		# Extract id and simplified title from URL
 		mobj = re.match(self._VALID_URL, url)
 		if mobj is None:
@@ -1635,7 +1658,7 @@ class DailymotionIE(InfoExtractor):
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: unable to extract title')
 			return
-		video_title = htmlParser.unescape(mobj.group('title')).decode('utf-8')
+		video_title = _unescapeHTML(mobj.group('title').decode('utf-8'))
 		video_title = sanitize_title(video_title)
 		simple_title = _simplify_title(video_title)
 
@@ -2636,7 +2659,7 @@ class YoutubeUserIE(InfoExtractor):
 		else:
 			video_ids = video_ids[playliststart:playlistend]
 
-		self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
+		self._downloader.to_screen(u"[youtube] user %s: Collected %d video ids (downloading %d of them)" %
 				(username, all_ids_count, len(video_ids)))
 
 		for video_id in video_ids:
@@ -3743,17 +3766,17 @@ class MixcloudIE(InfoExtractor):
 		try:
 			# Process file information
 			self._downloader.process_info({
-				'id':		file_id.decode('utf-8'),
-				'url':		file_url.decode('utf-8'),
+				'id': file_id.decode('utf-8'),
+				'url': file_url.decode('utf-8'),
 				'uploader':	uploader.decode('utf-8'),
-				'upload_date':	u'NA',
-				'title':	json_data['name'],
-				'stitle':	_simplify_title(json_data['name']),
-				'ext':		file_url.split('.')[-1].decode('utf-8'),
-				'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),
-				'thumbnail':    json_data['thumbnail_url'],
-				'description':  json_data['description'],
-				'player_url':	player_url.decode('utf-8'),
+				'upload_date': u'NA',
+				'title': json_data['name'],
+				'stitle': _simplify_title(json_data['name']),
+				'ext': file_url.split('.')[-1].decode('utf-8'),
+				'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
+				'thumbnail': json_data['thumbnail_url'],
+				'description': json_data['description'],
+				'player_url': player_url.decode('utf-8'),
 			})
 		except UnavailableVideoError, err:
 			self._downloader.trouble(u'ERROR: unable to download file')
@@ -3877,6 +3900,100 @@ class StanfordOpenClassroomIE(InfoExtractor):
 				assert entry['type'] == 'reference'
 				self.extract(entry['url'])
 
+class MTVIE(InfoExtractor):
+	"""Information extractor for MTV.com"""
+
+	_VALID_URL = r'^(?P<proto>https?://)?(?:www\.)?mtv\.com/videos/[^/]+/(?P<videoid>[0-9]+)/[^/]+$'
+	IE_NAME = u'mtv'
+
+	def report_webpage(self, video_id):
+		"""Report information extraction."""
+		self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
+
+	def report_extraction(self, video_id):
+		"""Report information extraction."""
+		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
+
+	def _real_extract(self, url):
+		mobj = re.match(self._VALID_URL, url)
+		if mobj is None:
+			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+			return
+		if not mobj.group('proto'):
+			url = 'http://' + url
+		video_id = mobj.group('videoid')
+		self.report_webpage(video_id)
+
+		request = urllib2.Request(url)
+		try:
+			webpage = urllib2.urlopen(request).read()
+		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+			return
+
+		mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage)
+		if mobj is None:
+			self._downloader.trouble(u'ERROR: unable to extract song name')
+			return
+		song_name = _unescapeHTML(mobj.group(1).decode('iso-8859-1'))
+		mobj = re.search(r'<meta name="mtv_an" content="([^"]+)"/>', webpage)
+		if mobj is None:
+			self._downloader.trouble(u'ERROR: unable to extract performer')
+			return
+		performer = _unescapeHTML(mobj.group(1).decode('iso-8859-1'))
+		video_title = performer + ' - ' + song_name 
+
+		mobj = re.search(r'<meta name="mtvn_uri" content="([^"]+)"/>', webpage)
+		if mobj is None:
+			self._downloader.trouble(u'ERROR: unable to mtvn_uri')
+			return
+		mtvn_uri = mobj.group(1)
+
+		mobj = re.search(r'MTVN.Player.defaultPlaylistId = ([0-9]+);', webpage)
+		if mobj is None:
+			self._downloader.trouble(u'ERROR: unable to extract content id')
+			return
+		content_id = mobj.group(1)
+
+		videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri
+		self.report_extraction(video_id)
+		request = urllib2.Request(videogen_url)
+		try:
+			metadataXml = urllib2.urlopen(request).read()
+		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+			self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % str(err))
+			return
+
+		mdoc = xml.etree.ElementTree.fromstring(metadataXml)
+		renditions = mdoc.findall('.//rendition')
+
+		# For now, always pick the highest quality.
+		rendition = renditions[-1]
+
+		try:
+			_,_,ext = rendition.attrib['type'].partition('/')
+			format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
+			video_url = rendition.find('./src').text
+		except KeyError:
+			self._downloader.trouble('Invalid rendition field.')
+			return
+
+		self._downloader.increment_downloads()
+		info = {
+			'id': video_id,
+			'url': video_url,
+			'uploader': performer,
+			'title': video_title,
+			'stitle': _simplify_title(video_title),
+			'ext': ext,
+			'format': format,
+		}
+
+		try:
+			self._downloader.process_info(info)
+		except UnavailableVideoError, err:
+			self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
+
 
 class PostProcessor(object):
 	"""Post Processor class.
@@ -3941,7 +4058,7 @@ class FFmpegExtractAudioPP(PostProcessor):
 	@staticmethod
 	def get_audio_codec(path):
 		try:
-			cmd = ['ffprobe', '-show_streams', '--', path]
+			cmd = ['ffprobe', '-show_streams', '--', _encodeFilename(path)]
 			handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
 			output = handle.communicate()[0]
 			if handle.wait() != 0:
@@ -3962,7 +4079,7 @@ class FFmpegExtractAudioPP(PostProcessor):
 			acodec_opts = []
 		else:
 			acodec_opts = ['-acodec', codec]
-		cmd = ['ffmpeg', '-y', '-i', path, '-vn'] + acodec_opts + more_opts + ['--', out_path]
+		cmd = ['ffmpeg', '-y', '-i', _encodeFilename(path), '-vn'] + acodec_opts + more_opts + ['--', _encodeFilename(out_path)]
 		try:
 			p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 			stdout,stderr = p.communicate()
@@ -4023,9 +4140,9 @@ class FFmpegExtractAudioPP(PostProcessor):
 				extension = 'wav'
 				more_opts += ['-f', 'wav']
 
-		(prefix, ext) = os.path.splitext(path)
-		new_path = prefix + '.' + extension
-		self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
+		prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
+		new_path = prefix + sep + extension
+		self._downloader.to_screen(u'[ffmpeg] Destination: ' + new_path)
 		try:
 			self.run_ffmpeg(path, new_path, acodec, more_opts)
 		except:
@@ -4039,13 +4156,13 @@ class FFmpegExtractAudioPP(PostProcessor):
  		# Try to update the date time for extracted audio file.
 		if information.get('filetime') is not None:
 			try:
-				os.utime(new_path, (time.time(), information['filetime']))
+				os.utime(_encodeFilename(new_path), (time.time(), information['filetime']))
 			except:
 				self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
 
 		if not self._keepvideo:
 			try:
-				os.remove(path)
+				os.remove(_encodeFilename(path))
 			except (IOError, OSError):
 				self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
 				return None
@@ -4060,7 +4177,7 @@ def updateSelf(downloader, filename):
 	if not os.access(filename, os.W_OK):
 		sys.exit('ERROR: no write permissions on %s' % filename)
 
-	downloader.to_screen('Updating to latest version...')
+	downloader.to_screen(u'Updating to latest version...')
 
 	try:
 		try:
@@ -4069,7 +4186,7 @@ def updateSelf(downloader, filename):
 			
 			vmatch = re.search("__version__ = '([^']+)'", newcontent)
 			if vmatch is not None and vmatch.group(1) == __version__:
-				downloader.to_screen('youtube-dl is up-to-date (' + __version__ + ')')
+				downloader.to_screen(u'youtube-dl is up-to-date (' + __version__ + ')')
 				return
 		finally:
 			urlh.close()
@@ -4085,7 +4202,7 @@ def updateSelf(downloader, filename):
 	except (IOError, OSError), err:
 		sys.exit('ERROR: unable to overwrite current version')
 
-	downloader.to_screen('Updated youtube-dl. Restart youtube-dl to use the new version.')
+	downloader.to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.')
 
 def parseOpts():
 	# Deferred imports
@@ -4093,9 +4210,9 @@ def parseOpts():
 	import optparse
 	import shlex
 
-	def _readOptions(filename):
+	def _readOptions(filename_bytes):
 		try:
-			optionf = open(filename)
+			optionf = open(filename_bytes)
 		except IOError:
 			return [] # silently skip if file is not present
 		try:
@@ -4234,6 +4351,8 @@ def parseOpts():
 	verbosity.add_option('--console-title',
 			action='store_true', dest='consoletitle',
 			help='display progress in console titlebar', default=False)
+	verbosity.add_option('-v', '--verbose',
+			action='store_true', dest='verbose', help='print various debugging information', default=False)
 
 
 	filesystem.add_option('-t', '--title',
@@ -4329,6 +4448,7 @@ def gen_extractors():
 		InfoQIE(),
 		MixcloudIE(),
 		StanfordOpenClassroomIE(),
+		MTVIE(),
 
 		GenericIE()
 	]
@@ -4369,10 +4489,14 @@ def _real_main():
 
 	# General configuration
 	cookie_processor = urllib2.HTTPCookieProcessor(jar)
-	opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())
+	proxy_handler = urllib2.ProxyHandler()
+	opener = urllib2.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
 	urllib2.install_opener(opener)
 	socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
 
+	if opts.verbose:
+		print(u'[debug] Proxy map: ' + str(proxy_handler.proxies))
+
 	extractors = gen_extractors()
 
 	if opts.list_extractors: