X-Git-Url: http://git.jankratochvil.net/?a=blobdiff_plain;f=youtube-dl;h=a5af555ff1471630f72853bb3c97cd6cfe7c6990;hb=cb6568bf21e891143dab378d4a40988fc52b9f1d;hp=6eafc30b1a3d9f82cf9f34caa552f445c3478ea6;hpb=b20d4f8626783ae61f5865a4d9aa3f460053c9a4;p=youtube-dl.git

diff --git a/youtube-dl b/youtube-dl
index 6eafc30..a5af555 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -12,6 +12,8 @@ __author__  = (
 	'Rogério Brito',
 	'Philipp Hagemeister',
 	'Sören Schulze',
+	'Kevin Ngo',
+	'Ori Avtalion',
 	)
 
 __license__ = 'Public Domain'
@@ -1560,9 +1562,6 @@ class DailymotionIE(InfoExtractor):
 		"""Report information extraction."""
 		self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
 
-	def _real_initialize(self):
-		return
-
 	def _real_extract(self, url):
 		# Extract id and simplified title from URL
 		mobj = re.match(self._VALID_URL, url)
@@ -1651,9 +1650,6 @@ class GoogleIE(InfoExtractor):
 		"""Report information extraction."""
 		self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
 
-	def _real_initialize(self):
-		return
-
 	def _real_extract(self, url):
 		# Extract id from URL
 		mobj = re.match(self._VALID_URL, url)
@@ -1758,9 +1754,6 @@ class PhotobucketIE(InfoExtractor):
 		"""Report information extraction."""
 		self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
 
-	def _real_initialize(self):
-		return
-
 	def _real_extract(self, url):
 		# Extract id from URL
 		mobj = re.match(self._VALID_URL, url)
@@ -1840,9 +1833,6 @@ class YahooIE(InfoExtractor):
 		"""Report information extraction."""
 		self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
 
-	def _real_initialize(self):
-		return
-
 	def _real_extract(self, url, new_video=True):
 		# Extract ID from URL
 		mobj = re.match(self._VALID_URL, url)
@@ -1993,9 +1983,6 @@ class VimeoIE(InfoExtractor):
 		"""Report information extraction."""
 		self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
 
-	def _real_initialize(self):
-		return
-
 	def _real_extract(self, url, new_video=True):
 		# Extract ID from URL
 		mobj = re.match(self._VALID_URL, url)
@@ -2118,9 +2105,6 @@ class GenericIE(InfoExtractor):
 		"""Report information extraction."""
 		self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
 
-	def _real_initialize(self):
-		return
-
 	def _real_extract(self, url):
 		# At this point we have a new video
 		self._downloader.increment_downloads()
@@ -2470,7 +2454,7 @@ class YahooSearchIE(InfoExtractor):
 class YoutubePlaylistIE(InfoExtractor):
 	"""Information Extractor for YouTube playlists."""
 
-	_VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
+	_VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
 	_TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
 	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
 	_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
@@ -2514,7 +2498,8 @@ class YoutubePlaylistIE(InfoExtractor):
 
 		while True:
 			self.report_download_page(playlist_id, pagenum)
-			request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
+			url = self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)
+			request = urllib2.Request(url)
 			try:
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -2548,7 +2533,7 @@ class YoutubeUserIE(InfoExtractor):
 	_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
 	_GDATA_PAGE_SIZE = 50
 	_GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
-	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
+	_VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
 	_youtube_ie = None
 	IE_NAME = u'youtube:user'
 
@@ -2646,9 +2631,6 @@ class DepositFilesIE(InfoExtractor):
 		"""Report information extraction."""
 		self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
 
-	def _real_initialize(self):
-		return
-
 	def _real_extract(self, url):
 		# At this point we have a new file
 		self._downloader.increment_downloads()
@@ -3039,9 +3021,6 @@ class MyVideoIE(InfoExtractor):
 		"""Report information extraction."""
 		self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)
 
-	def _real_initialize(self):
-		return
-
 	def _real_extract(self,url):
 		mobj = re.match(self._VALID_URL, url)
 		if mobj is None:
@@ -3504,9 +3483,6 @@ class SoundcloudIE(InfoExtractor):
 		"""Report information extraction."""
 		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
 
-	def _real_initialize(self):
-		return
-
 	def _real_extract(self, url):
 		htmlParser = HTMLParser.HTMLParser()
 
@@ -3532,50 +3508,141 @@ class SoundcloudIE(InfoExtractor):
 
 		self.report_extraction('%s/%s' % (uploader, slug_title))
 
-		# extract uid and access token
-		mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', page)   
+		# extract uid and stream token that soundcloud hands out for access
+		mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', webpage)
+		if mobj:
+			video_id = mobj.group(1)
+			stream_token = mobj.group(2)
+
+		# extract unsimplified title
+		mobj = re.search('"title":"(.*?)",', webpage)
 		if mobj:
-			video_id = match.group(1)
-			stream_token = match.group(2)
+			title = mobj.group(1)
 
-		# construct media url (with uid/token) to request song
+		# construct media url (with uid/token)
 		mediaURL = "http://media.soundcloud.com/stream/%s?stream_token=%s"
 		mediaURL = mediaURL % (video_id, stream_token)
 
 		# description
 		description = u'No description available'
-		mobj = re.search('track-description-value"><p>(.*?)</p>', page)
+		mobj = re.search('track-description-value"><p>(.*?)</p>', webpage)
 		if mobj:
 			description = mobj.group(1)
 		
 		# upload date
-		mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", page)
+		upload_date = None
+		mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage)
 		if mobj:
 			try:
-				upload_date = datetime.datetime.strptime(match.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
-			except:
-				pass
+				upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
+			except Exception as e:
+				print str(e)
 
-		# for soundcloud, a request must be made to a cross domain to establish
-		# needed cookies
+		# for soundcloud, a request to a cross domain is required for cookies
 		request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
 
 		try:
 			self._downloader.process_info({
-				'id':		video_id,
-				'url':		video_url,
-				'uploader':	uploader,
+				'id':		video_id.decode('utf-8'),
+				'url':		mediaURL,
+				'uploader':	uploader.decode('utf-8'),
 				'upload_date':  upload_date,
-				'title':	video_title,
-				'stitle':	simple_title,
+				'title':	simple_title.decode('utf-8'),
+				'stitle':	simple_title.decode('utf-8'),
 				'ext':		u'mp3',
 				'format':	u'NA',
 				'player_url':	None,
-				'description': description
+				'description': description.decode('utf-8')
 			})
 		except UnavailableVideoError:
 			self._downloader.trouble(u'\nERROR: unable to download video')
 
+
+class InfoQIE(InfoExtractor):
+	"""Information extractor for infoq.com: scrapes the RTMPE stream URL, title and description from a page."""
+
+	_VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
+	IE_NAME = u'infoq'
+
+	def report_webpage(self, video_id):
+		"""Report webpage download."""
+		self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
+
+	def report_extraction(self, video_id):
+		"""Report information extraction."""
+		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
+
+	def _simplify_title(self, title):
+		"""Replace each run of characters outside simple_title_chars with '_' and strip edge underscores."""
+		res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
+		return res.strip(ur'_')
+
+	def _real_extract(self, url):
+		"""Download the page at url, scrape the video info and pass it to the downloader; errors go to trouble()."""
+		mobj = re.match(self._VALID_URL, url)
+		if mobj is None:
+			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+			return
+
+		self.report_webpage(url)
+
+		try:
+			webpage = urllib2.urlopen(urllib2.Request(url)).read()
+		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+			return
+
+		self.report_extraction(url)
+
+		# Extract video URL (the jsclassref attribute holds it base64-encoded and URL-quoted)
+		mobj = re.search(r"jsclassref='([^']*)'", webpage)
+		if mobj is None:
+			self._downloader.trouble(u'ERROR: unable to extract video url')
+			return
+		video_url = 'rtmpe://video.infoq.com/cfx/st/' + urllib2.unquote(mobj.group(1).decode('base64'))
+
+		# Extract title
+		mobj = re.search(r'contentTitle = "(.*?)";', webpage)
+		if mobj is None:
+			self._downloader.trouble(u'ERROR: unable to extract video title')
+			return
+		video_title = mobj.group(1).decode('utf-8')
+
+		# Extract description
+		video_description = u'No description available.'
+		mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', webpage)
+		if mobj is not None:
+			video_description = mobj.group(1).decode('utf-8')
+
+		# Split on the last dot only, so a filename with zero or several dots cannot raise ValueError
+		video_filename = video_url.split('/')[-1]
+		video_id, _, extension = video_filename.rpartition('.')
+		if not (video_id and extension):
+			self._downloader.trouble(u'ERROR: unable to extract video id')
+			return
+
+		self._downloader.increment_downloads()
+		info = {
+			'id': video_id,
+			'url': video_url,
+			'uploader': None,
+			'upload_date': None,
+			'title': video_title,
+			'stitle': self._simplify_title(video_title),
+			'ext': extension,
+			'format': extension, # Extension is always(?) mp4, but seems to be flv
+			'thumbnail': None,
+			'description': video_description,
+			'player_url': None,
+		}
+
+		try:
+			self._downloader.process_info(info)
+		except UnavailableVideoError:
+			self._downloader.trouble(u'\nERROR: unable to download ' + video_url)
+
+
+
 class PostProcessor(object):
 	"""Post Processor class.
 
@@ -3972,12 +4039,13 @@ def gen_extractors():
 		EscapistIE(),
 		CollegeHumorIE(),
 		XVideosIE(),
-        SoundcloudIE(),
+		SoundcloudIE(),
+		InfoQIE(),
 
 		GenericIE()
 	]
 
-def main():
+def _real_main():
 	parser, opts, args = parseOpts()
 
 	# Open appropriate CookieJar
@@ -4137,10 +4205,9 @@ def main():
 
 	sys.exit(retcode)
 
-
-if __name__ == '__main__':
+def main():
 	try:
-		main()
+		_real_main()
 	except DownloadError:
 		sys.exit(1)
 	except SameFileError:
@@ -4148,4 +4215,7 @@ if __name__ == '__main__':
 	except KeyboardInterrupt:
 		sys.exit(u'\nERROR: Interrupted by user')
 
+if __name__ == '__main__':
+	main()
+
 # vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: