Merge remote-tracking branch 'ngokevin/soundcloud'
[youtube-dl.git] / youtube-dl
index 949d2e5..ff01775 100755 (executable)
@@ -2470,7 +2470,7 @@ class YahooSearchIE(InfoExtractor):
 class YoutubePlaylistIE(InfoExtractor):
        """Information Extractor for YouTube playlists."""
 
-       _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
+       _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
        _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
        _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
        _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
@@ -2514,7 +2514,8 @@ class YoutubePlaylistIE(InfoExtractor):
 
                while True:
                        self.report_download_page(playlist_id, pagenum)
-                       request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
+                       url = self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)
+                       request = urllib2.Request(url)
                        try:
                                page = urllib2.urlopen(request).read()
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -2548,7 +2549,7 @@ class YoutubeUserIE(InfoExtractor):
        _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
        _GDATA_PAGE_SIZE = 50
        _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
-       _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
+       _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
        _youtube_ie = None
        IE_NAME = u'youtube:user'
 
@@ -3532,13 +3533,18 @@ class SoundcloudIE(InfoExtractor):
 
                self.report_extraction('%s/%s' % (uploader, slug_title))
 
-               # extract uid and access token
+               # extract uid and stream token that soundcloud hands out for access
                mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', webpage)   
                if mobj:
                        video_id = mobj.group(1)
                        stream_token = mobj.group(2)
 
-               # construct media url (with uid/token) to request song
+               # extract unsimplified title
+               mobj = re.search('"title":"(.*?)",', webpage)
+               if mobj:
+                       title = mobj.group(1)
+
+               # construct media url (with uid/token)
                mediaURL = "http://media.soundcloud.com/stream/%s?stream_token=%s"
                mediaURL = mediaURL % (video_id, stream_token)
 
@@ -3557,26 +3563,26 @@ class SoundcloudIE(InfoExtractor):
                        except Exception as e:
                                print str(e)
 
-               # for soundcloud, a request must be made to a cross domain to establish
-               # needed cookies
+               # for soundcloud, a cross-domain request is required to establish cookies
                request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
 
                try:
                        self._downloader.process_info({
-                               'id':           video_id,
+                               'id':           video_id.decode('utf-8'),
                                'url':          mediaURL,
-                               'uploader':     uploader,
+                               'uploader':     uploader.decode('utf-8'),
                                'upload_date':  upload_date,
-                               'title':        simple_title,
-                               'stitle':       simple_title,
+                               'title':        simple_title.decode('utf-8'),
+                               'stitle':       simple_title.decode('utf-8'),
                                'ext':          u'mp3',
                                'format':       u'NA',
                                'player_url':   None,
-                               'description': description
+                               'description': description.decode('utf-8')
                        })
                except UnavailableVideoError:
                        self._downloader.trouble(u'\nERROR: unable to download video')
 
+
 class PostProcessor(object):
        """Post Processor class.
 
@@ -3973,7 +3979,7 @@ def gen_extractors():
                EscapistIE(),
                CollegeHumorIE(),
                XVideosIE(),
-        SoundcloudIE(),
+               SoundcloudIE(),
 
                GenericIE()
        ]