Whitespace cleanup: mostly vertical whitespace and mixed spaces and tabs.
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
def preferredencoding():
"""Get preferred encoding.
def preferredencoding():
"""Get preferred encoding.
yield pref
return yield_preferredencoding().next()
yield pref
return yield_preferredencoding().next()
def htmlentity_transform(matchobj):
"""Transforms an HTML entity to a Unicode character.
def htmlentity_transform(matchobj):
"""Transforms an HTML entity to a Unicode character.
# Unknown entity in name, return its literal representation
return (u'&%s;' % entity)
# Unknown entity in name, return its literal representation
return (u'&%s;' % entity)
def sanitize_title(utitle):
"""Sanitizes a video title so it could be used as part of a filename."""
utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
return utitle.replace(unicode(os.sep), u'%')
def sanitize_title(utitle):
"""Sanitizes a video title so it could be used as part of a filename."""
utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
return utitle.replace(unicode(os.sep), u'%')
def sanitize_open(filename, open_mode):
"""Try to open the given filename, and slightly tweak it if this fails.
def sanitize_open(filename, open_mode):
"""Try to open the given filename, and slightly tweak it if this fails.
stream = open(filename, open_mode)
return (stream, filename)
stream = open(filename, open_mode)
return (stream, filename)
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp.

    `timestr` is parsed with email.utils.parsedate_tz. When parsing
    succeeds, the value computed by email.utils.mktime_tz is returned;
    when parsedate_tz yields None, this function returns None as well.
    """
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is None:
        # Unparseable date string: signal "no timestamp" to the caller.
        return None
    return email.utils.mktime_tz(timetuple)
class DownloadError(Exception):
"""Download Error exception.
class DownloadError(Exception):
"""Download Error exception.
class SameFileError(Exception):
"""Same File exception.
class SameFileError(Exception):
"""Same File exception.
class PostProcessingError(Exception):
"""Post Processing exception.
class PostProcessingError(Exception):
"""Post Processing exception.
class UnavailableVideoError(Exception):
"""Unavailable Format exception.
class UnavailableVideoError(Exception):
"""Unavailable Format exception.
class ContentTooShortError(Exception):
"""Content Too Short exception.
class ContentTooShortError(Exception):
"""Content Too Short exception.
self.downloaded = downloaded
self.expected = expected
self.downloaded = downloaded
self.expected = expected
class YoutubeDLHandler(urllib2.HTTPHandler):
"""Handler for HTTP requests and responses.
class YoutubeDLHandler(urllib2.HTTPHandler):
"""Handler for HTTP requests and responses.
a particular request, the original request in the program code only has
to include the HTTP header "Youtubedl-No-Compression", which will be
removed before making the real request.
a particular request, the original request in the program code only has
to include the HTTP header "Youtubedl-No-Compression", which will be
removed before making the real request.
Part of this code was copied from:
Part of this code was copied from:
- http://techknack.net/python-urllib2-handlers/
-
+ http://techknack.net/python-urllib2-handlers/
+
Andrew Rowls, the author of that code, agreed to release it to the
public domain.
"""
Andrew Rowls, the author of that code, agreed to release it to the
public domain.
"""
return zlib.decompress(data, -zlib.MAX_WBITS)
except zlib.error:
return zlib.decompress(data)
return zlib.decompress(data, -zlib.MAX_WBITS)
except zlib.error:
return zlib.decompress(data)
@staticmethod
def addinfourl_wrapper(stream, headers, url, code):
if hasattr(urllib2.addinfourl, 'getcode'):
@staticmethod
def addinfourl_wrapper(stream, headers, url, code):
if hasattr(urllib2.addinfourl, 'getcode'):
ret = urllib2.addinfourl(stream, headers, url)
ret.code = code
return ret
ret = urllib2.addinfourl(stream, headers, url)
ret.code = code
return ret
def http_request(self, req):
for h in std_headers:
if h in req.headers:
def http_request(self, req):
for h in std_headers:
if h in req.headers:
resp.msg = old_resp.msg
return resp
resp.msg = old_resp.msg
return resp
class FileDownloader(object):
"""File Downloader class.
class FileDownloader(object):
"""File Downloader class.
else:
exponent = long(math.log(bytes, 1024.0))
suffix = 'bkMGTPEZY'[exponent]
else:
exponent = long(math.log(bytes, 1024.0))
suffix = 'bkMGTPEZY'[exponent]
- converted = float(bytes) / float(1024**exponent)
+ converted = float(bytes) / float(1024 ** exponent)
return '%.2f%s' % (converted, suffix)
@staticmethod
return '%.2f%s' % (converted, suffix)
@staticmethod
os.rename(old_filename, new_filename)
except (IOError, OSError), err:
self.trouble(u'ERROR: unable to rename file')
os.rename(old_filename, new_filename)
except (IOError, OSError), err:
self.trouble(u'ERROR: unable to rename file')
def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file."""
if last_modified_hdr is None:
def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file."""
if last_modified_hdr is None:
if filetime is None:
return
try:
if filetime is None:
return
try:
- os.utime(filename,(time.time(), filetime))
+ os.utime(filename, (time.time(), filetime))
# Request parameters in case of being able to resume
if self.params.get('continuedl', False) and resume_len != 0:
self.report_resuming_byte(resume_len)
# Request parameters in case of being able to resume
if self.params.get('continuedl', False) and resume_len != 0:
self.report_resuming_byte(resume_len)
- request.add_header('Range','bytes=%d-' % resume_len)
+ request.add_header('Range', 'bytes=%d-' % resume_len)
open_mode = 'ab'
count = 0
open_mode = 'ab'
count = 0
else:
# Examine the reported length
if (content_length is not None and
else:
# Examine the reported length
if (content_length is not None and
- (resume_len - 100 < long(content_length) < resume_len + 100)):
+ (resume_len - 100 < long(content_length) < resume_len + 100)):
# The file had already been fully downloaded.
# Explanation to the above condition: in issue #175 it was revealed that
# YouTube sometimes adds or removes a few bytes from the end of the file,
# The file had already been fully downloaded.
# Explanation to the above condition: in issue #175 it was revealed that
# YouTube sometimes adds or removes a few bytes from the end of the file,
class InfoExtractor(object):
"""Information Extractor class.
class InfoExtractor(object):
"""Information Extractor class.
"""Real extraction process. Redefine in subclasses."""
pass
"""Real extraction process. Redefine in subclasses."""
pass
class YoutubeIE(InfoExtractor):
"""Information extractor for youtube.com."""
class YoutubeIE(InfoExtractor):
"""Information extractor for youtube.com."""
self.report_video_info_webpage_download(video_id)
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
self.report_video_info_webpage_download(video_id)
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
request = urllib2.Request(video_info_url)
try:
video_info_webpage = urllib2.urlopen(request).read()
request = urllib2.Request(video_info_url)
try:
video_info_webpage = urllib2.urlopen(request).read()
except UnavailableVideoError:
self._downloader.trouble(u'\nERROR: unable to download video')
except UnavailableVideoError:
self._downloader.trouble(u'\nERROR: unable to download video')
class GoogleIE(InfoExtractor):
"""Information extractor for video.google.com."""
class GoogleIE(InfoExtractor):
"""Information extractor for video.google.com."""
else: # we need something to pass to process_info
video_thumbnail = ''
else: # we need something to pass to process_info
video_thumbnail = ''
try:
# Process video information
self._downloader.process_info({
try:
# Process video information
self._downloader.process_info({
self._downloader.trouble(u'ERROR: unable to extract video description')
return
video_description = mobj.group(1).decode('utf-8')
self._downloader.trouble(u'ERROR: unable to extract video description')
return
video_description = mobj.group(1).decode('utf-8')
- if not video_description: video_description = 'No description available.'
+ if not video_description:
+ video_description = 'No description available.'
# Extract video height and width
mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
# Extract video height and width
mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
yv_bitrate = '700' # according to Wikipedia this is hard-coded
request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
yv_bitrate = '700' # according to Wikipedia this is hard-coded
request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
- '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
- '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
+ '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
+ '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
try:
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
try:
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
return
video_url = urllib.unquote(mobj.group(1))
return
video_url = urllib.unquote(mobj.group(1))
- video_id = os.path.basename(video_url)
+ video_id = os.path.basename(video_url)
# here's a fun little line of code for you:
video_extension = os.path.splitext(video_id)[1][1:]
# here's a fun little line of code for you:
video_extension = os.path.splitext(video_id)[1][1:]
- video_id = os.path.splitext(video_id)[0]
+ video_id = os.path.splitext(video_id)[0]
# it's tempting to parse this further, but you would
# have to take into account all the variations like
# it's tempting to parse this further, but you would
# have to take into account all the variations like
prefix, query = query.split(':')
prefix = prefix[8:]
prefix, query = query.split(':')
prefix = prefix[8:]
- query = query.encode('utf-8')
+ query = query.encode('utf-8')
if prefix == '':
self._download_n_results(query, 1)
return
if prefix == '':
self._download_n_results(query, 1)
return
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
return
elif n > self._max_youtube_results:
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
return
elif n > self._max_youtube_results:
- self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
+ self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
n = self._max_youtube_results
self._download_n_results(query, n)
return
n = self._max_youtube_results
self._download_n_results(query, n)
return
class GoogleSearchIE(InfoExtractor):
"""Information Extractor for Google Video search queries."""
_VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
class GoogleSearchIE(InfoExtractor):
"""Information Extractor for Google Video search queries."""
_VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
prefix, query = query.split(':')
prefix = prefix[8:]
prefix, query = query.split(':')
prefix = prefix[8:]
- query = query.encode('utf-8')
+ query = query.encode('utf-8')
if prefix == '':
self._download_n_results(query, 1)
return
if prefix == '':
self._download_n_results(query, 1)
return
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
return
elif n > self._max_google_results:
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
return
elif n > self._max_google_results:
- self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
+ self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
n = self._max_google_results
self._download_n_results(query, n)
return
n = self._max_google_results
self._download_n_results(query, n)
return
class YahooSearchIE(InfoExtractor):
"""Information Extractor for Yahoo! Video search queries."""
_VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
class YahooSearchIE(InfoExtractor):
"""Information Extractor for Yahoo! Video search queries."""
_VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
prefix, query = query.split(':')
prefix = prefix[8:]
prefix, query = query.split(':')
prefix = prefix[8:]
- query = query.encode('utf-8')
+ query = query.encode('utf-8')
if prefix == '':
self._download_n_results(query, 1)
return
if prefix == '':
self._download_n_results(query, 1)
return
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
return
elif n > self._max_yahoo_results:
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
return
elif n > self._max_yahoo_results:
- self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
+ self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
n = self._max_yahoo_results
self._download_n_results(query, n)
return
n = self._max_yahoo_results
self._download_n_results(query, n)
return
class YoutubePlaylistIE(InfoExtractor):
"""Information Extractor for YouTube playlists."""
class YoutubePlaylistIE(InfoExtractor):
"""Information Extractor for YouTube playlists."""
self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
return
self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
return
class YoutubeUserIE(InfoExtractor):
"""Information Extractor for YouTube users."""
class YoutubeUserIE(InfoExtractor):
"""Information Extractor for YouTube users."""
def report_download_page(self, username, start_index):
    """Report attempt to download user page.

    Prints, through self._downloader.to_screen, the user name together
    with `start_index` and `start_index + self._GDATA_PAGE_SIZE`.
    """
    end_index = start_index + self._GDATA_PAGE_SIZE
    self._downloader.to_screen(
        u'[youtube] user %s: Downloading video ids from %d to %d' %
        (username, start_index, end_index))
def _real_initialize(self):
    """Call initialize() on the extractor stored in self._youtube_ie."""
    self._youtube_ie.initialize()
video_ids = video_ids[playliststart:]
else:
video_ids = video_ids[playliststart:playlistend]
video_ids = video_ids[playliststart:]
else:
video_ids = video_ids[playliststart:playlistend]
self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
- (username, all_ids_count, len(video_ids)))
+ (username, all_ids_count, len(video_ids)))
for video_id in video_ids:
self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
for video_id in video_ids:
self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
except UnavailableVideoError, err:
self._downloader.trouble(u'ERROR: unable to download file')
except UnavailableVideoError, err:
self._downloader.trouble(u'ERROR: unable to download file')
class FacebookIE(InfoExtractor):
"""Information Extractor for Facebook"""
class FacebookIE(InfoExtractor):
"""Information Extractor for Facebook"""
# description
video_description = 'No description available.'
if (self._downloader.params.get('forcedescription', False) and
# description
video_description = 'No description available.'
if (self._downloader.params.get('forcedescription', False) and
- 'description' in video_info):
+ 'description' in video_info):
video_description = video_info['description']
url_map = video_info['video_urls']
video_description = video_info['description']
url_map = video_info['video_urls']
except UnavailableVideoError, err:
self._downloader.trouble(u'\nERROR: unable to download video')
except UnavailableVideoError, err:
self._downloader.trouble(u'\nERROR: unable to download video')
class PostProcessor(object):
"""Post Processor class.
class PostProcessor(object):
"""Post Processor class.
"""
return information # by default, do nothing
"""
return information # by default, do nothing
class FFmpegExtractAudioPP(PostProcessor):
def __init__(self, downloader=None, preferredcodec=None):
class FFmpegExtractAudioPP(PostProcessor):
def __init__(self, downloader=None, preferredcodec=None):