Reworked 'upload_date' code for output sequence in YoutubeIE.
author Nevar Angelo <psi.neamf@gmail.com>
Tue, 30 Nov 2010 16:51:00 +0000 (18:51 +0200)
committer Nevar Angelo <psi.neamf@gmail.com>
Tue, 30 Nov 2010 16:51:00 +0000 (18:51 +0200)
Reverted to previous version of 'upload_date' and fixed
a mistake that prevented the code from working properly.

youtube-dl

index 24722d2..4cea2e8 100755 (executable)
@@ -5,6 +5,7 @@
 # Author: Benjamin Johnson
 # License: Public domain code
 import cookielib
+import datetime
 import htmlentitydefs
 import httplib
 import locale
@@ -36,21 +37,6 @@ std_headers = {
 
 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
 
-month_name_to_number = {
-       'January':      '01',
-       'February':     '02',
-       'March':        '03',
-       'April':        '04',
-       'May':          '05',
-       'June':         '06',
-       'July':         '07',
-       'August':       '08',
-       'September':    '09',
-       'October':      '10',
-       'November':     '11',
-       'December':     '12',
-}
-
 def preferredencoding():
        """Get preferred encoding.
 
@@ -913,18 +899,13 @@ class YoutubeIE(InfoExtractor):
                upload_date = u'NA'
                mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
                if mobj is not None:
-                       try:
-                               if ',' in mobj.group(1):
-                                       # Month Day, Year
-                                       m, d, y = mobj.group(1).replace(',', '').split()
-                               else:
-                                       # Day Month Year, we'll suppose
-                                       d, m, y = mobj.group(1).split()
-                               m = month_name_to_number[m]
-                               d = '%02d' % (long(d))
-                               upload_date = '%s%s%s' % (y, m, d)
-                       except:
-                               upload_date = u'NA'
+                       upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
+                       format_expressions = ['%d %B %Y', '%B %d %Y']
+                       for expression in format_expressions:
+                               try:
+                                       upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
+                               except:
+                                       pass
 
                # description
                video_description = 'No description available.'