#!/usr/bin/env python
"""Print blog posts found under a directory tree as plain-text entries.

Every file below the start directory except ``index.html`` is scanned line by
line for a title, a date, a body and an extended body; the values are then
substituted into ``template`` and printed to standard output.

The code sticks to constructs that behave the same on Python 2 and Python 3
(``str`` methods instead of the ``string`` module functions, single-argument
``print(...)``).
"""
import os
from os.path import join
from time import strptime, strftime

# One record per post: title, date, body, extended body (in that order).
# NOTE(review): the field names look like the Movable Type import format --
# confirm. The line layout below was reconstructed (one field per line) from a
# whitespace-collapsed copy of this file; verify blank lines and trailing
# spaces against a known-good export before relying on it.
template = """AUTHOR: patrick
TITLE: %s
STATUS: Publish
ALLOW COMMENTS: 1
CONVERT BREAKS: __default__
ALLOW PINGS: 0
PRIMARY CATEGORY:
CATEGORY:
DATE: %s
-----
BODY:
%s
-----
EXTENDED BODY:
%s
-----
EXCERPT:
-----
KEYWORDS:
-----
--------"""


def printTemplate(title, date, body, ext_body):
    """Print one filled-in ``template`` record.

    title, body, ext_body -- text substituted into the record unchanged.
    date -- timestamp shaped like ``2005-09-22T22:15:18-08:00``; it is printed
            as ``MM/DD/YYYY HH:MM:SS AM/PM`` (e.g. ``01/22/2004 10:07:19 PM``).

    Raises ValueError (from ``strptime``) when ``date`` does not match
    ``%Y-%m-%dT%H:%M:%S`` after the ``-08:00`` suffix has been removed.
    """
    # Only the literal '-08:00' offset is dropped; the clock time is not
    # converted, and any other offset makes strptime raise.
    local_stamp = date.replace('-08:00', '')
    parsed = strptime(local_stamp, '%Y-%m-%dT%H:%M:%S')
    formatted = strftime('%m/%d/%Y %I:%M:%S %p', parsed)
    # A single parenthesised argument prints identically on Python 2 and 3.
    print(template % (title, formatted, body, ext_body))


def parseFile(file):
    """Extract title, date, body and extended body from ``file`` and print them.

    file -- path of the file to read.

    The file is scanned once, line by line:
      * the first line containing the title marker supplies the title;
      * the first line containing the date marker supplies the date;
      * lines after the body-start marker are collected as the body;
      * after the "more" marker, collected lines go to the extended body;
      * a line containing ``div`` stops collection.
    The result is handed to ``printTemplate``, so a missing or malformed date
    raises ValueError from there.
    """
    # NOTE(review): the marker literals below are reproduced exactly as they
    # appear in this copy of the file (empty strings / a bare line break).
    # They look like markup markers whose contents were stripped somewhere
    # along the way -- restore the real values before running an export.
    title_key = ''
    title_end = '\n'
    date_key = ''
    date_end = ''
    body_start = ''
    body_more = '\n'

    title = ''
    date = ''
    body_lines = []
    ext_lines = []
    read_body = False
    read_body_ext = False

    # 'with' guarantees the handle is closed even if reading raises.
    with open(file, 'r') as f:
        for line in f:
            # Only the first matching line is used for title and date.
            if title == '' and title_key in line:
                title = line.strip().replace(title_key, '').replace(title_end, '')
            if date == '' and date_key in line:
                date = line.strip().replace(date_key, '').replace(date_end, '')

            # NOTE(review): the three checks below use `> 0`, so a marker at
            # column 0 does not count as a match. Kept as found -- confirm
            # whether `>= 0` was intended.
            if not read_body and line.find(body_start) > 0:
                read_body = True
                continue  # the marker line itself is not part of the body
            if read_body and line.find(body_more) > 0:
                read_body_ext = True
                continue  # the "more" marker line is not part of either body
            if read_body and line.find('div') > 0:
                read_body = False

            if read_body:
                if read_body_ext:
                    ext_lines.append(line)
                else:
                    body_lines.append(line)

    # The body is stripped of surrounding whitespace; the extended body is not.
    printTemplate(title, date, ''.join(body_lines).strip(), ''.join(ext_lines))


def walktree(location):
    """Run ``parseFile`` on every file under ``location`` except ``index.html``.

    location -- directory to walk recursively. Files are visited in whatever
                order ``os.walk`` yields them.
    """
    for root, _dirs, files in os.walk(location):
        for name in files:
            if name != 'index.html':
                parseFile(join(root, name))


if __name__ == '__main__':
    # Guarded so that importing this module does not start an export.
    walktree('years/2005')