- The gdata API only allows 50 blog posts (and possibly comments) to be created in a 24 hour period. The script doesn't yet handle this.
- There's a bug in gdata that causes all comments to be created with the blog owner as the author, not the value you set it to.
- It doesn't rewrite links that point to other pages within the blog, so they will still point at the old WordPress site.
- It doesn't move attached files or embedded images, or attempt to fix up any links that point to them.
The script at the bottom of this entry requires the version of the gdata Python client library from Subversion as of Nov. 25th, 2007. It cannot be used with the 1.0.9 release, as that release has another bug that causes all post-creation calls to fail. I've only tested this on Mac OS X, so caveat emptor.
Usage:
convertor <blogger username> <blogger password> <blog name> <WordPress XML backup file>
BTW: you may be wondering why I bothered, since there are other tools out there - like blogsync - that do the same thing. The problem with blogsync is that it's written in Java (which makes it a pain to run) and, to be honest, I find it kind of crap and messy to use.
#!/usr/bin/python
import gdata.service
import atom
import sys
import codecs
import xml.dom.minidom
class BlogComment(object):
    """Plain record holding the fields of a single blog comment.

    Attributes (all initialised to the empty string): author, email,
    url, ip, date, content.
    """

    def __init__(self):
        # Every field shares the same empty-string default.
        for field in ("author", "email", "url", "ip", "date", "content"):
            setattr(self, field, "")
class BlogItem(object):
    """Plain record holding the fields of a single blog post.

    String attributes (title, description, author, date, content) start
    out empty; categories and comments start out as empty lists.
    """

    def __init__(self):
        # Text fields default to the empty string.
        self.title = self.description = self.author = self.date = ""
        self.content = ""
        # Fresh list objects per instance so posts never share state.
        self.categories = []
        self.comments = []
def grab(data):
    """Return the text carried by a DOM node, or None if it has none.

    The lookup is attempted in two steps:
      1. ``data.firstChild.data`` - the node's first child holds the text.
      2. ``data.data`` - the node itself holds the text.

    Args:
        data: the node to read (anything exposing ``firstChild`` and/or
            ``data`` attributes).

    Returns:
        The text found, or None when neither attribute path exists.
    """
    # Only AttributeError is expected here (missing attribute, or
    # firstChild being None); anything else is a real error and must
    # not be silently swallowed as a bare ``except:`` would do.
    try:
        return data.firstChild.data
    except AttributeError:
        pass
    try:
        return data.data
    except AttributeError:
        return None
def parseComment(comment):
    """Build a BlogComment from the child nodes of a comment element.

    Args:
        comment: DOM node whose children are the ``wp:comment_*`` elements.

    Returns:
        A BlogComment; fields with no matching child keep their defaults.
    """
    # Child element name -> BlogComment attribute it populates.
    targets = {
        "wp:comment_author": "author",
        "wp:comment_email": "email",
        "wp:comment_author_url": "url",
        "wp:comment_author_IP": "ip",
        "wp:comment_date": "date",
        "wp:comment_content": "content",
    }
    result = BlogComment()
    for child in comment.childNodes:
        attr = targets.get(child.nodeName)
        if attr is not None:
            setattr(result, attr, grab(child))
    return result
def extract(item):
    """Build a BlogItem from the child nodes of an ``item`` element.

    Args:
        item: DOM node for one exported post.

    Returns:
        A BlogItem filled from the title, description, dc:creator,
        wp:post_date, category, content:encoded and wp:comment children.
        Fields with no matching child keep their defaults.
    """
    it = BlogItem()
    for node in item.childNodes:
        name = node.nodeName
        if name == "title":
            it.title = grab(node)
        elif name == "description":
            it.description = grab(node)
        elif name == "dc:creator":
            it.author = grab(node)
        elif name == "wp:post_date":
            it.date = grab(node)
        elif name == "category":
            it.categories.append(grab(node))
        elif name == "content:encoded":
            # The body may be spread over several child nodes. grab()
            # returns None for a child with no text; concatenating that
            # would raise TypeError, so such children are skipped.
            pieces = [grab(part) for part in node.childNodes]
            it.content += "".join([p for p in pieces if p is not None])
        elif name == "wp:comment":
            it.comments.append(parseComment(node))
    return it
def convert(node):
    """Return a list of BlogItem objects, one per ``item`` child of *node*.

    Children with any other node name are ignored; document order is kept.
    """
    return [extract(child) for child in node.childNodes
            if child.nodeName == "item"]
# Script body: log in to Blogger, find the target blog by title, parse the
# WordPress XML backup and re-create each post followed by its comments.


def _blogger_timestamp(wp_date):
    """Turn a '<date> <time>' string into the '<date>T<time>.000-08:00' form.

    NOTE(review): the -08:00 offset is hard-coded; presumably the original
    blog ran on US Pacific time - confirm before using with other blogs.
    """
    parts = wp_date.split(' ')
    return parts[0] + "T" + parts[1] + ".000-08:00"


if len(sys.argv) != 5:
    print("usage: %s user password blogname backupfile" % sys.argv[0])
    sys.exit(1)
user = sys.argv[1]
password = sys.argv[2]
blogname = sys.argv[3]
fname = sys.argv[4]

gservice = gdata.service.GDataService(user, password)
gservice.source = "wordpress2blogger"
gservice.service = "blogger"
gservice.server = "www.blogger.com"
gservice.ProgrammaticLogin()

# Look the blog up by its title among the account's blogs.
feeds = gservice.Get('/feeds/default/blogs')
found = None
for i in feeds.entry:
    if i.title.text == blogname:
        found = i
if found is None:
    print('Could not find blog "%s"\n' % blogname)
    # Without a blog there is nothing to post to; carrying on would only
    # crash later on found.GetSelfLink().
    sys.exit(1)
# The last path segment of the blog's self link is used as the blog
# identifier in the feed URLs below.
link = found.GetSelfLink().href.split('/')[-1]

# Read the backup and make sure the file handle is released again.
f = codecs.open(fname, "r", "utf-8")
try:
    u = f.read()
finally:
    f.close()
db = xml.dom.minidom.parseString(u.encode("utf-8"))
db.normalize()

# Locate the channel element by name rather than by position, so the
# lookup does not depend on whitespace nodes preceding it.
channel = None
for node in db.documentElement.childNodes:
    if node.nodeName == "channel":
        channel = node
        break
if channel is None:
    print('No channel element found in "%s"' % fname)
    sys.exit(1)
items = convert(channel)

for i in items:
    # Create the post itself.
    entry = gdata.GDataEntry()
    entry.author.append(atom.Author(atom.Name(text=user)))
    entry.title = atom.Title(title_type='xhtml', text=i.title)
    entry.content = atom.Content(content_type='html', text=i.content)
    entry.published = atom.Published(_blogger_timestamp(i.date))
    t = gservice.Post(entry, '/feeds/' + link + '/posts/default')

    # Comments are posted to the feed of the post that was just created.
    postid = t.GetSelfLink().href.split('/')[-1]
    uri = '/feeds/' + link + '/' + postid + '/comments/default'
    for j in i.comments:
        co = gdata.GDataEntry()
        # Only build the author sub-elements that actually have a value.
        if j.author:
            author = atom.Name(j.author)
        else:
            author = None
        if j.email:
            email = atom.Email(j.email)
        else:
            email = None
        if j.url:
            url = atom.Uri(j.url)
        else:
            url = None
        co.author.append(atom.Author(name=author, email=email, uri=url, text=j.author))
        co.content = atom.Content(content_type='xhtml', text=j.content)
        co.published = atom.Published(_blogger_timestamp(j.date))
        gservice.Post(co, uri)