Switch backup to use pickles over RPC instead of XML

The to_xml() method of a datastore entity updates the auto_now
date time fields.  This is a known bug:

  http://code.google.com/p/googleappengine/issues/detail?id=322

Since we cannot fix it we have to serialize everything in pickles
and then use the pickled format on the client side, which means we
also must import the GAE SDK.  I've hardcoded it to where it is on
my system, which is ../../google_appengine, aka a sibling of the
gerrit repo client.

Signed-off-by: Shawn O. Pearce <sop@google.com>
diff --git a/proto/backup.proto b/proto/backup.proto
index 44cfe12..6ffa192 100644
--- a/proto/backup.proto
+++ b/proto/backup.proto
@@ -20,20 +20,16 @@
   required string last_key = 2;
 }
 
-message EntityXml {
+message EntityData {
   optional int32 key_id = 1;
   optional string key_name = 2;
   required string key = 3;
   required int32 last_backed_up = 4;
-  required bytes xml = 5;
+  required bytes data = 5;
 }
 
 message NextChunkResponse {
-  repeated EntityXml entity = 1;
-}
-
-message EntityAck {
-  required string key = 1;
+  repeated EntityData entity = 1;
 }
 
 service BackupService {
diff --git a/webapp/backup_gae.py b/webapp/backup_gae.py
index b7f8f57..9bab1d7 100755
--- a/webapp/backup_gae.py
+++ b/webapp/backup_gae.py
@@ -14,22 +14,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import binascii
-import base64
-import sha
-import zlib
+import sys
+sys.path.insert(0, '../../google_appengine/')
+
+from datetime import datetime
+import cStringIO
+import pickle
 import getpass
 import logging
 import optparse
 import os
-import re
 import sys
-from xml.dom.minidom import parseString
 from pyPgSQL import PgSQL
-from pyPgSQL.libpq import PgQuoteBytea, OperationalError
+from pyPgSQL.libpq import OperationalError
 
+from google.appengine.ext import db
+from google.appengine.api import users
 from codereview.proto_client import HttpRpc, Proxy
 from codereview.backup_pb2 import *
+from codereview import models
 
 KINDS = [
   "ApprovalRight",
@@ -115,52 +118,9 @@
                  host_override=options.host,
                  cookie_file=cookie_file)
 
-def getText(nodelist):
-  rc = ""
-  for node in nodelist:
-    if node.nodeType == node.TEXT_NODE:
-      rc = rc + node.data
-  return rc
-
-key_re = re.compile(r'^tag:.*\[(.*)\]$')
-
-def parse_dom(dom):
-  class AnyObject(object):
-    def __getattr__(self, name):
-      return []
-
-  o = AnyObject()
-  for p in dom.getElementsByTagName('property'):
-    n = p.getAttribute('name')
-    v = getText(p.childNodes)
-    t = p.getAttribute('type')
-    if t == 'null':
-      continue
-
-    if t == 'key':
-      v = key_re.match(v).group(1)
-    elif t == 'int':
-      v = int(v)
-    elif t == 'bool':
-      if v == 'True':
-        v = True
-      elif v == 'False':
-        v = False
-    elif t == 'gd:email':
-      v = p.getElementsByTagName('gd:email')[0].getAttribute('address')
-      if v and '@' not in v:
-        v += '@gmail.com'
-    elif t == 'user':
-      if v and '@' not in v:
-        v += '@gmail.com'
-
-    a = getattr(o, n, [])
-    if v != '':
-      a.append(v)
-    setattr(o, n, a)
-  return o
-
 def one(v):
+  if not isinstance(v, list):
+    return v
   if len(v) == 1:
     return v[0]
   return None
@@ -171,6 +131,8 @@
   return 'N'
 
 def yn_null(v):
+  if not isinstance(v, list):
+    v = [v]
   if len(v) == 1 and v[0] is not None:
     return yn(v)
   return None
@@ -185,21 +147,32 @@
     c.execute('DELETE FROM ' + table_name + ' WHERE gae_key=%s',
               (entity.key))
 
-  def insert(self, table_name, dict, base64_keys=[]):
+  def insert(self, table_name, dict):
     p = []
+    v = []
     for u in dict.keys():
-      if u in base64_keys:
-        p.append("decode(%s,'base64')")
-      else:
-        p.append('%s')
+      p.append('%s')
+      a = dict[u]
+      if isinstance(a, users.User):
+        a = a.email()
+      if isinstance(a, db.Key):
+        a = str(a)
+      if isinstance(a, datetime):
+        a = a.isoformat(' ')
+      if isinstance(a, unicode):
+        a = a.encode('utf-8')
+      v.append(a)
 
     s = 'INSERT INTO ' + table_name + '(' + ','.join(dict.keys()) + ')'
     s += 'VALUES(' + ','.join(p) + ')'
     c = self.db.cursor()
     try:
-      c.execute(s, dict.values())
+      c.execute(s, v)
     except OperationalError:
-      print 'FAIL %s %s' % (table_name, dict)
+      print
+      print 'FAIL %s' % table_name
+      print 'SQL  %s' % s
+      print 'DATA %s' % dict
       raise
 
   def save_ApprovalRight(self, entity, obj):
@@ -254,7 +227,7 @@
     self.insert('branches', {
       'gae_key': entity.key,
       'last_backed_up': one(obj.last_backed_up),
-      'project_key': one(obj.project),
+      'project_key': models.Branch.project.get_value_for_datastore(obj),
       'name': one(obj.name),
     })
 
@@ -264,7 +237,7 @@
       'revision_id': one(obj.id),
       'gae_key': entity.key,
       'last_backed_up': one(obj.last_backed_up),
-      'project_key': one(obj.project),
+      'project_key': models.RevisionId.project.get_value_for_datastore(obj),
 
       'author_name': one(obj.author_name),
       'author_email': one(obj.author_email),
@@ -305,8 +278,8 @@
       'closed': yn(obj.closed),
       'n_comments': one(obj.n_comments),
       'n_patchsets': one(obj.n_patchsets),
-      'dest_project_key': one(obj.dest_project),
-      'dest_branch_key': one(obj.dest_branch),
+      'dest_project_key': models.Change.dest_project.get_value_for_datastore(obj),
+      'dest_branch_key': models.Change.dest_branch.get_value_for_datastore(obj),
       'merge_submitted': one(obj.merge_submitted),
       'merged': yn(obj.merged),
       'emailed_clean_merge': yn(obj.emailed_clean_merge),
@@ -326,12 +299,12 @@
       'gae_key': entity.key,
       'last_backed_up': one(obj.last_backed_up),
       'patchset_id': one(obj.id),
-      'change_key': one(obj.change),
+      'change_key': models.PatchSet.change.get_value_for_datastore(obj),
       'message': one(obj.message),
       'owner': one(obj.owner),
       'created': one(obj.created),
       'modified': one(obj.modified),
-      'revision_key': one(obj.revision),
+      'revision_key': models.PatchSet.revision.get_value_for_datastore(obj),
       'complete': yn(obj.complete),
     })
 
@@ -340,7 +313,7 @@
     self.insert('messages', {
       'gae_key': entity.key,
       'last_backed_up': one(obj.last_backed_up),
-      'change_key': one(obj.change),
+      'change_key': models.Message.change.get_value_for_datastore(obj),
       'subject': one(obj.subject),
       'sender': one(obj.sender),
       'date_sent': one(obj.date),
@@ -350,33 +323,19 @@
     for u in set(obj.recipients):
       self.insert('message_recipients', {'message_key':entity.key,'email':u})
 
-  def save_DeltaContent(self, entity, obj):
-    type, hash = entity.key_name.split(':')
-
-    self.delete('delta_content', entity)
-    self.insert('delta_content', {
-      'gae_key': entity.key,
-      'last_backed_up': one(obj.last_backed_up),
-      'type': type,
-      'hash': hash,
-      'data_z': one(obj.text_z),
-      'depth': one(obj.depth),
-      'base_key': one(obj.base),
-    }, set(['data_z']))
-
   def save_Patch(self, entity, obj):
     self.delete('patches', entity)
     self.insert('patches', {
       'gae_key': entity.key,
       'last_backed_up': one(obj.last_backed_up),
-      'patchset_key': one(obj.patchset),
+      'patchset_key': models.Patch.patchset.get_value_for_datastore(obj),
       'filename': one(obj.filename),
       'status': one(obj.status),
       'multi_way_diff': yn(obj.multi_way_diff),
       'n_comments': one(obj.n_comments),
-      'old_data_key': one(obj.old_data),
-      'new_data_key': one(obj.new_data),
-      'diff_data_key': one(obj.diff_data),
+      'old_data_key': models.Patch.old_data.get_value_for_datastore(obj),
+      'new_data_key': models.Patch.new_data.get_value_for_datastore(obj),
+      'diff_data_key': models.Patch.diff_data.get_value_for_datastore(obj),
     })
 
   def save_Comment(self, entity, obj):
@@ -384,7 +343,7 @@
     self.insert('comments', {
       'gae_key': entity.key,
       'last_backed_up': one(obj.last_backed_up),
-      'patch_key': one(obj.patch),
+      'patch_key': models.Comment.patch.get_value_for_datastore(obj),
       'message_id': one(obj.message_id),
       'author': one(obj.author),
       'written': one(obj.date),
@@ -395,11 +354,13 @@
     })
 
   def save_ReviewStatus(self, entity, obj):
+    if obj.lgtm == '':
+      obj.lgtm = 'abstain'
     self.delete('review_status', entity)
     self.insert('review_status', {
       'gae_key': entity.key,
       'last_backed_up': one(obj.last_backed_up),
-      'change_key': one(obj.change),
+      'change_key': models.ReviewStatus.change.get_value_for_datastore(obj),
       'email': one(obj.user),
       'lgtm': one(obj.lgtm),
       'verified': yn_null(obj.verified),
@@ -494,12 +455,7 @@
         cnt += 1
         sys.stdout.write('\r%-18s ... %5d ' % (kind_name, cnt))
 
-        o = parse_dom(parseString(
-          '<?xml version="1.0" encoding="utf-8"?>'
-          '<root xmlns:gd="http://www.google.com/">'
-          '%s'
-          '</root>'
-          % entity.xml))
+        o = pickle.load(cStringIO.StringIO(entity.data))
         getattr(store, 'save_%s' % kind_name)(entity, o)
         last_key = entity.key
       db.commit()
diff --git a/webapp/codereview/backup_service.py b/webapp/codereview/backup_service.py
index f344f5a..c2fa143 100644
--- a/webapp/codereview/backup_service.py
+++ b/webapp/codereview/backup_service.py
@@ -14,6 +14,8 @@
 
 import base64
 import logging
+import cStringIO
+import pickle
 
 from google.appengine.ext import db
 
@@ -74,8 +76,7 @@
       e.key = str(o.key())
       e.last_backed_up = o.last_backed_up
 
-      if isinstance(o, models.DeltaContent):
-        o.text_z = base64.b64encode(o.text_z)
-
-      e.xml = o.to_xml().encode('utf_8')
+      buf = cStringIO.StringIO()
+      pickle.dump(o, buf, -1)
+      e.data = buf.getvalue()
     done(rsp)
diff --git a/webapp/codereview/models.py b/webapp/codereview/models.py
index 01a5bc7..af26245 100644
--- a/webapp/codereview/models.py
+++ b/webapp/codereview/models.py
@@ -17,7 +17,10 @@
 # Python imports
 import base64
 import datetime
-import hashlib
+try:
+  import hashlib
+except ImportError:
+  pass
 import logging
 import random
 import re