Enable git trace2 event format logging.

Ways to enable logging:
1) Set git's trace2.eventtarget config variable to desired logging path
2) Specify path via --git_trace2_event_log option

A unique logfile name is generated per repo execution (based on the repo session-id).

Testing:
1) Verified git config 'trace2.eventtarget' and flag enable logging.
2) Verified version/start/end events are expected format:
  https://git-scm.com/docs/api-trace2#_event_format
3) Unit tests

Bug: https://crbug.com/gerrit/13706
Change-Id: I335eba68124055321c4149979bec36ac16ef81eb
Tested-by: Ian Kasprzak <iankaz@google.com>
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/292262
Reviewed-by: Mike Frysinger <vapier@google.com>
Reviewed-by: Jonathan Nieder <jrn@google.com>
diff --git a/git_trace2_event_log.py b/git_trace2_event_log.py
new file mode 100644
index 0000000..4a8e034
--- /dev/null
+++ b/git_trace2_event_log.py
@@ -0,0 +1,197 @@
+# Copyright (C) 2020 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Provide event logging in the git trace2 EVENT format.
+
+The git trace2 EVENT format is defined at:
+https://www.kernel.org/pub/software/scm/git/docs/technical/api-trace2.html#_event_format
+https://git-scm.com/docs/api-trace2#_the_event_format_target
+
+  Usage:
+
+  git_trace_log = EventLog()
+  git_trace_log.StartEvent()
+  ...
+  git_trace_log.ExitEvent()
+  git_trace_log.Write()
+"""
+
+
+import datetime
+import json
+import os
+import sys
+import tempfile
+import threading
+
+from git_command import GitCommand, RepoSourceVersion
+
+
+class EventLog(object):
+  """Event log that records events that occurred during a repo invocation.
+
+  Events are written to the log as a consecutive JSON entries, one per line.
+  Entries follow the git trace2 EVENT format.
+
+  Each entry contains the following common keys:
+  - event: The event name
+  - sid: session-id - Unique string to allow process instance to be identified.
+  - thread: The thread name.
+  - time: is the UTC time of the event.
+
+  Valid 'event' names and event specific fields are documented here:
+  https://git-scm.com/docs/api-trace2#_event_format
+  """
+
+  def __init__(self, env=None):
+    """Initializes the event log."""
+    self._log = []
+    # Try to get session-id (sid) from environment (setup in repo launcher).
+    KEY = 'GIT_TRACE2_PARENT_SID'
+    if env is None:
+      env = os.environ
+
+    now = datetime.datetime.utcnow()
+
+    # Save both our sid component and the complete sid.
+    # We use our sid component (self._sid) as the unique filename prefix and
+    # the full sid (self._full_sid) in the log itself.
+    self._sid = 'repo-%s-P%08x' % (now.strftime('%Y%m%dT%H%M%SZ'), os.getpid())
+    parent_sid = env.get(KEY)
+    # Append our sid component to the parent sid (if it exists).
+    if parent_sid is not None:
+      self._full_sid = parent_sid + '/' + self._sid
+    else:
+      self._full_sid = self._sid
+
+    # Set/update the environment variable.
+    # Environment handling across systems is messy.
+    try:
+      env[KEY] = self._full_sid
+    except UnicodeEncodeError:
+      env[KEY] = self._full_sid.encode()
+
+    # Add a version event to front of the log.
+    self._AddVersionEvent()
+
+  @property
+  def full_sid(self):
+    return self._full_sid
+
+  def _AddVersionEvent(self):
+    """Adds a 'version' event at the beginning of current log."""
+    version_event = self._CreateEventDict('version')
+    version_event['evt'] = 2
+    version_event['exe'] = RepoSourceVersion()
+    self._log.insert(0, version_event)
+
+  def _CreateEventDict(self, event_name):
+    """Returns a dictionary with the common keys/values for git trace2 events.
+
+    Args:
+      event_name: The event name.
+
+    Returns:
+      Dictionary with the common event fields populated.
+    """
+    return {
+        'event': event_name,
+        'sid': self._full_sid,
+        'thread': threading.currentThread().getName(),
+        'time': datetime.datetime.utcnow().isoformat() + 'Z',
+    }
+
+  def StartEvent(self):
+    """Append a 'start' event to the current log."""
+    start_event = self._CreateEventDict('start')
+    start_event['argv'] = sys.argv
+    self._log.append(start_event)
+
+  def ExitEvent(self, result):
+    """Append an 'exit' event to the current log.
+
+    Args:
+      result: Exit code of the event
+    """
+    exit_event = self._CreateEventDict('exit')
+
+    # Consider 'None' success (consistent with event_log result handling).
+    if result is None:
+      result = 0
+    exit_event['code'] = result
+    self._log.append(exit_event)
+
+  def Write(self, path=None):
+    """Writes the log out to a file.
+
+    Log is only written if 'path' or 'git config --get trace2.eventtarget'
+    provide a valid path to write logs to.
+
+    Logging filename format follows the git trace2 style of being a unique
+    (exclusive writable) file.
+
+    Args:
+      path: Path to where logs should be written.
+
+    Returns:
+      log_path: Path to the log file if log is written, otherwise None
+    """
+    log_path = None
+    # If no logging path is specified, get the path from 'trace2.eventtarget'.
+    if path is None:
+      cmd = ['config', '--get', 'trace2.eventtarget']
+      # TODO(https://crbug.com/gerrit/13706): Use GitConfig when it supports
+      # system git config variables.
+      p = GitCommand(None, cmd, capture_stdout=True, capture_stderr=True,
+                     bare=True)
+      retval = p.Wait()
+      if retval == 0:
+        # Strip trailing carriage-return in path.
+        path = p.stdout.rstrip('\n')
+      elif retval != 1:
+        # `git config --get` is documented to produce an exit status of `1` if
+        # the requested variable is not present in the configuration. Report any
+        # other return value as an error.
+        print("repo: error: 'git config --get' call failed with return code: %r, stderr: %r" % (
+            retval, p.stderr), file=sys.stderr)
+
+    if isinstance(path, str):
+      # Get absolute path.
+      path = os.path.abspath(os.path.expanduser(path))
+    else:
+      raise TypeError('path: str required but got %s.' % type(path))
+
+    # Git trace2 requires a directory to write log to.
+
+    # TODO(https://crbug.com/gerrit/13706): Support file (append) mode also.
+    if not os.path.isdir(path):
+      return None
+    # Use NamedTemporaryFile to generate a unique filename as required by git trace2.
+    try:
+      with tempfile.NamedTemporaryFile(mode='x', prefix=self._sid, dir=path,
+                                       delete=False) as f:
+        # TODO(https://crbug.com/gerrit/13706): Support writing events as they
+        # occur.
+        for e in self._log:
+          # Dump in compact encoding mode.
+          # See 'Compact encoding' in Python docs:
+          # https://docs.python.org/3/library/json.html#module-json
+          json.dump(e, f, indent=None, separators=(',', ':'))
+          f.write('\n')
+        log_path = f.name
+    except FileExistsError as err:
+      print('repo: warning: git trace2 logging failed: %r' % err,
+            file=sys.stderr)
+      return None
+    return log_path
diff --git a/main.py b/main.py
index e152de4..a7c3c68 100755
--- a/main.py
+++ b/main.py
@@ -50,6 +50,7 @@
 from repo_trace import SetTrace
 from git_command import user_agent
 from git_config import init_ssh, close_ssh, RepoConfig
+from git_trace2_event_log import EventLog
 from command import InteractiveCommand
 from command import MirrorSafeCommand
 from command import GitcAvailableCommand, GitcClientCommand
@@ -130,6 +131,8 @@
 global_options.add_option('--event-log',
                           dest='event_log', action='store',
                           help='filename of event log to append timeline to')
+global_options.add_option('--git-trace2-event-log', action='store',
+                          help='directory to write git trace2 event log to')
 
 
 class _Repo(object):
@@ -211,6 +214,7 @@
             file=sys.stderr)
       return 1
 
+    git_trace2_event_log = EventLog()
     cmd.repodir = self.repodir
     cmd.client = RepoClient(cmd.repodir)
     cmd.manifest = cmd.client.manifest
@@ -261,6 +265,8 @@
     start = time.time()
     cmd_event = cmd.event_log.Add(name, event_log.TASK_COMMAND, start)
     cmd.event_log.SetParent(cmd_event)
+    git_trace2_event_log.StartEvent()
+
     try:
       cmd.ValidateOptions(copts, cargs)
       result = cmd.Execute(copts, cargs)
@@ -303,10 +309,13 @@
 
       cmd.event_log.FinishEvent(cmd_event, finish,
                                 result is None or result == 0)
+      git_trace2_event_log.ExitEvent(result)
+
       if gopts.event_log:
         cmd.event_log.Write(os.path.abspath(
                             os.path.expanduser(gopts.event_log)))
 
+      git_trace2_event_log.Write(gopts.git_trace2_event_log)
     return result
 
 
diff --git a/tests/test_git_trace2_event_log.py b/tests/test_git_trace2_event_log.py
new file mode 100644
index 0000000..3905630
--- /dev/null
+++ b/tests/test_git_trace2_event_log.py
@@ -0,0 +1,169 @@
+# Copyright (C) 2020 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Unittests for the git_trace2_event_log.py module."""
+
+import json
+import tempfile
+import unittest
+
+import git_trace2_event_log
+
+
+class EventLogTestCase(unittest.TestCase):
+  """TestCase for the EventLog module."""
+
+  PARENT_SID_KEY = 'GIT_TRACE2_PARENT_SID'
+  PARENT_SID_VALUE = 'parent_sid'
+  SELF_SID_REGEX = r'repo-\d+T\d+Z-.*'
+  FULL_SID_REGEX = r'^%s/%s' % (PARENT_SID_VALUE, SELF_SID_REGEX)
+
+  def setUp(self):
+    """Load the event_log module every time."""
+    self._event_log_module = None
+    # By default we initialize with the expected case where
+    # repo launches us (so GIT_TRACE2_PARENT_SID is set).
+    env = {
+        self.PARENT_SID_KEY: self.PARENT_SID_VALUE,
+    }
+    self._event_log_module = git_trace2_event_log.EventLog(env=env)
+    self._log_data = None
+
+  def verifyCommonKeys(self, log_entry, expected_event_name, full_sid=True):
+    """Helper function to verify common event log keys."""
+    self.assertIn('event', log_entry)
+    self.assertIn('sid', log_entry)
+    self.assertIn('thread', log_entry)
+    self.assertIn('time', log_entry)
+
+    # Do basic data format validation.
+    self.assertEqual(expected_event_name, log_entry['event'])
+    if full_sid:
+      self.assertRegex(log_entry['sid'], self.FULL_SID_REGEX)
+    else:
+      self.assertRegex(log_entry['sid'], self.SELF_SID_REGEX)
+    self.assertRegex(log_entry['time'], r'^\d+-\d+-\d+T\d+:\d+:\d+\.\d+Z$')
+
+  def readLog(self, log_path):
+    """Helper function to read log data into a list."""
+    log_data = []
+    with open(log_path, mode='rb') as f:
+      for line in f:
+        log_data.append(json.loads(line))
+    return log_data
+
+  def test_initial_state_with_parent_sid(self):
+    """Test initial state when 'GIT_TRACE2_PARENT_SID' is set by parent."""
+    self.assertRegex(self._event_log_module.full_sid, self.FULL_SID_REGEX)
+
+  def test_initial_state_no_parent_sid(self):
+    """Test initial state when 'GIT_TRACE2_PARENT_SID' is not set."""
+    # Setup an empty environment dict (no parent sid).
+    self._event_log_module = git_trace2_event_log.EventLog(env={})
+    self.assertRegex(self._event_log_module.full_sid, self.SELF_SID_REGEX)
+
+  def test_version_event(self):
+    """Test 'version' event data is valid.
+
+    Verify that the 'version' event is written even when no other
+    events are addded.
+
+    Expected event log:
+    <version event>
+    """
+    with tempfile.TemporaryDirectory(prefix='event_log_tests') as tempdir:
+      log_path = self._event_log_module.Write(path=tempdir)
+      self._log_data = self.readLog(log_path)
+
+    # A log with no added events should only have the version entry.
+    self.assertEqual(len(self._log_data), 1)
+    version_event = self._log_data[0]
+    self.verifyCommonKeys(version_event, expected_event_name='version')
+    # Check for 'version' event specific fields.
+    self.assertIn('evt', version_event)
+    self.assertIn('exe', version_event)
+
+  def test_start_event(self):
+    """Test and validate 'start' event data is valid.
+
+    Expected event log:
+    <version event>
+    <start event>
+    """
+    self._event_log_module.StartEvent()
+    with tempfile.TemporaryDirectory(prefix='event_log_tests') as tempdir:
+      log_path = self._event_log_module.Write(path=tempdir)
+      self._log_data = self.readLog(log_path)
+
+    self.assertEqual(len(self._log_data), 2)
+    start_event = self._log_data[1]
+    self.verifyCommonKeys(self._log_data[0], expected_event_name='version')
+    self.verifyCommonKeys(start_event, expected_event_name='start')
+    # Check for 'start' event specific fields.
+    self.assertIn('argv', start_event)
+    self.assertTrue(isinstance(start_event['argv'], list))
+
+  def test_exit_event_result_none(self):
+    """Test 'exit' event data is valid when result is None.
+
+    We expect None result to be converted to 0 in the exit event data.
+
+    Expected event log:
+    <version event>
+    <exit event>
+    """
+    self._event_log_module.ExitEvent(None)
+    with tempfile.TemporaryDirectory(prefix='event_log_tests') as tempdir:
+      log_path = self._event_log_module.Write(path=tempdir)
+      self._log_data = self.readLog(log_path)
+
+    self.assertEqual(len(self._log_data), 2)
+    exit_event = self._log_data[1]
+    self.verifyCommonKeys(self._log_data[0], expected_event_name='version')
+    self.verifyCommonKeys(exit_event, expected_event_name='exit')
+    # Check for 'exit' event specific fields.
+    self.assertIn('code', exit_event)
+    # 'None' result should convert to 0 (successful) return code.
+    self.assertEqual(exit_event['code'], 0)
+
+  def test_exit_event_result_integer(self):
+    """Test 'exit' event data is valid when result is an integer.
+
+    Expected event log:
+    <version event>
+    <exit event>
+    """
+    self._event_log_module.ExitEvent(2)
+    with tempfile.TemporaryDirectory(prefix='event_log_tests') as tempdir:
+      log_path = self._event_log_module.Write(path=tempdir)
+      self._log_data = self.readLog(log_path)
+
+    self.assertEqual(len(self._log_data), 2)
+    exit_event = self._log_data[1]
+    self.verifyCommonKeys(self._log_data[0], expected_event_name='version')
+    self.verifyCommonKeys(exit_event, expected_event_name='exit')
+    # Check for 'exit' event specific fields.
+    self.assertIn('code', exit_event)
+    self.assertEqual(exit_event['code'], 2)
+
+  # TODO(https://crbug.com/gerrit/13706): Add additional test coverage for
+  # Write() where:
+  # - path=None (using git config call)
+  # - path=<Non-String type> (raises TypeError)
+  # - path=<Non-Directory> (should return None)
+  # - tempfile.NamedTemporaryFile errors with FileExistsError (should return  None)
+
+
+if __name__ == '__main__':
+  unittest.main()