diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile
index 87f6dbb3453..1eaf961ce36 100644
--- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile
+++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Dockerfile
@@ -20,5 +20,8 @@ RUN apt-get -q update && \
# Copy the Fluentd configuration file for logging Docker container logs.
COPY google-fluentd.conf /etc/google-fluentd/google-fluentd.conf
+# Install the record reformer plugin.
+RUN /usr/sbin/google-fluentd-gem install fluent-plugin-record-reformer
+
# Start Fluentd to pick up our config that watches Docker container logs.
CMD /usr/sbin/google-fluentd "$FLUENTD_ARGS" > /var/log/google-fluentd.log
diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile
index ba668bfb49d..adf5c5585c5 100644
--- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile
+++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/Makefile
@@ -6,7 +6,7 @@
.PHONY: build push
-TAG = 1.4
+TAG = 1.5
build:
docker build -t gcr.io/google_containers/fluentd-gcp:$(TAG) .
diff --git a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf
index 34d0996ba01..71ee2b737b8 100644
--- a/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf
+++ b/cluster/addons/fluentd-gcp/fluentd-gcp-image/google-fluentd.conf
@@ -1,16 +1,50 @@
-# This Fluentd configuration file specifies the colleciton
-# of all Docker container log files under /var/lib/docker/containers/...
-# followed by ingestion using the Google Cloud Logging API.
-# This configuration assumes the correct installation of the the
-# Google fluentd plug-in. Currently the collector uses a text format
-# rather than JSON (which is the format used to store the Docker
-# log files). When the fluentd plug-in can accept JSON this
-# configuraiton file should be changed by specifying:
-# format json
-# in the source section.
-# This configuration file assumes that the VM host running
-# this configuraiton has been created with a logging.write scope.
+# This configuration file for Fluentd / td-agent is used
+# to watch changes to Docker log files that live in the
+# directory /var/lib/docker/containers/ and are symbolically
+# linked to from the /varlog directory using names that capture the
+# pod name and container name. These logs are then submitted to
+# Google Cloud Logging which assumes the installation of the cloud-logging plug-in.
# Maintainer: Satnam Singh (satnam@google.com)
+#
+# Example
+# =======
+# A line in the Docker log file might look like this JSON:
+#
+# {"log":"2014/09/25 21:15:03 Got request with path wombat\n",
+# "stream":"stderr",
+# "time":"2014-09-25T21:15:03.499185026Z"}
+#
+# Currently, the log information is ingested as plain text rather than JSON.
+# TODO: When Cloud Logging supports it, ingest as JSON.
+#
+# The record reformer is used to rewrite the tag to focus on the pod name
+# and the Kubernetes container name. For example a Docker container's logs
+# might be in the directory:
+# /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b
+# and in the file:
+# 997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
+# where 997599971ee6... is the Docker ID of the running container.
+# The Kubernetes kubelet makes a symbolic link to this file on the host machine
+# in the /var/log/containers directory which includes the pod name and the Kubernetes
+# container name:
+# synthetic-logger-0.25lps-pod_default-synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
+# ->
+# /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
+# The /var/log directory on the host is mapped to the /varlog directory in the container
+# running this instance of Fluentd and we end up collecting the file:
+# /varlog/containers/synthetic-logger-0.25lps-pod_default-synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
+# Because the source uses "tag reform.*", this results in the tag:
+# reform.varlog.containers.synthetic-logger-0.25lps-pod_default-synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
+# The record reformer is used to discard the reform.varlog.containers prefix and
+# the Docker container ID suffix and "kubernetes." is pre-pended giving the
+# final tag which is ingested into Google Cloud Logging:
+# kubernetes.synthetic-logger-0.25lps-pod_default-synth-lgr
+# This makes it easier for users to search for logs by pod name or by
+# the name of the Kubernetes container regardless of how many times the
+# Kubernetes pod has been restarted (resulting in several Docker container IDs).
+# TODO: Propagate the labels associated with a container along with its logs
+# so users can query logs using labels as well as or instead of the pod name
+# and container name.
type tail
@@ -19,10 +53,16 @@
path /varlog/containers/*.log
pos_file /varlog/gcp-containers.log.pos
time_format %Y-%m-%dT%H:%M:%S
- tag kubernetes.*
+ tag reform.*
read_from_head true
+
+ type record_reformer
+ enable_ruby true
+ tag kubernetes.${tag_suffix[3].split('-')[0..-2].join('-')}
+
+
type google_cloud
flush_interval 5s
diff --git a/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.json b/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.json
index 3b0c6f078bf..747fb37dbc0 100644
--- a/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.json
+++ b/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.json
@@ -5,7 +5,7 @@
"spec": {
"containers": [
{ "name": "fluentd-cloud-logging",
- "image": "gcr.io/google_containers/fluentd-gcp:1.4",
+ "image": "gcr.io/google_containers/fluentd-gcp:1.5",
"env": [
{ "name": "FLUENTD_ARGS",
"value": "-qq"}