From c0d7fa1aebbac3a4de14d0a585f6ab468e934ccd Mon Sep 17 00:00:00 2001
From: Alper Kokmen <alperkokmen@gmail.com>
Date: Thu, 11 Oct 2018 12:17:08 -0700
Subject: [PATCH] handle unknown index types gracefully in iUtils (#18)

* fix regression in parseTaskId regex

when 1b74f29 added support to handle archive tasks, it introduced a new
matching group for the regular expression in parseTaskId which shifted
the indexing for resulting matches (i.e. m).

to simplify things, i am marking the outer group as
don't-capture-just-group (?:) to keep the matching result consistent. as
a result, type will be populated using m[3], m[2], or m[1].

m[4] will always be the datasource; m[5] will be the timestamp.

this change adds unit tests to demonstrate the expected parseTaskId
output for the various task types supported. it also covers the error case
when taskId doesn't match the regular expression.

* handle unknown index types gracefully in iUtils

this updates parseTaskId in iUtils.coffee which uses a somewhat strict
regular expression to parse a given task id and extract type,
datasource, and time.

this simply removes throwing the error and falls back to 'other' for
type, leaving dataSource and dataTime alone. in case of a match failure,
they would just default to undefined, which isn't a blocker to rendering
the /indexing-service view.
---
 src/client/factories/iUtils.coffee     |  7 +--
 test/unit/factories/iUtils.spec.coffee | 66 ++++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 5 deletions(-)
 create mode 100644 test/unit/factories/iUtils.spec.coffee

diff --git a/src/client/factories/iUtils.coffee b/src/client/factories/iUtils.coffee
index 34a5bce..a3dc1b8 100644
--- a/src/client/factories/iUtils.coffee
+++ b/src/client/factories/iUtils.coffee
@@ -2,13 +2,10 @@ moment = require '../../../bower_components/moment/min/moment.min.js'
 
 module.exports = ->
   parseTaskId: (taskId) ->
-    m = taskId.match /^((hadoop_convert_segment)|index_(hadoop|realtime|spark)|(archive))_(.+)_(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{3}Z)/
-    type = m[3]
-    type ||= m[2]
-    throw Error("Can't parse #{taskId}") unless m
+    m = taskId.match(/^(?:(hadoop_convert_segment)|index_(hadoop|realtime|spark)|(archive))_(.+)_(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{3}Z)/) || []
     {
       id: taskId
-      type
+      type: m[3] || m[2] || m[1] || 'other'
       dataSource: m[4]
       dataTime: m[5]
     }
diff --git a/test/unit/factories/iUtils.spec.coffee b/test/unit/factories/iUtils.spec.coffee
new file mode 100644
index 0000000..6e96541
--- /dev/null
+++ b/test/unit/factories/iUtils.spec.coffee
@@ -0,0 +1,66 @@
+iUtilsFactory = require '../../../src/client/factories/iUtils.coffee'
+
+describe '$iUtils', () ->
+  $iUtils = undefined
+  $taskTimestamp = '2018-01-01T10:20:30.400Z'
+
+  beforeEach ->
+    $iUtils = iUtilsFactory()
+
+  it 'should have a defined $iUtils', () ->
+    expect($iUtils?).toBeTruthy()
+
+  describe 'parseTaskId', () ->
+    it 'should parse hadoop_convert task', () ->
+      task = $iUtils.parseTaskId('hadoop_convert_segment_wikipedia_' + $taskTimestamp)
+      expect(task).toEqual({
+        id: 'hadoop_convert_segment_wikipedia_' + $taskTimestamp
+        type: 'hadoop_convert_segment'
+        dataSource: 'wikipedia'
+        dataTime: $taskTimestamp
+      })
+
+    it 'should parse index_hadoop task', () ->
+      task = $iUtils.parseTaskId('index_hadoop_twitter_' + $taskTimestamp)
+      expect(task).toEqual({
+        id: 'index_hadoop_twitter_' + $taskTimestamp
+        type: 'hadoop'
+        dataSource: 'twitter'
+        dataTime: $taskTimestamp
+      })
+
+    it 'should parse index_realtime task', () ->
+      task = $iUtils.parseTaskId('index_realtime_twitter_' + $taskTimestamp)
+      expect(task).toEqual({
+        id: 'index_realtime_twitter_' + $taskTimestamp
+        type: 'realtime'
+        dataSource: 'twitter'
+        dataTime: $taskTimestamp
+      })
+
+    it 'should parse index_spark task', () ->
+      task = $iUtils.parseTaskId('index_spark_twitter_' + $taskTimestamp)
+      expect(task).toEqual({
+        id: 'index_spark_twitter_' + $taskTimestamp
+        type: 'spark'
+        dataSource: 'twitter'
+        dataTime: $taskTimestamp
+      })
+
+    it 'should parse archive task', () ->
+      task = $iUtils.parseTaskId('archive_wikipedia_' + $taskTimestamp)
+      expect(task).toEqual({
+        id: 'archive_wikipedia_' + $taskTimestamp
+        type: 'archive'
+        dataSource: 'wikipedia'
+        dataTime: $taskTimestamp
+      })
+
+    it 'should not throw an error if taskId does not match expected pattern', () ->
+      task = $iUtils.parseTaskId('index_kafka_twitter_1675e770de9a423_hfdhgjko')
+      expect(task).toEqual({
+        id: 'index_kafka_twitter_1675e770de9a423_hfdhgjko'
+        type: 'other'
+        dataSource: undefined
+        dataTime: undefined
+      })