Skip to content

Commit

Permalink
handle unknown index types gracefully in iUtils (#18)
Browse files Browse the repository at this point in the history
* fix regression in parseTaskId regex

when 1b74f29 added support to handle archive tasks, it introduced a new
matching group for the regular expression in parseTaskId which shifted
the indexing for resulting matches (i.e. m).

to simplify things, i am marking the outer group as
don't-capture-just-group (?:) to keep the matching result consistent. as
a result, type will be populated using m[3], m[2], or m[1].

m[4] will always be the datasource; m[5] will be the timestamp.

this change adds unit tests to demonstrate the expected parseTaskId
output for the various task types supported. it also covers the error case
when taskId doesn't match the regular expression.

* handle unknown index types gracefully in iUtils

this updates parseTaskId in iUtils.coffee which uses a somewhat strict
regular expression to parse a given task id and extract type,
datasource, and time.

this simply removes throwing the error and falls back to 'other' for
type leaving dataSource and dataTime alone. in case of a match failure,
they would just default to undefined which isn't a blocker to render
/indexing-service view.
  • Loading branch information
alperkokmen authored and dclim committed Oct 11, 2018
1 parent 6a445fc commit c0d7fa1
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 5 deletions.
7 changes: 2 additions & 5 deletions src/client/factories/iUtils.coffee
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,10 @@ moment = require '../../../bower_components/moment/min/moment.min.js'

module.exports = ->
  # Splits a task id into its type, data source and timestamp.
  #
  # Recognised ids look like `<prefix>_<dataSource>_<timestamp>`, where
  # <prefix> is one of `hadoop_convert_segment`, `index_hadoop`,
  # `index_realtime`, `index_spark` or `archive`, and <timestamp> has the
  # form `2018-01-01T10:20:30.400Z`.
  #
  # taskId - the task id string to parse.
  #
  # Returns an object with:
  #   id         - the taskId exactly as passed in
  #   type       - 'hadoop_convert_segment', 'archive', or the part after
  #                `index_` ('hadoop' / 'realtime' / 'spark'); 'other' when
  #                the id does not match the pattern
  #   dataSource - text between the prefix and the timestamp, or undefined
  #                when the id does not match
  #   dataTime   - the timestamp text, or undefined when the id does not match
  #
  # An id that does not match never raises; it falls back to type 'other'.
  parseTaskId: (taskId) ->
    # The outer alternation is non-capturing, so group numbers are fixed:
    # 1-3 hold the type alternatives, 4 the data source, 5 the timestamp.
    # The dot before the milliseconds is escaped so that only a literal '.'
    # is accepted there. `|| []` turns a failed match into an empty result,
    # so the lookups below yield undefined instead of throwing.
    m = taskId.match(/^(?:(hadoop_convert_segment)|index_(hadoop|realtime|spark)|(archive))_(.+)_(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z)/) || []
    {
      id: taskId
      type: m[3] || m[2] || m[1] || 'other'
      dataSource: m[4]
      dataTime: m[5]
    }
Expand Down
66 changes: 66 additions & 0 deletions test/unit/factories/iUtils.spec.coffee
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
iUtilsFactory = require '../../../src/client/factories/iUtils.coffee'

# Specs for the object produced by the iUtils factory.
describe '$iUtils', ->
  utils = undefined
  stamp = '2018-01-01T10:20:30.400Z'

  # One row per task id that parseTaskId is expected to recognise:
  # [spec title, task id prefix, expected type, expected dataSource].
  # The full task id is the prefix followed by `stamp`.
  recognised = [
    ['should parse hadoop_convert task', 'hadoop_convert_segment_wikipedia_', 'hadoop_convert_segment', 'wikipedia']
    ['should parse index_hadoop task', 'index_hadoop_twitter_', 'hadoop', 'twitter']
    ['should parse index_realtime task', 'index_realtime_twitter_', 'realtime', 'twitter']
    ['should parse index_spark task', 'index_spark_twitter_', 'spark', 'twitter']
    ['should parse archive task', 'archive_wikipedia_', 'archive', 'wikipedia']
  ]

  # Build a fresh instance before every spec.
  beforeEach ->
    utils = iUtilsFactory()

  it 'should have a defined $iUtils', ->
    expect(utils?).toBeTruthy()

  describe 'parseTaskId', ->
    # Register one spec per row, in table order.
    recognised.forEach ([title, prefix, expectedType, expectedSource]) ->
      it title, ->
        taskId = prefix + stamp
        parsed = utils.parseTaskId(taskId)
        expect(parsed).toEqual({
          id: taskId
          type: expectedType
          dataSource: expectedSource
          dataTime: stamp
        })

    # An id outside the known pattern falls back to type 'other'.
    it 'should not throw an error if taskId does not match expected pattern', ->
      unknownId = 'index_kafka_twitter_1675e770de9a423_hfdhgjko'
      parsed = utils.parseTaskId(unknownId)
      expect(parsed).toEqual({
        id: unknownId
        type: 'other'
        dataSource: undefined
        dataTime: undefined
      })


0 comments on commit c0d7fa1

Please sign in to comment.