odbcTest.py
#!/usr/bin/python
import pyodbc
import datetime
import os
import sys
import fnmatch
import json
import re
import glob
import decimal
# Python 2 "types" module; import only the type objects used in the mapping below
from types import LongType, BooleanType, FloatType, IntType, BufferType, StringType
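
# Usage, inferred from the argument handling at the bottom of this script
# (paths are hypothetical examples):
#   ./odbcTest.py <query-file-or-dir> [output-file-or-result-dir] [result-dir]
#   ./odbcTest.py Functional/odbc/query1.sql      # run a single query file
#   ./odbcTest.py Functional/odbc                 # run every query in a directory
#   ./odbcTest.py Functional/odbc results.out     # write results to a named file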
# Java SQL data type codes (the java.sql.Types constants)
ARRAY = 2003
BIGINT = -5
BINARY = -2
BIT = -7
BLOB = 2004
BOOLEAN = 16
CHAR = 1
CLOB = 2005
DATALINK = 70
DATE = 91
DECIMAL = 3
DISTINCT = 2001
DOUBLE = 8
FLOAT = 6
INTEGER = 4
JAVA_OBJECT = 2000
LONGNVARCHAR = -16
LONGVARBINARY = -4
LONGVARCHAR = -1
NCHAR = -15
NCLOB = 2011
NULL = 0
NUMERIC = 2
NVARCHAR = -9
OTHER = 1111
REAL = 7
REF = 2006
ROWID = -8
SMALLINT = 5
SQLXML = 2009
STRUCT = 2002
TIME = 92
TIMESTAMP = 93
TINYINT = -6
VARBINARY = -3
VARCHAR = 12
NONETYPE = -100  # local sentinel for types with no java.sql.Types mapping
def ParseJSONFile(json_file):
    with open(json_file, 'r') as json_data:
        parse_json = json.load(json_data)
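    # The JSON layout this function assumes, sketched with hypothetical values
    # ("output-file" is optional and falls back to the ".o" extension below):
    #
    #   { "matrices": [ { "query-file":    ".*.sql",
    #                     "expected-file": ".*.e",
    #                     "schema":        "dfs.drillTestDir",
    #                     "output-file":   ".*.o" } ] }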
    # extract the query file extension from the JSON file
    queryFile = parse_json["matrices"][0]["query-file"]
    queryFileRegex = re.compile(r"\.\*(.*)")
    queryFileRegexSearch = queryFileRegex.search(queryFile)
    queryFileExt = ""
    if queryFileRegexSearch:
        queryFileExt = queryFileRegexSearch.group(1)
    # extract the expected file extension from the JSON file
    expectedFile = parse_json["matrices"][0]["expected-file"]
    expectedFileRegex = re.compile(r"\.\*(.*)")
    expectedFileRegexSearch = expectedFileRegex.search(expectedFile)
    expectedFileExt = ""
    if expectedFileRegexSearch:
        expectedFileExt = expectedFileRegexSearch.group(1)
    # extract the schema from the JSON file and convert it from unicode to utf-8
    schema_string = parse_json["matrices"][0]["schema"]
    schema = schema_string.encode('utf-8')
    # the output file extension is optional and defaults to ".o"
    try:
        outputFile = parse_json["matrices"][0]["output-file"]
        outputFileRegex = re.compile(r"\.\*(.*)")
        outputFileExt = outputFileRegex.search(outputFile).group(1)
    except KeyError:
        outputFileExt = ".o"
    return (queryFile, queryFileExt, expectedFile, expectedFileExt, schema,
            outputFileExt)
def SaveData(rows, queryDir, queryFileName, outputFileExt, resultFileCreated):
    # create a "type" file that contains the data type of each column
    typeFile = "%s/%s%s" % (queryDir, queryFileName, ".type")
    typeFile = open(typeFile, 'w')
    # create a "label" file that contains the column names
    columnFile = "%s/%s%s" % (queryDir, queryFileName, ".label")
    columnFile = open(columnFile, 'w')
    # capture the type objects of the "special" data types so they can be
    # used as dictionary keys below
    newdecimal = type(decimal.Decimal(0))
    newdate = type(datetime.date(1, 2, 3))
    newtimestamp = type(datetime.datetime(1, 2, 3))
    newtime = type(datetime.time(1, 2, 3))
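    # pyodbc describes each result column as a tuple in cursor.description;
    # its second element is the Python type of the column's values, e.g. a
    # Drill DATE column arrives as datetime.date and maps to type code 91.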
    # determine the column names and data types returned by this query
    # and store them in the "type" file and "label" file
    dataType = []
    getTypeArray = {
        LongType: BIGINT,
        BooleanType: BOOLEAN,
        newdate: DATE,
        newdecimal: DECIMAL,
        FloatType: FLOAT,
        IntType: INTEGER,
        newtime: TIME,
        newtimestamp: TIMESTAMP,
        BufferType: VARBINARY,
        StringType: VARCHAR
    }
    for column in cursor.description:
        getType = getTypeArray.get(column[1], NONETYPE)
        if getType == NONETYPE:
            if column[1] == pyodbc.DATETIME:
                getType = TIMESTAMP
            else:
                sys.exit("unsupported column type: %s" % column[1])
        dataType.append(getType)
        typeFile.write("%s\n" % getType)
        columnFile.write("%s\n" % column[0])
    typeFile.close()
    columnFile.close()
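    # Example (hypothetical): a query returning (id INTEGER, name VARCHAR)
    # yields a .type file with the lines "4" and "12" (java.sql.Types codes)
    # and a .label file with the lines "id" and "name".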
    # Open the output file. By default the output file is created in the same
    # directory as the query file, but it can also be created in a separate
    # directory.
    if outputFileGiven:
        outputFile = outputFileArg
    elif resultFileCreated:
        outputFile = "%s/%s%s" % (resultDir, queryFileName, outputFileExt)
    else:
        outputFile = "%s/%s%s" % (queryDir, queryFileName, outputFileExt)
    outputFileH = open(outputFile, 'w')
    # evaluate each row that is returned
    for row in rows:
        # represent SQL NULLs by converting None values to the string "null"
        for index, value in enumerate(row):
            if value is None:
                row[index] = "null"
        # Convert the tuple of row fields into a single tab-delimited string.
        # For some data types the Simba ODBC driver returns data that does not
        # exactly match what the Apache Drill driver returns, so the Simba data
        # is adjusted here to match the contents of the expected results files.
        convertToString = ""
        for index, field in enumerate(row):
            fieldString = str(field)
            if dataType[index] == BOOLEAN:
                if fieldString == "True":
                    field = "true"
                elif fieldString == "False":
                    field = "false"
            if dataType[index] == TIME and fieldString != "null":
                # reduce microsecond precision; Drill only has milliseconds
                if field.microsecond > 0:
                    fieldString = fieldString.rstrip('0123456789')
                    fieldString = "%s%s" % (fieldString, field.microsecond)
                field = fieldString
            if dataType[index] == TIMESTAMP and fieldString != "null":
                # Drill always prints a fractional-seconds part: strip trailing
                # zeros when microseconds are present, append ".0" otherwise
                try:
                    data = datetime.datetime.strptime(fieldString, "%Y-%m-%d %H:%M:%S.%f")
                    if data.microsecond > 0:
                        fieldString = fieldString.rstrip('0')
                except ValueError:
                    data = datetime.datetime.strptime(fieldString, "%Y-%m-%d %H:%M:%S")
                    fieldString = fieldString + ".0"
                field = fieldString
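            # e.g. "2015-01-01 10:00:00.120000" becomes "2015-01-01 10:00:00.12",
            # and "2015-01-01 10:00:00" becomes "2015-01-01 10:00:00.0"
            # (hypothetical values)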
            if dataType[index] == VARCHAR and len(fieldString) > 0 and fieldString[0] in ('{', '['):
                # for varchars that begin with a curly brace or square bracket,
                # remove spaces and newlines unless they are inside a
                # double-quoted string
                newFields = []
                inQuotes = False
                start = 0
                stringLength = len(fieldString)
                for current, character in enumerate(fieldString):
                    if character == "\"":
                        inQuotes = not inQuotes
                    atLastChar = (current == stringLength - 1)
                    if atLastChar:
                        newFields.append(fieldString[start:])
                    elif (character == ' ' or character == '\n') and not inQuotes:
                        newFields.append(fieldString[start:current])
                        start = current + 1
                field = "".join(newFields)
            # append the field to the tab-delimited row string
            if convertToString:
                convertToString = convertToString + "\t" + str(field)
            else:
                convertToString = str(field)
        # save the row to the output file; it should match the expected results file
        outputFileH.write("%s\n" % convertToString)
    outputFileH.close()
def ExecuteQuery(queryDir, queryFile, outputFileExt, resultFileCreated):
    queryFileNameRegex = re.compile(r"(.*)\.(.*)$")
    queryFileName = queryFileNameRegex.search(queryFile).group(1)
    with open("%s/%s" % (queryDir, queryFile), 'r') as queryFileH:
        queryStrings = queryFileH.read()
    # If the file holds multiple SQL statements, only one of them is the main
    # query: the statements before it set it up, and the statements after it
    # restore the state of the system after the query runs. A statement that
    # starts with "--@test" is treated as the main query; if none is marked,
    # the main query is assumed to sit in the middle, with the same number of
    # statements before and after it, so an odd number of statements is expected.
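    # A hypothetical query file in this format (the session option shown is
    # only an illustration):
    #   alter session set `planner.enable_hashjoin` = false;
    #   --@test
    #   select count(*) from region;
    #   alter session set `planner.enable_hashjoin` = true;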
    if queryStrings.count(';') > 1:
        # find the main query
        numberStatements = queryStrings.count(';')
        statements = queryStrings.split(';')
        mainStatementIndex = -1
        for index, statement in enumerate(statements):
            statement = statement.lstrip()
            if statement.startswith("--@test"):
                mainStatementIndex = index
                break
        if mainStatementIndex == -1:
            mainStatementIndex = numberStatements / 2
        # execute the setup statements
        for index in range(0, mainStatementIndex):
            queryString = statements[index]
            queryString = queryString.rstrip('\n;')
            try:
                value = cursor.execute(queryString)
            except Exception as inst:
                print "ERROR - execute error for setup statement: %s" % inst
        mainStatement = statements[mainStatementIndex]
        multipleStatements = True
    else:
        mainStatement = queryStrings
        multipleStatements = False
    queryString = mainStatement.rstrip(';\n\r\t ')
    try:
        value = cursor.execute(queryString)
    except Exception as inst:
        print "ERROR - execute error for main statement: %s" % inst
        failedTests.append(queryFile)
    else:
        # only fetch and save results when the main statement succeeded
        rows = cursor.fetchall()
        SaveData(rows, queryDir, queryFileName, outputFileExt, resultFileCreated)
    # if there are statements after the main statement, execute them
    # to restore the system
    if multipleStatements:
        for index in range(mainStatementIndex + 1, numberStatements):
            queryString = statements[index]
            queryString = queryString.rstrip('\n;')
            try:
                value = cursor.execute(queryString)
            except Exception as inst:
                print "ERROR - execute error for cleanup statement: %s" % inst
def GetJSONFileAndExecSingleQuery(queryDir, singleFile):
    # find the json file that describes the query file
    json_files = glob.glob("%s/*.json" % queryDir)
    if not json_files:
        # walk up the parent directories until a json file is found (one is
        # assumed to exist somewhere above the query directory)
        updir = ""
        while not json_files:
            updir = "%s/.." % updir
            json_files = glob.glob("%s%s/*.json" % (queryDir, updir))
    for json_file in json_files:
        queryFile, queryFileExt, expectedFile, expectedFileExt, schema, \
            outputFileExt = ParseJSONFile(json_file)
        # execute the query file
        if queryFileExt:
            # run the query file only if it matches the query file extension
            if fnmatch.fnmatch(singleFile, '*%s' % queryFileExt):
                # execute a SQL statement to "use" the schema specified by
                # the JSON file
                try:
                    value = cursor.execute("use `%s`" % schema)
                except Exception:
                    print "ERROR - execute error for schema"
                    raise RuntimeError('cannot use schema %s' % schema)
                # extract the query file name from singleFile, which has an
                # absolute path
                queryFileRegex = re.compile(".*/(.*)$")
                queryFileRegexSearch = queryFileRegex.search(singleFile)
                queryFile = queryFileRegexSearch.group(1)
                ExecuteQuery(queryDir, queryFile, outputFileExt, resultFileCreated)
        else:
            # If there is no query file extension, the json file names one
            # specific query file. Check whether the file specified in the json
            # file matches the file requested by the user; if so, execute the
            # requested file using the information in this json file.
            if singleFile == '%s/%s' % (queryDir, queryFile):
                # execute a SQL statement to "use" the schema specified by
                # the JSON file
                try:
                    value = cursor.execute("use `%s`" % schema)
                except Exception:
                    print "ERROR - execute error for schema"
                    raise RuntimeError('cannot use schema %s' % schema)
                ExecuteQuery(queryDir, queryFile, outputFileExt, resultFileCreated)
def GetJSONFileAndExecQueries(queryDir):
    for root, dirnames, filenames in os.walk(queryDir):
        # find the json files in this directory
        for filename in fnmatch.filter(filenames, '*.json'):
            json_file = os.path.join(root, filename)
            queryFile, queryFileExt, expectedFile, expectedFileExt, schema, \
                outputFileExt = ParseJSONFile(json_file)
            # execute a SQL statement to "use" the schema specified by
            # the JSON file
            cursor.execute("use `%s`" % schema)
            # get the list of query files for this json file; the glob
            # results carry the directory path
            if queryFileExt:
                queryFiles = glob.glob("%s/*%s" % (root, queryFileExt))
            else:
                # the json file names one specific query file
                queryFiles = glob.glob("%s/%s" % (root, queryFile))
            queryFileRegex = re.compile(".*/(.*)")
            for queryFileWithPath in queryFiles:
                queryFileRegexSearch = queryFileRegex.search(queryFileWithPath)
                queryFile = queryFileRegexSearch.group(1)
                # execute each query file
                ExecuteQuery(root, queryFile, outputFileExt, resultFileCreated)
# determine whether one query is being executed, or a directory of queries
singleFileTest = False
singleFile = ""
if os.path.isfile(sys.argv[1]):
    singleFile = sys.argv[1]
    queryDir = os.path.dirname(singleFile)
    singleFileTest = True
elif os.path.isdir(sys.argv[1]):
    queryDir = sys.argv[1]
else:
    print "ERROR - %s needs to be a file or directory" % sys.argv[1]
    sys.exit(-1)
today = datetime.datetime.now()
resultDir = ""
outputFileArg = ""
resultFileCreated = False
outputFileGiven = False
if len(sys.argv) == 3:
    # get the output file name
    outputFileArg = sys.argv[2]
    outputFileArg = os.path.join(os.getcwd(), outputFileArg)
    print "outputFileArg: " + outputFileArg
    # check whether arg 2 is an existing directory, in which case the result
    # file goes there
    if os.path.isdir(outputFileArg):
        resultDir = outputFileArg
        # create a generic result file name based on today's date
        resultfile = "ODBC-Test-Result-%s.out" % today.isoformat()
        resultfilepath = "%s/%s" % (resultDir, resultfile)
        resultFileCreated = True
        if os.path.exists(resultfilepath):
            print "result file %s exists\n" % resultfilepath
            sys.exit(-1)
    else:
        # arg 2 is a file, not a directory; use it as the output file
        outputFileGiven = True
if len(sys.argv) == 4:
    if resultFileCreated:
        print "result file %s has already been created\n" % resultfilepath
        sys.exit(-1)
    resultDir = sys.argv[3]
    print "resultDir: " + resultDir
    resultfile = "ODBC-Test-Result-%s.out" % today.isoformat()
    resultfilepath = "%s/%s" % (resultDir, resultfile)
    resultFileCreated = True
    if os.path.exists(resultfilepath):
        print "result file path %s exists" % resultfilepath
        sys.exit(-1)
# connect to Drill through the ODBC DSN
dbh = pyodbc.connect('DSN=MapR Drill 64-bit', autocommit=True)
cursor = dbh.cursor()
failedTests = []
json_files = []
if singleFileTest:
    GetJSONFileAndExecSingleQuery(queryDir, singleFile)
else:
    GetJSONFileAndExecQueries(queryDir)
cursor.close()
del cursor
dbh.close()
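# report any queries whose main statement failed to execute
# (failedTests is populated in ExecuteQuery)
if failedTests:
    print "failed tests: %s" % ", ".join(failedTests)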
sys.exit(0)