From 0d1a50b2f1590c75e9ef9ed28a55d4da595584e6 Mon Sep 17 00:00:00 2001 From: carlos-galvez Date: Thu, 27 Jul 2017 17:36:33 +0200 Subject: [PATCH 1/2] - Fixed Oracle driver to map timestamps with ranges to simple timestamps because Hive doesn't accept timestamps with ranges. - Fixed Oracle driver in order to detect when a number is a double or an integer. --- setup.py | 2 +- src/slippinj/databases/drivers/oracle.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index ce5182a..502b141 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ setup( name='slippinj', - version='1.6.1', + version='1.6.2', author='Data Architects SCM Spain', author_email='data.architecture@scmspain.com', packages=find_packages('src'), diff --git a/src/slippinj/databases/drivers/oracle.py b/src/slippinj/databases/drivers/oracle.py index 235ed49..cbf2e11 100644 --- a/src/slippinj/databases/drivers/oracle.py +++ b/src/slippinj/databases/drivers/oracle.py @@ -79,8 +79,8 @@ def __get_columns_for_tables(self, tables): self.__logger.debug('Getting columns information') query_with_owner = "AND owner = '{schema}'".format(schema=self.__db_schema) - info_query = "SELECT table_name, column_name, data_type, data_length, nullable, data_default " \ - "FROM ALL_TAB_COLS " \ + info_query = "SELECT table_name, column_name, data_type, data_length, nullable, data_default, data_scale " \ + "FROM ALL_TAB_COLUMNS " \ "WHERE table_name IN ({tables}) " \ "{owner}" \ "ORDER BY COLUMN_ID".format(tables=self.__join_tables_list(tables), owner=query_with_owner if self.__db_schema else '') @@ -94,6 +94,12 @@ def __get_columns_for_tables(self, tables): self.__logger.debug('Columns found for table {table}'.format(table=row['TABLE_NAME'])) if not row['TABLE_NAME'] in tables_information: tables_information[row['TABLE_NAME']] = {'columns': []} + + if row['DATA_TYPE'] == 'NUMBER' and row['DATA_SCALE'] in (0,None): + row['DATA_TYPE'] = 'bigint' + + row['DATA_TYPE'] = 
re.sub('TIMESTAMP(.*)', 'TIMESTAMP', row['DATA_TYPE']) + tables_information[row['TABLE_NAME']]['columns'].append({ 'source_column_name': row['COLUMN_NAME'], 'column_name': self.__get_valid_column_name(row['COLUMN_NAME']), From 4e3ca11b16c5eb1b29d0793d36474d9533148fa5 Mon Sep 17 00:00:00 2001 From: carlos-galvez Date: Fri, 28 Jul 2017 09:13:56 +0200 Subject: [PATCH 2/2] - Code refactoring of number type detection --- src/slippinj/databases/drivers/oracle.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/slippinj/databases/drivers/oracle.py b/src/slippinj/databases/drivers/oracle.py index cbf2e11..56aa9ab 100644 --- a/src/slippinj/databases/drivers/oracle.py +++ b/src/slippinj/databases/drivers/oracle.py @@ -94,18 +94,12 @@ def __get_columns_for_tables(self, tables): self.__logger.debug('Columns found for table {table}'.format(table=row['TABLE_NAME'])) if not row['TABLE_NAME'] in tables_information: tables_information[row['TABLE_NAME']] = {'columns': []} - - if row['DATA_TYPE'] == 'NUMBER' and row['DATA_SCALE'] in (0,None): - row['DATA_TYPE'] = 'bigint' - - row['DATA_TYPE'] = re.sub('TIMESTAMP(.*)', 'TIMESTAMP', row['DATA_TYPE']) tables_information[row['TABLE_NAME']]['columns'].append({ 'source_column_name': row['COLUMN_NAME'], 'column_name': self.__get_valid_column_name(row['COLUMN_NAME']), 'source_data_type': row['DATA_TYPE'], - 'data_type': row['DATA_TYPE'].lower() if row['DATA_TYPE'] not in self.__column_types else self.__column_types[ - row['DATA_TYPE']], + 'data_type': row['DATA_TYPE'].lower() if re.sub('TIMESTAMP(.*)', 'TIMESTAMP', row['DATA_TYPE']) not in self.__column_types else self.__map_columns(row['DATA_TYPE'], row['DATA_SCALE']), 'character_maximum_length': row['DATA_LENGTH'], 'is_nullable': row['NULLABLE'], 'column_default': row['DATA_DEFAULT'], @@ -113,6 +107,10 @@ def __get_columns_for_tables(self, tables): return tables_information + def __map_columns(self,datatype, datascale): + 
datatype = re.sub('TIMESTAMP(.*)', 'TIMESTAMP', datatype) + return 'bigint' if datatype == 'NUMBER' and datascale in (0,None) else self.__column_types[datatype] + def __get_count_for_tables(self, tables): tables_information = {}