Skip to content

Commit

Permalink
issue #49 - SQL Server support
Browse files Browse the repository at this point in the history
  • Loading branch information
pounard committed Nov 23, 2023
1 parent ec66bd8 commit 8bafdaf
Show file tree
Hide file tree
Showing 17 changed files with 298 additions and 74 deletions.
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"php": ">=8.1",
"doctrine/doctrine-bundle": "^2.10.0",
"doctrine/orm": "^2.15",
"makinacorpus/query-builder": "^0.1",
"makinacorpus/query-builder": "^0.1.9",
"symfony/config": "^6.0",
"symfony/console": "^6.0",
"symfony/dependency-injection": "^6.0",
Expand Down
13 changes: 13 additions & 0 deletions dev.sh
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,19 @@ do_test() {
-e DBAL_USER=postgres \
-e DATABASE_URL="postgresql://postgres:password@postgresql16:5432/test_db?serverVersion=16&charset=utf8" \
phpunit vendor/bin/phpunit $@

section_title "Running tests with SQL Server 2019"
docker compose -p db_tools_bundle_test exec \
-e DBAL_DRIVER=pdo_sqlsrv \
-e DBAL_DBNAME=test_db \
-e DBAL_HOST=sqlsrv2019 \
-e DBAL_PASSWORD=P@ssword123 \
-e DBAL_PORT=1433 \
-e DBAL_ROOT_PASSWORD=P@ssword123 \
-e DBAL_ROOT_USER=sa \
-e DBAL_USER=sa \
-e DATABASE_URL="pdo-sqlsrv://sa:P%40ssword123@sqlsrv2019:1433/test_db?serverVersion=2019&charset=utf8&driverOptions[TrustServerCertificate]=true" \
phpunit vendor/bin/phpunit $@
}

# Display help
Expand Down
12 changes: 12 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,18 @@ services:
- 9505:5432
networks:
- db-tools-test
sqlsrv2019:
image: mcr.microsoft.com/mssql/server:2019-latest
restart: unless-stopped
environment:
ACCEPT_EULA: "y"
MSSQL_PID: Developer
MSSQL_SA_PASSWORD: P@ssword123
SA_PASSWORD: P@ssword123
ports:
- 9506:1433
networks:
- db-tools-test

networks:
db-tools-test:
25 changes: 18 additions & 7 deletions docker/php/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,29 @@ RUN apt-get update
RUN apt-get install -yqq --no-install-recommends default-mysql-client acl iproute2 zip zlib1g-dev libzip-dev \
libxml2-dev libpng-dev libghc-curl-dev libldb-dev libldap2-dev gnupg2 libpq-dev

# PHP required extensions
RUN pecl install apcu
RUN docker-php-ext-configure pgsql -with-pgsql=/usr/local/pgsql
RUN docker-php-ext-install -j$(nproc) pgsql pdo_pgsql pdo mysqli pdo_mysql zip xml gd curl bcmath
RUN docker-php-ext-enable apcu pdo_pgsql pdo_mysql sodium
RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Instaling postgresql-client-16
RUN curl -fsSL https://www.postgresql.org/media/keys/ACCC4CF8.asc| gpg --dearmor -o /etc/apt/trusted.gpg.d/postgresql.gpg && \
sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt bookworm-pgdg main" > /etc/apt/sources.list.d/pgdg.list' && \
apt-get update && apt-get install -y postgresql-16

# PHP required extensions
RUN docker-php-ext-configure pgsql -with-pgsql=/usr/local/pgsql
RUN docker-php-ext-install -j$(nproc) pgsql pdo_pgsql pdo mysqli pdo_mysql zip xml gd curl bcmath
RUN docker-php-ext-enable pdo_pgsql pdo_mysql sodium

# SQL Server support
ENV ACCEPT_EULA=Y
RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add -
RUN curl https://packages.microsoft.com/config/ubuntu/20.04/prod.list > /etc/apt/sources.list.d/mssql-release.list
RUN apt-get update
RUN apt-get -y --no-install-recommends install msodbcsql18 unixodbc-dev
RUN pecl install sqlsrv
RUN pecl install pdo_sqlsrv
RUN docker-php-ext-enable sqlsrv pdo_sqlsrv

# Cleanup.
RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

COPY --from=composer /usr/bin/composer /usr/bin/composer

WORKDIR /var/www
163 changes: 153 additions & 10 deletions src/Anonymization/Anonymizator.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

use Doctrine\DBAL\Connection;
use Doctrine\DBAL\Platforms\AbstractMySQLPlatform;
use Doctrine\DBAL\Platforms\SQLServerPlatform;
use Doctrine\DBAL\Schema\Column;
use Doctrine\DBAL\Schema\SchemaDiff;
use Doctrine\DBAL\Schema\TableDiff;
Expand All @@ -16,6 +17,7 @@
use MakinaCorpus\DbToolsBundle\Anonymization\Config\AnonymizationConfig;
use MakinaCorpus\DbToolsBundle\Anonymization\Config\AnonymizerConfig;
use MakinaCorpus\DbToolsBundle\Helper\Format;
use MakinaCorpus\QueryBuilder\Bridge\AbstractBridge;
use MakinaCorpus\QueryBuilder\Bridge\Doctrine\DoctrineQueryBuilder;
use MakinaCorpus\QueryBuilder\Bridge\Doctrine\Query\DoctrineUpdate;

Expand Down Expand Up @@ -233,21 +235,63 @@ protected function anonymizeTablePerColumn(string $table, array $anonymizers): \

protected function createUpdateQuery(string $table): DoctrineUpdate
{
$update = $this->getQueryBuilder()->update($table);
$builder = $this->getQueryBuilder();
$update = $builder->update($table);

$expr = $update->expression();

//
// Add target table a second time into the FROM statement of the
// UPDATE query, in order for anonymizers to be able to JOIN over
// it. Otherwise, JOIN would not be possible for RDBMS that speak
// standard SQL.
$expr = $update->expression();
$update->join(
$table,
$expr->where()->isEqual(
$expr->column(AbstractAnonymizer::JOIN_ID, $table),
$expr->column(AbstractAnonymizer::JOIN_ID, AbstractAnonymizer::JOIN_TABLE),
),
AbstractAnonymizer::JOIN_TABLE
);
//
// This is the only and single hack regarding the UPDATE clause
// syntax, all RDBMS accept the following query:
//
// UPDATE foo
// SET val = bar.val
// FROM foo AS foo_2
// JOIN bar ON bar.id = foo_2.id
// WHERE foo.id = foo_2.id
//
// Except for SQL Server, which cannot deambiguate the foo table
// reference in the WHERE clause, so we have to write it this
// way:
//
// UPDATE foo
// SET val = bar.val
// FROM (
// SELECT *
// FROM foo
// ) AS foo_2
// JOIN bar ON bar.id = foo_2.id
// WHERE foo.id = foo_2.id
//
// Which by the way also works with other RDBMS, but is an
// optimization fence for some, because the nested SELECT becomes
// a temporary table (especially for MySQL...). For those we need
// to keep the original query, even if semantically identical.
//
if (AbstractBridge::SERVER_SQLSERVER === $builder->getServerFlavor()) {
$update->join(
$builder->select($table),
$expr->where()->isEqual(
$expr->column(AbstractAnonymizer::JOIN_ID, $table),
$expr->column(AbstractAnonymizer::JOIN_ID, AbstractAnonymizer::JOIN_TABLE),
),
AbstractAnonymizer::JOIN_TABLE
);
} else {
$update->join(
$table,
$expr->where()->isEqual(
$expr->column(AbstractAnonymizer::JOIN_ID, $table),
$expr->column(AbstractAnonymizer::JOIN_ID, AbstractAnonymizer::JOIN_TABLE),
),
AbstractAnonymizer::JOIN_TABLE
);
}

return $update;
}
Expand Down Expand Up @@ -299,6 +343,12 @@ public function addAnonymizerIdColumn(string $table): void
return;
}

if ($platform instanceof SQLServerPlatform) {
$this->addAnonymizerIdColumnSqlServer($table);

return;
}

$schemaManager->alterSchema(
new SchemaDiff(
changedTables: [
Expand Down Expand Up @@ -464,6 +514,99 @@ protected function addAnonymizerIdColumnMySql(string $table)
}
}

/**
* Add second identity column for SQL Server.
*
* Pretty much like MySQL, SQL Server doesn't allow a second identity
* column, we need to manually create a sequence. It's much easier than
* MySQL thought.
*/
protected function addAnonymizerIdColumnSqlServer(string $table)
{
$platform = $this->connection->getDatabasePlatform();
$queryBuilder = $this->getQueryBuilder();

$sequenceName = $platform->quoteIdentifier('_db_tools_seq_' . $table);

$queryBuilder->executeStatement(
<<<SQL
ALTER TABLE ?::table DROP COLUMN IF EXISTS ?::column
SQL,
[
$table,
AbstractAnonymizer::JOIN_ID,
],
);

$queryBuilder->executeStatement(
<<<SQL
DROP SEQUENCE IF EXISTS ?::identifier;
SQL,
[$sequenceName],
);

$queryBuilder->executeStatement(
<<<SQL
CREATE SEQUENCE ?::identifier
AS int
START WITH 1
INCREMENT BY 1;
SQL,
[$sequenceName],
);

$queryBuilder->executeStatement(
<<<SQL
ALTER TABLE ?::table
ADD ?::column int NOT NULL DEFAULT (
NEXT VALUE FOR ?::identifier
);
SQL,
[
$table,
AbstractAnonymizer::JOIN_ID,
$sequenceName,
],
);

// Remove default value, default values are constraints in SQL Server
// we must find its auto generated identifier. Removing the constraint
// at this stade is mandatory otherwise column will not be deletable
// later when anonymizator will proceed to cleanup.
// @see https://stackoverflow.com/questions/1364526/how-do-you-drop-a-default-value-from-a-column-in-a-table
$queryBuilder->executeStatement(
<<<SQL
DECLARE @ConstraintName nvarchar(200)
SELECT @ConstraintName = Name
FROM SYS.DEFAULT_CONSTRAINTS
WHERE PARENT_OBJECT_ID = OBJECT_ID(?)
AND PARENT_COLUMN_ID = (
SELECT column_id
FROM sys.columns
WHERE NAME = ?
AND object_id = OBJECT_ID(?)
)
IF @ConstraintName IS NOT NULL
EXEC('ALTER TABLE ' + ? + ' DROP CONSTRAINT ' + @ConstraintName)
SQL,
[
$table,
AbstractAnonymizer::JOIN_ID,
$table,
$table,
],
);

$queryBuilder->executeStatement(
<<<SQL
DROP SEQUENCE IF EXISTS ?::identifier;
SQL,
[$sequenceName],
);

$this->createIndex($table, AbstractAnonymizer::JOIN_ID);
}

/**
* Create an index.
*/
Expand Down
54 changes: 39 additions & 15 deletions src/Anonymization/Anonymizer/AbstractAnonymizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@
namespace MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer;

use Doctrine\DBAL\Connection;
use Doctrine\DBAL\Platforms\SQLServerPlatform;
use Doctrine\DBAL\Schema\Column;
use Doctrine\DBAL\Schema\Table;
use Doctrine\DBAL\Types\Type;
use MakinaCorpus\DbToolsBundle\Attribute\AsAnonymizer;
use MakinaCorpus\QueryBuilder\Expression;
use MakinaCorpus\QueryBuilder\ExpressionFactory;
use MakinaCorpus\QueryBuilder\Query\Update;
use MakinaCorpus\QueryBuilder\Bridge\Doctrine\DoctrineQueryBuilder;
use MakinaCorpus\QueryBuilder\Query\Update;

abstract class AbstractAnonymizer
{
Expand Down Expand Up @@ -176,26 +177,49 @@ protected function generateTempTableName(): string
return \uniqid(self::TEMP_TABLE_PREFIX);
}

protected function getSetIfNotNullExpression(mixed $valueExpression, mixed $columnExpression = null): Expression
/**
* Return a random float between 0 and 1 expression.
*
* makina-corpus/query-builder already support this, but due to an odd SQL
* Server non standard behaviour, we reimplement it here: SQL Server RAND()
* return value will always be the same no matter how many time you call it
* inside a single SQL query. This means that when you update many rows
* using a RAND() based value, all rows will have the same value.
*
* We are going to get arround by injected a random value as seed for the
* RAND() function, because SQL Server allows this. Using NEWID() which
* generates an GUID might be slow, but we'll that at usage later.
*
* This function takes care of this, and will return an expression that
* works with all supported RDBMS.
*/
protected function getRandomExpression(): Expression
{
if (null === $columnExpression) {
$columnExpression = ExpressionFactory::column($this->columnName, $this->tableName);
if ($this->connection->getDatabasePlatform() instanceof SQLServerPlatform) {
return ExpressionFactory::raw('rand(abs(checksum(newid())))');
}
return ExpressionFactory::ifThen(ExpressionFactory::where()->isNotNull($columnExpression), $valueExpression);
return ExpressionFactory::random();
}

/**
* Generate an SQL text pad left expression.
* For the same reason as getRandomExpression().
*/
protected function getSqlTextPadLeftExpression(mixed $textExpression, int $padSize, string $rawPadString): Expression
protected function getRandomIntExpression(int $max, int $min = 0): Expression
{
if ($this->connection->getDatabasePlatform() instanceof SQLServerPlatform) {
return ExpressionFactory::raw(
'FLOOR(? * (? - ? + 1) + ?)',
[$this->getRandomExpression(), ExpressionFactory::cast($max, 'int'), $min, $min]
);
}
return ExpressionFactory::randomInt($max, $min);
}

protected function getSetIfNotNullExpression(mixed $valueExpression, mixed $columnExpression = null): Expression
{
return ExpressionFactory::raw(
'lpad(?, ?, ?)',
[
ExpressionFactory::cast($textExpression, 'text'),
$padSize,
$rawPadString,
],
);
if (null === $columnExpression) {
$columnExpression = ExpressionFactory::column($this->columnName, $this->tableName);
}
return ExpressionFactory::ifThen(ExpressionFactory::where()->isNotNull($columnExpression), $valueExpression);
}
}
5 changes: 2 additions & 3 deletions src/Anonymization/Anonymizer/AbstractEnumAnonymizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,9 @@ public function anonymize(Update $update): void
$expr
->where()
->raw(
'MOD(?, ?) + 1 = ?',
'? + 1 = ?',
[
$expr->column(self::JOIN_ID, self::JOIN_TABLE),
$sampleCount,
$expr->mod($expr->column(self::JOIN_ID, self::JOIN_TABLE), $sampleCount),
$expr->column('rownum', $joinAlias),
]
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,9 @@ public function anonymize(Update $update): void
$update->join(
$join,
$expr->where()->raw(
'MOD(?, ?) + 1 = ?',
'? + 1 = ?',
[
$expr->column(self::JOIN_ID, self::JOIN_TABLE),
$sampleCount,
$expr->mod($expr->column(self::JOIN_ID, self::JOIN_TABLE), $sampleCount),
$expr->column('rownum', $joinAlias),
]
),
Expand Down
Loading

0 comments on commit 8bafdaf

Please sign in to comment.