Skip to content

Commit

Permalink
less over-the-top sync
Browse files Browse the repository at this point in the history
Close #33, close #32. Feeling aggrieved that #52 is not resolved. Maybe some lingering sync issues?
  • Loading branch information
battis committed Aug 11, 2024
1 parent 9fd956d commit b3f01b9
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ const options = {
short: 'p',
description: `Regular expression to email addresses of users/groups to include in Cloud Storage Bucket (will be read from ${cli.colors.value(
'PERMISSIONS_REGEX'
)} environment variable if present)`
)} environment variable if present, defaults to ${cli.colors.value(
'.*'
)} if no argument or environment variable is present)`
}
};

Expand Down Expand Up @@ -87,13 +89,15 @@ const flags = {

permissionsRegex = permissionsRegex || process.env.PERMISSIONS_REGEX || '.*';

spinner.start('Reviewing permission changes');
for (const file of index) {
await file.resetPermissions({
await file.cacheACL({
bucketName,
permissionsRegex,
ignoreErrors: !!ignoreErrors
});
}
spinner.succeed('All permission changes reviewed');

spinner.start(`Saving index to ${cli.colors.url(indexPath)}`);
fs.writeFileSync(indexPath, JSON.stringify(index));
Expand Down
2 changes: 2 additions & 0 deletions apps/indexer/bin/upload.ts → apps/indexer/bin/cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ const flags = {
validate: cli.validators.lengthBetween(6, 30)
}));

spinner.start('Reviewing index files');
const updatedIndex = new Cache(index.root);
for (let i = 0; i < index.length; i++) {
if (index[i].index.path != '.') {
Expand All @@ -108,6 +109,7 @@ const flags = {
}
}
}
spinner.succeed('All indexed files reviewed');

spinner.start(`Saving index to ${cli.colors.url(indexPath)}`);
fs.writeFileSync(indexPath, JSON.stringify(updatedIndex));
Expand Down
47 changes: 45 additions & 2 deletions apps/indexer/bin/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import Cache from '../src/Cache';
import * as Helper from '../src/Helper';
import cli from '@battis/qui-cli';
import Google from '@groton/knowledgebase.google';
import Index from '@groton/knowledgebase.index';
import fs from 'fs';
import path from 'path';

Expand Down Expand Up @@ -31,6 +32,14 @@ const options = {
)})`,
default: defaultIndexPath
},
// CLI option: regular expression selecting which users/groups are processed.
// No `default` is declared on the option itself; the fallback chain described
// in the help text (argument, then PERMISSIONS_REGEX, then '.*') is applied
// where the parsed value is turned into a RegExp.
permissionsRegex: {
  short: 'p',
  description: `Regular expression to match processed users/groups (will be read from ${cli.colors.value(
    'PERMISSIONS_REGEX'
  )} environment variable if present or default to ${cli.colors.value(
    '.*'
  )} if no environment variable is set or argument is passed)`
},
keysPath: {
short: 'k',
description: `Path to file containing downloaded OAuth2 credentials (defaults to ${cli.colors.url(
Expand All @@ -50,7 +59,7 @@ const options = {
(async () => {
const CWD = process.cwd();
let {
values: { folderId, indexPath, keysPath, tokensPath }
values: { folderId, indexPath, keysPath, tokensPath, permissionsRegex }
} = cli.init({
env: {
root: path.join(__dirname, '../../..'),
Expand All @@ -60,6 +69,9 @@ const options = {
});

Google.Client.init({ keysPath, tokensPath });
const permissionsPattern = new RegExp(
permissionsRegex || process.env.PERMISSIONS_REGEX || '.*'
);

const spinner = cli.spinner();
Cache.File.event.on(Cache.File.Event.Start, (status): void => {
Expand All @@ -78,7 +90,10 @@ const options = {
const prevIndex = await Cache.fromFile(indexPath, Cache.File);
spinner.succeed(`${cli.colors.value(prevIndex.root.name)} index loaded`);

const currIndex = await prevIndex.root.indexContents();
const currIndex = [
await new Index.FileFactory(Cache.File).fromDriveId(prevIndex.root.id)
];
currIndex.push(...(await currIndex[0].indexContents()));

spinner.start(`Comparing indices`);
// TODO reset permissions
Expand All @@ -88,10 +103,37 @@ const options = {
let update = prev;
const i = currIndex.findIndex((elt) => elt.index.path == prev.index.path);
if (i >= 0) {
const permissions: Google.Drive.drive_v3.Schema$Permission[] = [];
for (const permission of prev.permissions) {
if (
!currIndex[i].permissions.find(
(p) => p.emailAddress == permission.emailAddress
)
) {
permission.indexerAclState = Index.IndexEntry.State.Expired;
spinner.fail(
`Expired ${permission.emailAddress} from ${prev.index.path}`
);
}
permissions.push(permission);
}
for (const permission of currIndex[i].permissions) {
if (
permissionsPattern.test(permission.emailAddress) &&
!permissions.find((p) => p.emailAddress == permission.emailAddress)
) {
permissions.push(permission);
spinner.succeed(
`Added ${permission.emailAddress} to ${currIndex[i].index.path}`
);
}
}
update = currIndex[i];
update.index = prev.index;
update.permissions = permissions;
currIndex.splice(i, 1);
} else {
update.index.status = Index.IndexEntry.State.Expired;
update.index.exists = false;
}
nextIndex.push(update);
Expand Down Expand Up @@ -133,6 +175,7 @@ const options = {
.replace(FOLDER_NAME, folder.name!)
.replace(TIMESTAMP, new Date().toISOString().replace(':', '-'))
);

spinner.start(`Saving index to ${indexPath}`);
const content = JSON.stringify(index);
cli.shell.mkdir('-p', path.dirname(indexPath));
Expand Down
6 changes: 4 additions & 2 deletions apps/indexer/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@
"build:groups": "tsx bin/groups.ts",
"upload": "run-s upload:*",
"upload:build": "run-s build",
"upload:upload": "tsx bin/upload.ts",
"upload:reset-permissions": "tsx bin/reset-permissions.ts"
"upload:cache": "tsx bin/cache.ts",
"upload:acl": "tsx bin/acl.ts"
},
"devDependencies": {
"@battis/typescript-tricks": "^0.5.4",
"@google-cloud/storage": "^7.12.0",
"@types/adm-zip": "^0.5.5",
"@types/jsdom": "^21.1.7",
"@types/mime-types": "^2.1.4",
Expand Down
124 changes: 97 additions & 27 deletions apps/indexer/src/Cache/File.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import * as Helper from '../Helper';
import pipelineHTML from './Actions/pipelineHTML';
import { Bucket, File as GCSFile } from '@google-cloud/storage';
import Google from '@groton/knowledgebase.google';
import Index from '@groton/knowledgebase.index';
import Zip from 'adm-zip';
Expand All @@ -12,6 +13,12 @@ const DEFAULT_PERMISSIONS_REGEX = /.*/;
const DEFAULT_FORCE = false;
const DEFAULT_IGNORE_ERRORS = true;

/**
 * Type-level augmentation that merges with the `File` class of the same name.
 *
 * Narrows `permissions` so each Google Drive permission entry may also carry
 * an optional `indexerAclState`, the indexer's own record of that
 * permission's ACL state.
 */
interface File extends Index.File {
  permissions: Array<
    Google.Drive.drive_v3.Schema$Permission & {
      indexerAclState?: Index.IndexEntry.State;
    }
  >;
}

class File extends Index.File {
public static event = new events.EventEmitter();

Expand Down Expand Up @@ -114,14 +121,21 @@ class File extends Index.File {
/**
* Backwards-compatible with Overdrive.io naming scheme
*/
protected static normalizeFilename(filename: string): string {
private static normalizeFilename(filename: string): string {
return filename!
.replace('&', 'and')
.replace(/[^a-z0-9()!@*_.]+/gi, '-')
.replace(/-+$/, '')
.toLowerCase();
}

/**
 * Build a resolver that maps an indexed URI to a file handle in `bucket`.
 *
 * The returned function strips a leading `gs://<bucket-name>/` prefix from
 * the URI (a URI without that prefix is used unchanged) and passes the
 * remaining object path to `bucket.file()`.
 *
 * @param bucket Bucket in which the object paths are resolved
 * @returns Function taking a URI and returning the result of `bucket.file()`
 */
private subfileFactory(bucket: Bucket) {
  return (uri: string) => bucket.file(uri.replace(/^gs:\/\/[^/]+\//, ''));
}

/**
* TODO _re_ index non-destructively
* TODO delete/rename cached files
Expand Down Expand Up @@ -185,15 +199,15 @@ class File extends Index.File {
}: File.Params.Cache) {
if (!this.isFolder()) {
const bucket = Google.Client.getStorage().bucket(bucketName);
const subfile = this.subfileFactory(bucket);

if (!this.index.exists) {
File.event.emit(File.Event.Start, `${this.index.path} expired`);
let success = true;
await this.exponentialBackoff(async () => {
for (const uri of this.index.uri) {
File.event.emit(File.Event.Start, `${uri} expired`);
const file = bucket.file(
path.join(this.index.path, path.basename(uri))
);
const file = subfile(uri);
try {
await file.delete();
File.event.emit(
Expand Down Expand Up @@ -232,11 +246,13 @@ class File extends Index.File {
const files = await this.fetchAsHtmlIfPossible();
const deleted: string[] = [];
for (const uri in this.index.uri) {
if (!Object.keys(files).includes(path.basename(uri))) {
if (
!Object.keys(files).includes(
uri.replace(new RegExp(`^.*${this.index.path}/(.*)$`), '$1')
)
) {
File.event.emit(File.Event.Start, `${uri} expired`);
const file = bucket.file(
path.join(this.index.path, path.basename(uri))
);
const file = subfile(uri);
try {
await file.delete();
deleted.push(uri);
Expand Down Expand Up @@ -292,25 +308,21 @@ class File extends Index.File {
return this;
}

public async resetPermissions({
public async cacheACL({
bucketName,
permissionsRegex = DEFAULT_PERMISSIONS_REGEX,
ignoreErrors = DEFAULT_IGNORE_ERRORS
}: File.Params.Cache) {
const bucket = Google.Client.getStorage().bucket(bucketName);
for (const uri of this.index.uri) {
const filePath = uri.replace(/^gs:\/\/[^/]+\//, '');
File.event.emit(File.Event.Succeed, filePath);
const file = bucket.file(filePath);
if (!this.isFolder()) {
const bucket = Google.Client.getStorage().bucket(bucketName);
const subfile = this.subfileFactory(bucket);
const updatedPermissions: Google.Drive.drive_v3.Schema$Permission[] = [];
for (const permission of this.permissions!.filter(
(p) =>
p.emailAddress &&
p.indexerAclState != Index.IndexEntry.State.Cached &&
new RegExp(permissionsRegex || '.*').test(p.emailAddress)
)) {
File.event.emit(
File.Event.Start,
`Adding ${permission.displayName} to ACL for ${this.index.path}`
);
let entity: string;
switch (permission.type) {
case 'group':
Expand All @@ -324,19 +336,77 @@ class File extends Index.File {
`Cannot handle permission type ${permission.type} (driveId: ${this.id}, emailAddress: ${permission.emailAddress})`
);
}
await this.exponentialBackoff(async () => {
await file.acl.add({
entity,
role: Google.Storage.acl.READER_ROLE
});

if (permission.indexerAclState == Index.IndexEntry.State.Expired) {
File.event.emit(
File.Event.Succeed,
`${entity} added as reader to ACL for ${this.index.path}`
File.Event.Start,
`Removing ${permission.displayName} from ACL for ${this.index.path}`
);
}, ignoreErrors);
try {
for (const uri of this.index.uri) {
const file = subfile(uri);
File.event.emit(File.Event.Start, file.name);
await file.acl.delete({ entity });
File.event.emit(
File.Event.Succeed,
`${permission.type}:${permission.emailAddress} removed from ACL for ${this.index.path}`
);
}
} catch (error) {
permission.indexerAclState = error.message || 'error';
updatedPermissions.push(permission);
File.event.emit(
File.Event.Fail,
`Error removing from ACL`,
{ entity, driveId: this.id },
error
);
}
} else {
File.event.emit(
File.Event.Start,
`Adding ${permission.displayName} to ACL for ${this.index.path}`
);
await this.exponentialBackoff(async () => {
for (const uri of this.index.uri) {
const file = subfile(uri);
File.event.emit(
File.Event.Succeed,
`${permission.type}:${permission.emailAddress} added as reader to ACL for /${file.name}`
);
try {
await file.acl.add({
entity,
role: Google.Storage.acl.READER_ROLE
});
File.event.emit(
File.Event.Succeed,
`${permission.type}:${permission.emailAddress} added as reader to ACL for /${file.name}`
);
} catch (error) {
File.event.emit(
File.Event.Fail,
Helper.errorMessage(
'Error adding reader to ACL',
{
driveId: this.id,
file: file.name,
email: permission.emailAddress
},
error
)
);
}
}
permission.indexerAclState = Index.IndexEntry.State.Cached;
updatedPermissions.push(permission);
}, ignoreErrors);
}
}
this.permissions = updatedPermissions;

this.index.update();
}
this.index.update();
}
}

Expand Down
4 changes: 2 additions & 2 deletions apps/indexer/src/Helper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ import cli from '@battis/qui-cli';

export function colorizeStatus(p: string) {
return p.replace(
/(\/?([a-z0-9._-]+\/)+([a-z0-9._-]+))/g,
/(\/?([a-z0-9._-]+\/)+([a-z0-9()._-]+))/gi,
cli.colors.url('$1')
);
).replace(/((user|group):[a-z0-9._-]+@[a-z0-9._-]+)/gi, cli.colors.value('$1'));
}

export function errorMessage(
Expand Down
3 changes: 2 additions & 1 deletion packages/index/src/IndexEntry/State.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
enum State {
Indexed = 'indexed',
PreparingCache = 'preparing cache',
Cached = 'cached'
Cached = 'cached',
Expired = 'expired'
}

export default State;
6 changes: 6 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit b3f01b9

Please sign in to comment.