Skip to content

Commit

Permalink
add tokenizer and prettier
Browse files Browse the repository at this point in the history
  • Loading branch information
codybrom committed Apr 12, 2023
1 parent 2454c6e commit 38832ed
Show file tree
Hide file tree
Showing 7 changed files with 222 additions and 181 deletions.
1 change: 1 addition & 0 deletions .prettierrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"prettier-airbnb-config"
34 changes: 18 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
GPT Context Generator
=====================
# GPT Context Generator

This Visual Studio Code extension helps you generate context for .js/.jsx and .ts/.tsx files, making it easier to collaborate with AI models like OpenAI's GPT-4. The extension generates context by pulling in non-CSS dependencies such as API pages that are referenced in your code. It also respects `.gitignore` rules to avoid including unnecessary files.
[![Code Style Airbnb](https://badgen.net/badge/code%20style/airbnb/ff5a5f?icon=airbnb)](https://github.com/airbnb/javascript)

Features
--------
This Visual Studio Code extension helps you generate context for .js/.jsx and .ts/.tsx files, making
it easier to collaborate with AI models like OpenAI's GPT-4. The extension generates context by
pulling in non-CSS dependencies such as API pages that are referenced in your code. It also respects
`.gitignore` rules to avoid including unnecessary files.

- Generate context for the entire workspace
- Generate context for the currently open file and its dependencies
- Estimate the number of OpenAI tokens in the generated context
## Features

Usage
-----
- Generate context for the entire workspace
- Generate context for the currently open file and its dependencies
- Estimate the number of OpenAI tokens in the generated context

## Usage

### Generate context for the entire workspace

Expand All @@ -27,12 +29,12 @@ Usage
3. Type `Generate GPT Friendly Context for Open File` and select the command from the list.
4. The generated context, including dependencies, will be displayed in a new editor tab.

Token Count Estimation
----------------------
## Token Count Estimation

When generating context, the extension will also display an information message with an estimated number of OpenAI tokens in the generated text. This information can be helpful when working with AI models that have token limitations.
When generating context, the extension will also display an information message with an estimated
number of OpenAI tokens in the generated text. This information can be helpful when working with AI
models that have token limitations.

License
-------
## License

This extension is released under the MIT License.
This extension is released under the MIT License.
11 changes: 7 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "gpt-context-generator",
"displayName": "GPT Context Generator",
"description": "Generate GPT-friendly context for js/ts files or workspaces",
"version": "0.0.2",
"version": "0.0.3",
"engines": {
"vscode": "^1.77.0"
},
Expand All @@ -12,7 +12,7 @@
],
"icon": "images/icon.png",
"galleryBanner": {
"color": "#C80000",
"color": "#1F5738",
"theme": "dark"
},
"keywords": [
Expand Down Expand Up @@ -45,7 +45,10 @@
"test": "node ./out/test/runTest.js"
},
"dependencies": {
"ignore": "^5.2.4"
"gpt-3-encoder": "^1.1.4",
"ignore": "^5.2.4",
"prettier": "^2.8.7",
"prettier-airbnb-config": "^1.0.0"
},
"devDependencies": {
"@types/glob": "^8.1.0",
Expand All @@ -66,4 +69,4 @@
},
"license": "MIT",
"publisher": "codybrom"
}
}
293 changes: 158 additions & 135 deletions src/extension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,150 +4,173 @@ import * as vscode from 'vscode';
import * as fs from 'fs';
import * as path from 'path';
import ignoreFactory = require('ignore');
import {encode} from 'gpt-3-encoder';

/**
 * Opens the generated context in a new plaintext editor tab and then shows an
 * information message with its estimated token count.
 *
 * Shared by both commands so the open/show/report sequence lives in one place.
 *
 * @param gptContext - The generated context text to display.
 */
async function showGeneratedContext(gptContext: string): Promise<void> {
  const gptContextDocument = await vscode.workspace.openTextDocument({
    content: gptContext,
    language: 'plaintext',
  });

  await vscode.window.showTextDocument(gptContextDocument, vscode.ViewColumn.One);

  const tokenCount = estimateTokenCount(gptContext);
  vscode.window.showInformationMessage(
    `The generated context is approximately ${tokenCount} tokens.`
  );
}

/**
 * Called when the extension is activated (the first time one of its commands
 * is executed). Registers the two context-generation commands and hands their
 * disposables to the extension context.
 *
 * @param context - Extension context whose `subscriptions` receive the command disposables.
 */
export function activate(context: vscode.ExtensionContext): void {
  const disposable = vscode.commands.registerCommand(
    'gpt-context-generator.createGPTFriendlyContext',
    async () => {
      const folders = vscode.workspace.workspaceFolders;
      // Guard the empty list as well as `undefined`: indexing [0] on an empty
      // array would otherwise throw when `.uri` is read.
      if (!folders || folders.length === 0) {
        vscode.window.showErrorMessage('Please open a workspace to use this extension.');
        return;
      }

      const workspacePath = folders[0].uri.fsPath;
      const gptContext = await createGPTFriendlyContext(workspacePath);
      await showGeneratedContext(gptContext);
    }
  );

  // Register a new command for creating context for the open file
  const disposableOpenFile = vscode.commands.registerCommand(
    'gpt-context-generator.createGPTFriendlyContextForOpenFile',
    async () => {
      const folders = vscode.workspace.workspaceFolders;
      const editor = vscode.window.activeTextEditor;
      if (!folders || folders.length === 0 || !editor) {
        vscode.window.showErrorMessage('Please open a workspace and a file to use this extension.');
        return;
      }

      const workspacePath = folders[0].uri.fsPath;
      const openFilePath = editor.document.uri.fsPath;
      const gptContext = await createGPTFriendlyContextForOpenFile(workspacePath, openFilePath);
      await showGeneratedContext(gptContext);
    }
  );

  context.subscriptions.push(disposableOpenFile);
  context.subscriptions.push(disposable);
}

async function createGPTFriendlyContext(workspacePath: string): Promise<string> {
const gitIgnorePath = path.join(workspacePath, '.gitignore');
const ignoreFilter = ignoreFactory();

if (fs.existsSync(gitIgnorePath)) {
const gitIgnoreContent = fs.readFileSync(gitIgnorePath).toString();
ignoreFilter.add(gitIgnoreContent);
}

const gptContext: string[] = [];

const processDirectory = async (dir: string) => {
const files = fs.readdirSync(dir);

for (const file of files) {
const filePath = path.join(dir, file);
const relFilePath = path.relative(workspacePath, filePath);

if (ignoreFilter.ignores(relFilePath)) {
continue;
}

const fileStat = fs.lstatSync(filePath);

if (fileStat.isDirectory()) {
await processDirectory(filePath);
} else if (fileStat.isFile()) {
const fileExtension = path.extname(filePath).toLowerCase();
const allowedExtensions = ['.ts', '.tsx', '.js', '.jsx'];

if (allowedExtensions.includes(fileExtension)) {
const fileContent = fs.readFileSync(filePath).toString();
gptContext.push(`File: ${relFilePath}\n\n${fileContent}\n\n`);
}
}
}
};

await processDirectory(workspacePath);
return gptContext.join('\n');
let disposable = vscode.commands.registerCommand(
'gpt-context-generator.createGPTFriendlyContext',
async () => {
if (!vscode.workspace.workspaceFolders) {
vscode.window.showErrorMessage('Please open a workspace to use this extension.');
return;
}

const workspacePath = vscode.workspace.workspaceFolders[0].uri.fsPath;
const gptContext = await createGPTFriendlyContext(workspacePath);

const gptContextDocument = await vscode.workspace.openTextDocument({
content: gptContext,
language: 'plaintext',
});

await vscode.window.showTextDocument(gptContextDocument, vscode.ViewColumn.One);

const tokenCount = estimateTokenCount(gptContext);
if (tokenCount > 8000) {
vscode.window.showWarningMessage(
`The generated context is approximately ${tokenCount} tokens, which is greater than 8000 tokens.`
);
} else {
vscode.window.showInformationMessage(
`The generated context is approximately ${tokenCount} tokens.`
);
}
}
);

// Register a new command for creating context for the open file
let disposableOpenFile = vscode.commands.registerCommand(
'gpt-context-generator.createGPTFriendlyContextForOpenFile',
async () => {
if (!vscode.workspace.workspaceFolders || !vscode.window.activeTextEditor) {
vscode.window.showErrorMessage('Please open a workspace and a file to use this extension.');
return;
}

const workspacePath = vscode.workspace.workspaceFolders[0].uri.fsPath;
const openFilePath = vscode.window.activeTextEditor.document.uri.fsPath;
const gptContext = await createGPTFriendlyContextForOpenFile(workspacePath, openFilePath);

const gptContextDocument = await vscode.workspace.openTextDocument({
content: gptContext,
language: 'plaintext',
});

await vscode.window.showTextDocument(gptContextDocument, vscode.ViewColumn.One);

const tokenCount = estimateTokenCount(gptContext);
if (tokenCount > 8000) {
vscode.window.showWarningMessage(
`The generated context is approximately ${tokenCount} tokens, which is greater than 8000 tokens.`
);
} else {
vscode.window.showInformationMessage(
`The generated context is approximately ${tokenCount} tokens.`
);
}
}
);

context.subscriptions.push(disposableOpenFile);

context.subscriptions.push(disposable);
}

async function createGPTFriendlyContextForOpenFile(workspacePath: string, openFilePath: string): Promise<string> {
const gitIgnorePath = path.join(workspacePath, '.gitignore');
const ignoreFilter = ignoreFactory();

if (fs.existsSync(gitIgnorePath)) {
const gitIgnoreContent = fs.readFileSync(gitIgnorePath).toString();
ignoreFilter.add(gitIgnoreContent);
}

const gptContext: string[] = [];

// Add the content of the currently open file
const openFileContent = fs.readFileSync(openFilePath).toString();
const openFileRelPath = path.relative(workspacePath, openFilePath);
gptContext.push(`File: ${openFileRelPath}\n\n${openFileContent}\n\n`);

// Helper function to extract import paths from a file's content
const extractImports = (content: string): string[] => {
const regex = /import\s+.*\s+from\s+['"](.*)['"];/g;
const imports: string[] = [];
let match: RegExpExecArray | null;

while ((match = regex.exec(content)) !== null) {
imports.push(match[1]);
}

return imports;
};

const imports = extractImports(openFileContent);
for (const importPath of imports) {
if (!importPath.endsWith('.css') && !importPath.endsWith('.scss')) {
const absoluteImportPath = path.resolve(path.dirname(openFilePath), `${importPath}.ts`);
const relImportPath = path.relative(workspacePath, absoluteImportPath);
if (!ignoreFilter.ignores(relImportPath) && fs.existsSync(absoluteImportPath)) {
const importedFileContent = fs.readFileSync(absoluteImportPath).toString();
gptContext.push(`File: ${relImportPath}\n\n${importedFileContent}\n\n`);
}
}
}

return gptContext.join('\n');
/**
 * Builds one text blob containing every `.ts`/`.tsx`/`.js`/`.jsx` file found
 * under the workspace, skipping anything matched by the workspace's root
 * `.gitignore`.
 *
 * Each file is emitted as `File: <relative path>`, a blank line, the file's
 * content and a trailing blank line; the per-file sections are joined with a
 * newline. Directories are walked depth-first in the order `readdir` returns
 * their entries.
 *
 * @param workspacePath - Absolute path of the workspace root to scan.
 * @returns The concatenated context text (empty string when no file matches).
 */
async function createGPTFriendlyContext(workspacePath: string): Promise<string> {
  // Hoisted: the list is constant, so there is no reason to rebuild it per file.
  const allowedExtensions = ['.ts', '.tsx', '.js', '.jsx'];

  const ignoreFilter = ignoreFactory();
  const gitIgnorePath = path.join(workspacePath, '.gitignore');

  if (fs.existsSync(gitIgnorePath)) {
    ignoreFilter.add(await fs.promises.readFile(gitIgnorePath, 'utf8'));
  }

  const gptContext: string[] = [];

  const processDirectory = async (dir: string): Promise<void> => {
    // Non-blocking read; `withFileTypes` reports each entry's type without
    // following symlinks, so no separate lstat call is needed per entry.
    const entries = await fs.promises.readdir(dir, { withFileTypes: true });

    for (const entry of entries) {
      const filePath = path.join(dir, entry.name);
      const relFilePath = path.relative(workspacePath, filePath);

      // Directory-only patterns such as `node_modules/` only match paths that
      // end in a slash, so mark directories explicitly to prune them early.
      const ignorePath = entry.isDirectory() ? `${relFilePath}/` : relFilePath;
      if (ignoreFilter.ignores(ignorePath)) {
        continue;
      }

      if (entry.isDirectory()) {
        // Awaited sequentially so the output order stays deterministic.
        await processDirectory(filePath);
      } else if (entry.isFile()) {
        const fileExtension = path.extname(filePath).toLowerCase();

        if (allowedExtensions.includes(fileExtension)) {
          const fileContent = await fs.promises.readFile(filePath, 'utf8');
          gptContext.push(`File: ${relFilePath}\n\n${fileContent}\n\n`);
        }
      }
    }
  };

  await processDirectory(workspacePath);
  return gptContext.join('\n');
}

/**
 * Builds the context text for the currently open file followed by the source
 * files it imports.
 *
 * Import specifiers are resolved against the open file's directory. For each
 * specifier the following candidates are probed, first hit wins:
 * `<path>.ts`, `<path>.tsx`, `<path>.js`, `<path>.jsx`, the path itself when it
 * already ends in one of those extensions, then `<path>/index.<ext>`.
 * Stylesheet imports (`.css`/`.scss`), files matched by the workspace's root
 * `.gitignore`, files outside the workspace and files already emitted are
 * skipped.
 *
 * Each file is emitted as `File: <relative path>`, a blank line, the content
 * and a trailing blank line; sections are joined with a newline.
 *
 * @param workspacePath - Absolute path of the workspace root.
 * @param openFilePath - Absolute path of the file whose context is generated.
 * @returns The concatenated context text, open file first.
 */
async function createGPTFriendlyContextForOpenFile(
  workspacePath: string,
  openFilePath: string
): Promise<string> {
  // `.ts` stays first so anything the earlier `.ts`-only lookup found still wins.
  const sourceExtensions = ['.ts', '.tsx', '.js', '.jsx'];

  const ignoreFilter = ignoreFactory();
  const gitIgnorePath = path.join(workspacePath, '.gitignore');

  if (fs.existsSync(gitIgnorePath)) {
    ignoreFilter.add(await fs.promises.readFile(gitIgnorePath, 'utf8'));
  }

  const gptContext: string[] = [];

  // Add the content of the currently open file
  const openFileContent = await fs.promises.readFile(openFilePath, 'utf8');
  const openFileRelPath = path.relative(workspacePath, openFilePath);
  gptContext.push(`File: ${openFileRelPath}\n\n${openFileContent}\n\n`);

  // Extracts the module specifier of every `import ... from '...'` statement.
  // The trailing semicolon is optional and the import clause may span several
  // lines; neither part may cross a quote, so a side-effect import such as
  // `import './x'` cannot swallow the statement that follows it.
  const extractImports = (content: string): string[] => {
    const regex = /\bimport\s+[^'"]*?\s+from\s+['"]([^'"]+)['"]/g;
    const imports: string[] = [];
    let match: RegExpExecArray | null;

    while ((match = regex.exec(content)) !== null) {
      imports.push(match[1]);
    }

    return imports;
  };

  // True only for an existing regular file; a missing path or a directory is false.
  const isFile = async (candidate: string): Promise<boolean> => {
    try {
      return (await fs.promises.stat(candidate)).isFile();
    } catch {
      return false;
    }
  };

  // Maps an import specifier to the first existing source file among the candidates.
  const resolveImportFile = async (importPath: string): Promise<string | undefined> => {
    const basePath = path.resolve(path.dirname(openFilePath), importPath);
    const hasSourceExtension = sourceExtensions.includes(path.extname(basePath).toLowerCase());
    const candidates = [
      ...sourceExtensions.map((ext) => `${basePath}${ext}`),
      // Only accept the literal path for source files, so asset imports
      // (images, JSON, ...) are never dumped into the context.
      ...(hasSourceExtension ? [basePath] : []),
      ...sourceExtensions.map((ext) => path.join(basePath, `index${ext}`)),
    ];

    for (const candidate of candidates) {
      if (await isFile(candidate)) {
        return candidate;
      }
    }

    return undefined;
  };

  // Seeded with the open file so a self-import or a repeated import is emitted once.
  const included = new Set<string>([path.resolve(openFilePath)]);

  for (const importPath of extractImports(openFileContent)) {
    if (importPath.endsWith('.css') || importPath.endsWith('.scss')) {
      continue;
    }

    const absoluteImportPath = await resolveImportFile(importPath);
    if (absoluteImportPath === undefined || included.has(absoluteImportPath)) {
      continue;
    }

    const relImportPath = path.relative(workspacePath, absoluteImportPath);
    // The ignore filter throws on paths that escape the root (`../...`) or are
    // absolute (e.g. another drive), so such files are skipped before it is asked.
    const outsideWorkspace =
      relImportPath === '..' ||
      relImportPath.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relImportPath);

    if (outsideWorkspace || ignoreFilter.ignores(relImportPath)) {
      continue;
    }

    included.add(absoluteImportPath);
    const importedFileContent = await fs.promises.readFile(absoluteImportPath, 'utf8');
    gptContext.push(`File: ${relImportPath}\n\n${importedFileContent}\n\n`);
  }

  return gptContext.join('\n');
}

/**
 * Estimates how many tokens `text` occupies by encoding it with the
 * `gpt-3-encoder` tokenizer and counting the resulting token ids.
 *
 * This replaces the whitespace word count, which reported 1 for an empty
 * string and left the encoder call unreachable after its `return`.
 *
 * @param text - The text to measure.
 * @returns The number of tokens produced by `encode(text)`.
 */
function estimateTokenCount(text: string): number {
  return encode(text).length;
}

/**
 * Called when the extension is deactivated. Intentionally empty: nothing is
 * cleaned up here.
 */
export function deactivate(): void {}
Loading

0 comments on commit 38832ed

Please sign in to comment.