-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat : expose endpoint to upload multiple documents #46
Merged
Merged
Changes from 3 commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
30 changes: 0 additions & 30 deletions
30
...pringai-ollama-llm/src/main/java/com/learning/ai/llmragwithspringai/config/AppConfig.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,44 +1,14 @@ | ||
package com.learning.ai.llmragwithspringai.config; | ||
|
||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import org.springframework.ai.reader.ExtractedTextFormatter; | ||
import org.springframework.ai.reader.pdf.PagePdfDocumentReader; | ||
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig; | ||
import org.springframework.ai.transformer.splitter.TokenTextSplitter; | ||
import org.springframework.ai.vectorstore.VectorStore; | ||
import org.springframework.beans.factory.annotation.Value; | ||
import org.springframework.boot.ApplicationRunner; | ||
import org.springframework.context.annotation.Bean; | ||
import org.springframework.context.annotation.Configuration; | ||
import org.springframework.core.io.Resource; | ||
|
||
@Configuration(proxyBeanMethods = false) | ||
public class AppConfig { | ||
private static final Logger LOGGER = LoggerFactory.getLogger(AppConfig.class); | ||
|
||
@Value("classpath:Rohit_Gurunath_Sharma.pdf") | ||
private Resource resource; | ||
|
||
@Bean | ||
TokenTextSplitter tokenTextSplitter() { | ||
return new TokenTextSplitter(); | ||
} | ||
|
||
@Bean | ||
ApplicationRunner runner(VectorStore vectorStore, TokenTextSplitter tokenTextSplitter) { | ||
return args -> { | ||
LOGGER.info("Loading file(s) as Documents"); | ||
PdfDocumentReaderConfig config = PdfDocumentReaderConfig.builder() | ||
.withPageExtractedTextFormatter(new ExtractedTextFormatter.Builder() | ||
.withNumberOfBottomTextLinesToDelete(3) | ||
.withNumberOfTopPagesToSkipBeforeDelete(1) | ||
.build()) | ||
.withPagesPerDocument(1) | ||
.build(); | ||
PagePdfDocumentReader pagePdfDocumentReader = new PagePdfDocumentReader(resource, config); | ||
vectorStore.accept(tokenTextSplitter.apply(pagePdfDocumentReader.get())); | ||
LOGGER.info("Loaded document to database."); | ||
}; | ||
} | ||
} |
36 changes: 36 additions & 0 deletions
36
...-llm/src/main/java/com/learning/ai/llmragwithspringai/controller/DataIndexController.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
package com.learning.ai.llmragwithspringai.controller; | ||
|
||
import com.learning.ai.llmragwithspringai.service.DataIndexerService; | ||
import java.util.Map; | ||
import org.springframework.http.HttpStatus; | ||
import org.springframework.http.MediaType; | ||
import org.springframework.http.ResponseEntity; | ||
import org.springframework.web.bind.annotation.*; | ||
import org.springframework.web.multipart.MultipartFile; | ||
|
||
@RestController | ||
@RequestMapping("/api/data/v1/") | ||
public class DataIndexController { | ||
|
||
private final DataIndexerService dataIndexerService; | ||
|
||
public DataIndexController(DataIndexerService dataIndexerService) { | ||
this.dataIndexerService = dataIndexerService; | ||
} | ||
|
||
@PostMapping(value = "/upload", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) | ||
public ResponseEntity<String> load(@RequestPart("file") MultipartFile multipartFile) { | ||
try { | ||
this.dataIndexerService.loadData(multipartFile.getResource()); | ||
return ResponseEntity.ok("Data indexed successfully!"); | ||
} catch (Exception e) { | ||
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR) | ||
.body("An error occurred while indexing data: " + e.getMessage()); | ||
} | ||
} | ||
|
||
@GetMapping("count") | ||
public Map<String, Long> count() { | ||
return Map.of("count", dataIndexerService.count()); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
59 changes: 59 additions & 0 deletions
59
...lama-llm/src/main/java/com/learning/ai/llmragwithspringai/service/DataIndexerService.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
package com.learning.ai.llmragwithspringai.service; | ||
|
||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import org.springframework.ai.document.DocumentReader; | ||
import org.springframework.ai.reader.ExtractedTextFormatter; | ||
import org.springframework.ai.reader.JsonReader; | ||
import org.springframework.ai.reader.TextReader; | ||
import org.springframework.ai.reader.pdf.PagePdfDocumentReader; | ||
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig; | ||
import org.springframework.ai.transformer.splitter.TokenTextSplitter; | ||
import org.springframework.ai.vectorstore.VectorStore; | ||
import org.springframework.core.io.Resource; | ||
import org.springframework.stereotype.Service; | ||
|
||
@Service | ||
public class DataIndexerService { | ||
|
||
private static final Logger LOGGER = LoggerFactory.getLogger(DataIndexerService.class); | ||
|
||
private final TokenTextSplitter tokenTextSplitter; | ||
private final VectorStore vectorStore; | ||
|
||
public DataIndexerService(TokenTextSplitter tokenTextSplitter, VectorStore vectorStore) { | ||
this.tokenTextSplitter = tokenTextSplitter; | ||
this.vectorStore = vectorStore; | ||
} | ||
|
||
public void loadData(Resource documentResource) { | ||
DocumentReader documentReader = null; | ||
if (documentResource.getFilename() != null | ||
&& documentResource.getFilename().endsWith(".pdf")) { | ||
LOGGER.info("Loading PDF document"); | ||
PdfDocumentReaderConfig pdfDocumentReaderConfig = PdfDocumentReaderConfig.builder() | ||
.withPageExtractedTextFormatter(ExtractedTextFormatter.builder() | ||
.withNumberOfBottomTextLinesToDelete(3) | ||
.withNumberOfTopPagesToSkipBeforeDelete(1) | ||
.build()) | ||
.withPagesPerDocument(1) | ||
.build(); | ||
documentReader = new PagePdfDocumentReader(documentResource, pdfDocumentReaderConfig); | ||
} else if (documentResource.getFilename() != null | ||
&& documentResource.getFilename().endsWith(".txt")) { | ||
documentReader = new TextReader(documentResource); | ||
} else if (documentResource.getFilename() != null | ||
&& documentResource.getFilename().endsWith(".json")) { | ||
documentReader = new JsonReader(documentResource); | ||
} | ||
if (documentReader != null) { | ||
LOGGER.info("Loading text document to redis vector database"); | ||
vectorStore.accept(tokenTextSplitter.apply(documentReader.get())); | ||
LOGGER.info("Loaded document to redis vector database."); | ||
} | ||
} | ||
|
||
public long count() { | ||
return this.vectorStore.similaritySearch("*").size(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tip
Codebase Verification
The verification process has shown that the file "/Rohit_Gurunath_Sharma.pdf" does not exist in the 'src/test/resources' directory, which was assumed to be the correct location based on the context of a typical Java project structure. This suggests that there might be an issue with how the file path is handled or documented in the test setup. It is recommended to review the implementation of the `getPath` method to ensure it points to the correct location and that the necessary test files are properly placed in the project structure.
- Review the `getPath` method to confirm the intended file path.

Analysis chain
Ensure the file path in the `getPath` method is correctly handled.

The verification script intended to check the existence of the file specified in the `getPath` method failed because the Java command could not be executed in the script's environment. This means we haven't yet verified whether the file path is correctly handled in the code.

To proceed, I recommend running a revised script that doesn't rely on executing Java, but instead directly checks the file system based on expected outcomes from the `getPath` method, if such expected outcomes can be inferred from the codebase or documentation.

Scripts Executed
The following scripts were executed for the analysis:
Script:
Length of output: 305
Script:
Length of output: 108