-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat : reduce pdf size to parse and adds integrationtest (#25)
* feat : reduce pdf size to parse and adds integrationtest
* feat : adds validation
* feat : adds validation starter for validation to kick in
* feat : fixes assertions
- Loading branch information
1 parent
04d7894
commit 14b4ed7
Showing
15 changed files
with
487 additions
and
243 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
25 changes: 12 additions & 13 deletions
25
...ingai/src/main/java/com/learning/ai/llmragwithspringai/LlmRagWithSpringAiApplication.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,12 @@ | ||
package com.learning.ai.llmragwithspringai; | ||
|
||
import org.springframework.boot.SpringApplication; | ||
import org.springframework.boot.autoconfigure.SpringBootApplication; | ||
|
||
/**
 * Application entry point (the pre-change side of this diff).
 *
 * <p>The class is annotated with {@code @SpringBootApplication} and its {@code main} method
 * passes this class and the command-line arguments to {@code SpringApplication.run}.
 */
@SpringBootApplication | ||
public class LlmRagWithSpringAiApplication { | ||
|
||
// Hands control to Spring Boot; the value returned by run(...) is not used.
public static void main(String[] args) { | ||
SpringApplication.run(LlmRagWithSpringAiApplication.class, args); | ||
} | ||
|
||
} | ||
package com.learning.ai.llmragwithspringai; | ||
|
||
import org.springframework.boot.SpringApplication; | ||
import org.springframework.boot.autoconfigure.SpringBootApplication; | ||
|
||
@SpringBootApplication | ||
public class LlmRagWithSpringAiApplication { | ||
|
||
public static void main(String[] args) { | ||
SpringApplication.run(LlmRagWithSpringAiApplication.class, args); | ||
} | ||
} |
91 changes: 46 additions & 45 deletions
91
llm-rag-with-springai/src/main/java/com/learning/ai/llmragwithspringai/config/AppConfig.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,45 +1,46 @@ | ||
package com.learning.ai.llmragwithspringai.config; | ||
|
||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import org.springframework.ai.reader.ExtractedTextFormatter; | ||
import org.springframework.ai.reader.pdf.PagePdfDocumentReader; | ||
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig; | ||
import org.springframework.ai.transformer.splitter.TokenTextSplitter; | ||
import org.springframework.ai.vectorstore.VectorStore; | ||
import org.springframework.beans.factory.annotation.Value; | ||
import org.springframework.boot.ApplicationRunner; | ||
import org.springframework.context.annotation.Bean; | ||
import org.springframework.context.annotation.Configuration; | ||
import org.springframework.core.io.Resource; | ||
import org.springframework.jdbc.core.JdbcTemplate; | ||
|
||
/**
 * Bean configuration (the pre-change side of this diff).
 *
 * <p>Declares a {@code TokenTextSplitter} bean and an {@code ApplicationRunner} bean. The runner
 * reads the injected PDF resource, deletes all rows from {@code vector_store}, and hands the
 * split documents to the {@code VectorStore}.
 */
@Configuration(proxyBeanMethods = false) | ||
public class AppConfig { | ||
private static final Logger log = LoggerFactory.getLogger(AppConfig.class); | ||
|
||
// Classpath PDF that the runner below reads.
@Value("classpath:medicaid-wa-faqs.pdf") | ||
private Resource resource; | ||
|
||
// Splitter bean created with its no-argument constructor.
@Bean | ||
TokenTextSplitter tokenTextSplitter() { | ||
return new TokenTextSplitter(); | ||
} | ||
|
||
// Runner that rebuilds the vector store contents from the PDF resource.
@Bean | ||
ApplicationRunner runner(VectorStore vectorStore, JdbcTemplate template, TokenTextSplitter tokenTextSplitter) { | ||
return args -> { | ||
log.info("Loading file(s) as Documents"); | ||
// Reader settings: one page per document, with a text formatter configured via
// withNumberOfBottomTextLinesToDelete(3) and withNumberOfTopPagesToSkipBeforeDelete(1).
PdfDocumentReaderConfig config = PdfDocumentReaderConfig.builder() | ||
.withPageExtractedTextFormatter(new ExtractedTextFormatter.Builder().withNumberOfBottomTextLinesToDelete(3) | ||
.withNumberOfTopPagesToSkipBeforeDelete(1) | ||
.build()) | ||
.withPagesPerDocument(1) | ||
.build(); | ||
PagePdfDocumentReader pagePdfDocumentReader = new PagePdfDocumentReader(resource, config); | ||
// Existing rows are removed before the PDF is read and the new documents are stored.
template.update("delete from vector_store"); | ||
vectorStore.accept(tokenTextSplitter.apply(pagePdfDocumentReader.get())); | ||
}; | ||
} | ||
|
||
} | ||
package com.learning.ai.llmragwithspringai.config; | ||
|
||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import org.springframework.ai.reader.ExtractedTextFormatter; | ||
import org.springframework.ai.reader.pdf.PagePdfDocumentReader; | ||
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig; | ||
import org.springframework.ai.transformer.splitter.TokenTextSplitter; | ||
import org.springframework.ai.vectorstore.VectorStore; | ||
import org.springframework.beans.factory.annotation.Value; | ||
import org.springframework.boot.ApplicationRunner; | ||
import org.springframework.context.annotation.Bean; | ||
import org.springframework.context.annotation.Configuration; | ||
import org.springframework.core.io.Resource; | ||
import org.springframework.jdbc.core.JdbcTemplate; | ||
|
||
@Configuration(proxyBeanMethods = false) | ||
public class AppConfig { | ||
private static final Logger log = LoggerFactory.getLogger(AppConfig.class); | ||
|
||
@Value("classpath:Rohit_Gurunath_Sharma.pdf") | ||
private Resource resource; | ||
|
||
@Bean | ||
TokenTextSplitter tokenTextSplitter() { | ||
return new TokenTextSplitter(); | ||
} | ||
|
||
@Bean | ||
ApplicationRunner runner(VectorStore vectorStore, JdbcTemplate template, TokenTextSplitter tokenTextSplitter) { | ||
return args -> { | ||
log.info("Loading file(s) as Documents"); | ||
PdfDocumentReaderConfig config = PdfDocumentReaderConfig.builder() | ||
.withPageExtractedTextFormatter(new ExtractedTextFormatter.Builder() | ||
.withNumberOfBottomTextLinesToDelete(3) | ||
.withNumberOfTopPagesToSkipBeforeDelete(1) | ||
.build()) | ||
.withPagesPerDocument(1) | ||
.build(); | ||
PagePdfDocumentReader pagePdfDocumentReader = new PagePdfDocumentReader(resource, config); | ||
template.update("delete from vector_store"); | ||
vectorStore.accept(tokenTextSplitter.apply(pagePdfDocumentReader.get())); | ||
log.info("Loaded document to database."); | ||
}; | ||
} | ||
} |
62 changes: 62 additions & 0 deletions
62
...ingai/src/main/java/com/learning/ai/llmragwithspringai/config/GlobalExceptionHandler.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
package com.learning.ai.llmragwithspringai.config; | ||
|
||
import jakarta.validation.ConstraintViolationException; | ||
import java.util.Comparator; | ||
import java.util.List; | ||
import java.util.Objects; | ||
import org.springframework.core.Ordered; | ||
import org.springframework.core.annotation.Order; | ||
import org.springframework.http.HttpStatus; | ||
import org.springframework.http.HttpStatusCode; | ||
import org.springframework.http.ProblemDetail; | ||
import org.springframework.validation.FieldError; | ||
import org.springframework.web.bind.MethodArgumentNotValidException; | ||
import org.springframework.web.bind.annotation.ControllerAdvice; | ||
import org.springframework.web.bind.annotation.ExceptionHandler; | ||
import org.springframework.web.bind.annotation.ResponseStatus; | ||
|
||
@Order(Ordered.HIGHEST_PRECEDENCE) | ||
@ControllerAdvice | ||
public class GlobalExceptionHandler { | ||
|
||
@ExceptionHandler(MethodArgumentNotValidException.class) | ||
@ResponseStatus(HttpStatus.BAD_REQUEST) | ||
ProblemDetail onException(MethodArgumentNotValidException methodArgumentNotValidException) { | ||
ProblemDetail problemDetail = | ||
ProblemDetail.forStatusAndDetail(HttpStatusCode.valueOf(400), "Invalid request content."); | ||
problemDetail.setTitle("Constraint Violation"); | ||
List<ApiValidationError> validationErrorsList = methodArgumentNotValidException.getAllErrors().stream() | ||
.map(objectError -> { | ||
FieldError fieldError = (FieldError) objectError; | ||
return new ApiValidationError( | ||
fieldError.getObjectName(), | ||
fieldError.getField(), | ||
fieldError.getRejectedValue(), | ||
Objects.requireNonNull(fieldError.getDefaultMessage(), "")); | ||
}) | ||
.sorted(Comparator.comparing(ApiValidationError::field)) | ||
.toList(); | ||
problemDetail.setProperty("violations", validationErrorsList); | ||
return problemDetail; | ||
} | ||
|
||
@ExceptionHandler(ConstraintViolationException.class) | ||
@ResponseStatus(HttpStatus.BAD_REQUEST) | ||
ProblemDetail onException(ConstraintViolationException constraintViolationException) { | ||
ProblemDetail problemDetail = | ||
ProblemDetail.forStatusAndDetail(HttpStatusCode.valueOf(400), "Invalid request content."); | ||
problemDetail.setTitle("Constraint Violation"); | ||
List<ApiValidationError> validationErrorsList = constraintViolationException.getConstraintViolations().stream() | ||
.map(constraintViolation -> new ApiValidationError( | ||
constraintViolation.getMessage(), | ||
constraintViolation.getPropertyPath().toString(), | ||
constraintViolation.getInvalidValue(), | ||
constraintViolation.getMessage())) | ||
.sorted(Comparator.comparing(ApiValidationError::field)) | ||
.toList(); | ||
problemDetail.setProperty("violations", validationErrorsList); | ||
return problemDetail; | ||
} | ||
|
||
record ApiValidationError(String object, String field, Object rejectedValue, String message) {} | ||
} |
Oops, something went wrong.