Skip to content

Commit

Permalink
feat : reduce pdf size to parse and adds integrationtest (#25)
Browse files Browse the repository at this point in the history
* feat : reduce pdf size to parse and adds integrationtest

* feat : adds validation

* feat : adds validation starter for validation to kick in

* feat : fixes assertions
  • Loading branch information
rajadilipkolli authored Mar 28, 2024
1 parent 04d7894 commit 14b4ed7
Show file tree
Hide file tree
Showing 15 changed files with 487 additions and 243 deletions.
78 changes: 78 additions & 0 deletions llm-rag-with-springai/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@
<version>0.0.1-SNAPSHOT</version>
<name>llm-rag-with-springai</name>
<description>Demo project for Spring AI</description>

<!-- Build-wide version properties. spotless.version is consumed by the spotless-maven-plugin
     declaration in this POM; spring-ai.version is presumably consumed by the Spring AI
     dependency management section (not fully visible here, confirm). -->
<properties>
<!-- Java language level used to compile the project. -->
<java.version>17</java.version>
<spring-ai.version>0.8.1</spring-ai.version>
<spotless.version>2.43.0</spotless.version>
</properties>

<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
Expand All @@ -26,6 +29,10 @@
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Validation starter: required for request validation to take effect (the commit notes call
     this out explicitly). No version is given, so it is expected to be managed by the
     parent/BOM. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-validation</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-openai-spring-boot-starter</artifactId>
Expand Down Expand Up @@ -77,6 +84,11 @@
<artifactId>postgresql</artifactId>
<scope>test</scope>
</dependency>
<!-- REST Assured, test scope only: HTTP-level assertions for the integration tests added by
     this change. No version is given, so it is expected to be managed by the parent/BOM. -->
<dependency>
<groupId>io.rest-assured</groupId>
<artifactId>rest-assured</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

<dependencyManagement>
Expand All @@ -101,6 +113,72 @@
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
<!-- Spotless: enforces source formatting for Java files. The "check" goal is bound to the
     compile phase, so a formatting violation fails the build rather than being fixed
     automatically. -->
<plugin>
<groupId>com.diffplug.spotless</groupId>
<artifactId>spotless-maven-plugin</artifactId>
<version>${spotless.version}</version>
<configuration>
<java>
<!-- Formatter implementation and its pinned version. -->
<palantirJavaFormat>
<version>2.40.0</version>
</palantirJavaFormat>
<!-- Additional steps: import ordering, unused-import removal, annotation formatting. -->
<importOrder />
<removeUnusedImports />
<formatAnnotations />
</java>
</configuration>
<executions>
<execution>
<!-- Run the formatting check as part of every compile. -->
<phase>compile</phase>
<goals>
<goal>check</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Surefire: runs unit tests only. Classes whose names match the integration-test patterns
     below are excluded here and picked up by the failsafe plugin instead. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<!-- Version property is not declared in this POM's <properties>; it is expected to be
     inherited from the parent POM (confirm). -->
<version>${maven-surefire-plugin.version}</version>
<configuration>
<runOrder>alphabetical</runOrder>
<!-- Previously ${junit.utReportFolder}, which is not defined in this POM's <properties> and
     would therefore be used as a literal directory name. Use the standard location under
     the build directory instead. -->
<reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
<excludes>
<exclude>**/*IT*</exclude>
<exclude>**/*IntTest*</exclude>
<exclude>**/*IntegrationTest*</exclude>
</excludes>
</configuration>
</plugin>
<!-- Failsafe: runs integration tests (class names matching the include patterns below) in the
     integration-test phase and fails the build in verify if any of them failed. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-failsafe-plugin</artifactId>
<!-- Version property is not declared in this POM's <properties>; it is expected to be
     inherited from the parent POM (confirm). -->
<version>${maven-failsafe-plugin.version}</version>
<configuration>
<!-- Test against the compiled classes directory rather than the packaged artifact. -->
<classesDirectory>${project.build.outputDirectory}</classesDirectory>
<runOrder>alphabetical</runOrder>
<!-- Previously ${junit.itReportFolder}, which is not defined in this POM's <properties> and
     would therefore be used as a literal directory name. Use the standard location under
     the build directory instead. -->
<reportsDirectory>${project.build.directory}/failsafe-reports</reportsDirectory>
<includes>
<include>**/*IT*</include>
<include>**/*IntTest*</include>
<include>**/*IntegrationTest*</include>
</includes>
</configuration>
<executions>
<execution>
<id>integration-test</id>
<goals>
<goal>integration-test</goal>
</goals>
</execution>
<execution>
<id>verify</id>
<goals>
<goal>verify</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>

Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
package com.learning.ai.llmragwithspringai;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

/**
 * Entry point of the application; the {@code @SpringBootApplication} annotation makes this
 * class the primary configuration source handed to Spring Boot.
 */
@SpringBootApplication
public class LlmRagWithSpringAiApplication {

    /**
     * Starts the Spring application context.
     *
     * @param args command-line arguments, passed through to Spring Boot unchanged
     */
    public static void main(String[] args) {
        // Same as the static SpringApplication.run(Class, String...) convenience call.
        new SpringApplication(LlmRagWithSpringAiApplication.class).run(args);
    }
}
package com.learning.ai.llmragwithspringai;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

/**
 * Entry point of the application; the {@code @SpringBootApplication} annotation makes this
 * class the primary configuration source handed to Spring Boot.
 */
@SpringBootApplication
public class LlmRagWithSpringAiApplication {

    /**
     * Starts the Spring application context.
     *
     * @param args command-line arguments, passed through to Spring Boot unchanged
     */
    public static void main(String[] args) {
        // Same as the static SpringApplication.run(Class, String...) convenience call.
        new SpringApplication(LlmRagWithSpringAiApplication.class).run(args);
    }
}
Original file line number Diff line number Diff line change
@@ -1,45 +1,46 @@
package com.learning.ai.llmragwithspringai.config;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.reader.ExtractedTextFormatter;
import org.springframework.ai.reader.pdf.PagePdfDocumentReader;
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.ApplicationRunner;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.io.Resource;
import org.springframework.jdbc.core.JdbcTemplate;

/**
 * Wires the beans used to ingest a PDF into the vector store when the application starts.
 */
@Configuration(proxyBeanMethods = false)
public class AppConfig {
    private static final Logger log = LoggerFactory.getLogger(AppConfig.class);

    /** Classpath PDF that is read and stored on startup. */
    @Value("classpath:medicaid-wa-faqs.pdf")
    private Resource resource;

    /** Exposes a {@link TokenTextSplitter} with its default settings as a bean. */
    @Bean
    TokenTextSplitter tokenTextSplitter() {
        return new TokenTextSplitter();
    }

    /**
     * Startup task: clears the {@code vector_store} table, then reads the PDF, splits it and
     * hands the result to the vector store.
     *
     * @param vectorStore       destination for the split documents
     * @param template          used to empty the {@code vector_store} table first
     * @param tokenTextSplitter splitter applied to the documents read from the PDF
     * @return the runner executed by Spring Boot after the context is ready
     */
    @Bean
    ApplicationRunner runner(VectorStore vectorStore, JdbcTemplate template, TokenTextSplitter tokenTextSplitter) {
        return args -> {
            log.info("Loading file(s) as Documents");
            PagePdfDocumentReader pdfReader = new PagePdfDocumentReader(resource, pageReaderConfig());
            // Existing rows are removed before the PDF is read, exactly as before.
            template.update("delete from vector_store");
            var pages = pdfReader.get();
            var chunks = tokenTextSplitter.apply(pages);
            vectorStore.accept(chunks);
        };
    }

    /** Builds the reader configuration: one page per document, with the page text formatter below. */
    private static PdfDocumentReaderConfig pageReaderConfig() {
        ExtractedTextFormatter pageFormatter = new ExtractedTextFormatter.Builder()
                .withNumberOfBottomTextLinesToDelete(3)
                .withNumberOfTopPagesToSkipBeforeDelete(1)
                .build();
        return PdfDocumentReaderConfig.builder()
                .withPageExtractedTextFormatter(pageFormatter)
                .withPagesPerDocument(1)
                .build();
    }
}
package com.learning.ai.llmragwithspringai.config;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.reader.ExtractedTextFormatter;
import org.springframework.ai.reader.pdf.PagePdfDocumentReader;
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.ApplicationRunner;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.io.Resource;
import org.springframework.jdbc.core.JdbcTemplate;

/**
 * Wires the beans used to ingest a PDF into the vector store when the application starts.
 */
@Configuration(proxyBeanMethods = false)
public class AppConfig {
    private static final Logger log = LoggerFactory.getLogger(AppConfig.class);

    /** Classpath PDF that is read and stored on startup. */
    @Value("classpath:Rohit_Gurunath_Sharma.pdf")
    private Resource resource;

    /** Exposes a {@link TokenTextSplitter} with its default settings as a bean. */
    @Bean
    TokenTextSplitter tokenTextSplitter() {
        return new TokenTextSplitter();
    }

    /**
     * Startup task: clears the {@code vector_store} table, then reads the PDF, splits it and
     * hands the result to the vector store, logging before and after.
     *
     * @param vectorStore       destination for the split documents
     * @param template          used to empty the {@code vector_store} table first
     * @param tokenTextSplitter splitter applied to the documents read from the PDF
     * @return the runner executed by Spring Boot after the context is ready
     */
    @Bean
    ApplicationRunner runner(VectorStore vectorStore, JdbcTemplate template, TokenTextSplitter tokenTextSplitter) {
        return args -> {
            log.info("Loading file(s) as Documents");
            PagePdfDocumentReader pdfReader = new PagePdfDocumentReader(resource, pageReaderConfig());
            // Existing rows are removed before the PDF is read, exactly as before.
            template.update("delete from vector_store");
            var pages = pdfReader.get();
            var chunks = tokenTextSplitter.apply(pages);
            vectorStore.accept(chunks);
            log.info("Loaded document to database.");
        };
    }

    /** Builds the reader configuration: one page per document, with the page text formatter below. */
    private static PdfDocumentReaderConfig pageReaderConfig() {
        ExtractedTextFormatter pageFormatter = new ExtractedTextFormatter.Builder()
                .withNumberOfBottomTextLinesToDelete(3)
                .withNumberOfTopPagesToSkipBeforeDelete(1)
                .build();
        return PdfDocumentReaderConfig.builder()
                .withPageExtractedTextFormatter(pageFormatter)
                .withPagesPerDocument(1)
                .build();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package com.learning.ai.llmragwithspringai.config;

import jakarta.validation.ConstraintViolationException;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import org.springframework.core.Ordered;
import org.springframework.core.annotation.Order;
import org.springframework.http.HttpStatus;
import org.springframework.http.HttpStatusCode;
import org.springframework.http.ProblemDetail;
import org.springframework.validation.FieldError;
import org.springframework.web.bind.MethodArgumentNotValidException;
import org.springframework.web.bind.annotation.ControllerAdvice;
import org.springframework.web.bind.annotation.ExceptionHandler;
import org.springframework.web.bind.annotation.ResponseStatus;

/**
 * Converts validation failures into {@link ProblemDetail} responses with HTTP status 400.
 *
 * <p>Each response carries the title {@code "Constraint Violation"}, the detail
 * {@code "Invalid request content."} and a {@code violations} property holding one
 * {@link ApiValidationError} per failure, sorted by field name.
 */
@Order(Ordered.HIGHEST_PRECEDENCE)
@ControllerAdvice
public class GlobalExceptionHandler {

    /**
     * Handles binding/validation failures of a method argument.
     *
     * <p>Field-level errors are reported with their field name and rejected value. Errors that
     * are not tied to a single field are reported with an empty field name and no rejected
     * value instead of failing with a {@link ClassCastException}.
     *
     * @param methodArgumentNotValidException the validation failure to translate
     * @return the problem detail describing every reported error
     */
    @ExceptionHandler(MethodArgumentNotValidException.class)
    @ResponseStatus(HttpStatus.BAD_REQUEST)
    ProblemDetail onException(MethodArgumentNotValidException methodArgumentNotValidException) {
        List<ApiValidationError> validationErrorsList = methodArgumentNotValidException.getAllErrors().stream()
                .map(objectError -> {
                    // requireNonNullElse substitutes "" for a missing message; the two-argument
                    // requireNonNull would throw a NullPointerException instead.
                    String message = Objects.requireNonNullElse(objectError.getDefaultMessage(), "");
                    if (objectError instanceof FieldError fieldError) {
                        return new ApiValidationError(
                                fieldError.getObjectName(),
                                fieldError.getField(),
                                fieldError.getRejectedValue(),
                                message);
                    }
                    // Not a FieldError: there is no field or rejected value to report.
                    return new ApiValidationError(objectError.getObjectName(), "", null, message);
                })
                .sorted(Comparator.comparing(ApiValidationError::field))
                .toList();
        return constraintViolationProblem(validationErrorsList);
    }

    /**
     * Handles constraint violations raised as a {@link ConstraintViolationException}.
     *
     * @param constraintViolationException the violations to translate
     * @return the problem detail describing every violation
     */
    @ExceptionHandler(ConstraintViolationException.class)
    @ResponseStatus(HttpStatus.BAD_REQUEST)
    ProblemDetail onException(ConstraintViolationException constraintViolationException) {
        List<ApiValidationError> validationErrorsList = constraintViolationException.getConstraintViolations().stream()
                // NOTE(review): the violation message is used for both "object" and "message";
                // the first argument looks like it should identify the validated object.
                // Left unchanged to keep the response payload stable - confirm intent.
                .map(constraintViolation -> new ApiValidationError(
                        constraintViolation.getMessage(),
                        constraintViolation.getPropertyPath().toString(),
                        constraintViolation.getInvalidValue(),
                        constraintViolation.getMessage()))
                .sorted(Comparator.comparing(ApiValidationError::field))
                .toList();
        return constraintViolationProblem(validationErrorsList);
    }

    /** Builds the shared 400 problem detail and attaches the given violations to it. */
    private static ProblemDetail constraintViolationProblem(List<ApiValidationError> validationErrorsList) {
        ProblemDetail problemDetail =
                ProblemDetail.forStatusAndDetail(HttpStatusCode.valueOf(400), "Invalid request content.");
        problemDetail.setTitle("Constraint Violation");
        problemDetail.setProperty("violations", validationErrorsList);
        return problemDetail;
    }

    /**
     * One validation failure as exposed in the {@code violations} response property.
     *
     * @param object        name of the validated object (see the review note on the
     *                      constraint-violation handler)
     * @param field         field or property path the failure refers to; empty when the error
     *                      is not tied to a field
     * @param rejectedValue the value that failed validation, may be {@code null}
     * @param message       human-readable description of the failure
     */
    record ApiValidationError(String object, String field, Object rejectedValue, String message) {}
}
Loading

0 comments on commit 14b4ed7

Please sign in to comment.