Skip to content

Commit

Permalink
fix(nahsra#456): fix default charset problem
Browse files Browse the repository at this point in the history
Signed-off-by: GodMeowIceSun <[email protected]>
  • Loading branch information
GodMeowIceSun committed May 31, 2024
1 parent 926f57e commit fcc3270
Showing 1 changed file with 20 additions and 1 deletion.
21 changes: 20 additions & 1 deletion src/main/java/org/owasp/validator/html/CleanResults.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,14 @@

package org.owasp.validator.html;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.stream.Collectors;

import org.w3c.dom.DocumentFragment;

/**
Expand Down Expand Up @@ -163,7 +167,22 @@ public DocumentFragment getCleanXMLDocumentFragment() {
* @see #getCleanHTML()
*/
public List<String> getErrorMessages() {
  // Delegate to the charset-aware overload, using the JVM's default character set.
  // (A second, unreachable return statement left over from the diff has been removed.)
  return getErrorMessages(Charset.defaultCharset());
}

/**
 * Return the error messages collected during a scan, re-decoded with the given character set.
 *
 * <p>A message made up solely of characters in the ISO-8859-1 range (U+0000 to U+00FF) is
 * converted back to its ISO-8859-1 bytes, and those bytes are decoded with {@code charset}. A
 * message containing any character outside that range cannot be encoded as ISO-8859-1 without
 * loss, so it is returned unchanged instead of having those characters replaced by {@code '?'}.
 *
 * <p>An empty list returned does not mean there was no attack present, due to the serialization
 * and deserialization process automatically cleaning up some attacks. Only the output of the
 * {@code getCleanHTML()} should be considered safe. See the project README file and
 * {@code CleanResults} class documentation for further discussion.
 *
 * @param charset The character set used to decode the ISO-8859-1 bytes of each message; must
 *     not be {@code null}.
 * @return An unmodifiable list which contains the error messages, if any, after a scan.
 * @see <a href="https://github.com/nahsra/antisamy/blob/main/README.md">Project README</a>
 * @see #getCleanHTML()
 */
public List<String> getErrorMessages(Charset charset) {
  return Collections.unmodifiableList(
      errorMessages.stream()
          .map(
              message ->
                  // Only round-trip text that fits in ISO-8859-1; anything else would be
                  // corrupted to '?' by getBytes and is therefore passed through as-is.
                  message.chars().allMatch(c -> c <= 0xFF)
                      ? new String(message.getBytes(StandardCharsets.ISO_8859_1), charset)
                      : message)
          .collect(Collectors.toList()));
}

/**
Expand Down

0 comments on commit fcc3270

Please sign in to comment.