diff --git a/src/main/java/uk/ac/cam/cl/dtg/segue/etl/ContentIndexer.java b/src/main/java/uk/ac/cam/cl/dtg/segue/etl/ContentIndexer.java index c8248d44b6..674666d017 100644 --- a/src/main/java/uk/ac/cam/cl/dtg/segue/etl/ContentIndexer.java +++ b/src/main/java/uk/ac/cam/cl/dtg/segue/etl/ContentIndexer.java @@ -71,6 +71,7 @@ import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -962,7 +963,9 @@ public void recordContentTypeSpecificError(final String sha, final Content conte int sizeInKiloBytes = fileData.size() / 1024; this.registerContentProblem(content, String.format("Image (%s) is %s kB and exceeds file size warning limit!", f.getSrc(), sizeInKiloBytes), indexProblemCache); - } else if (f.getSrc().endsWith(".svg")) { + } + + if (f.getSrc().endsWith(".svg") && null != fileData) { // Check file contents for svg with either no defined width or a %-based width String fileContents = fileData.toString(); Pattern pattern = Pattern.compile("(]*width\\s*=\\s*\"\\d+%\"[^>]*>|]*width)[^>]*>)\\n?"); @@ -974,6 +977,15 @@ public void recordContentTypeSpecificError(final String sha, final Content conte ); } } + + Pattern badCharacters = Pattern.compile("[^a-zA-Z0-9-_.~/ ]"); + Matcher badCharactersMatcher = badCharacters.matcher(f.getSrc()); + if (badCharactersMatcher.find()) { + this.registerContentProblem(content, String.format("Filename '%s' contains illegal character '%s'.", + f.getSrc(), badCharactersMatcher.group()), + indexProblemCache + ); + } } // check that there is some alt text.