From f331ee948bb8ee1d7be31955db7b3ac31135c1ab Mon Sep 17 00:00:00 2001 From: Brad Baker Date: Wed, 15 Sep 2021 18:20:29 +1000 Subject: [PATCH 1/3] This adds a maximum number of tokens to be parsed in queries by default --- .../parser/GraphqlAntlrToLanguage.java | 4 ++ .../parser/ParseCancelledException.java | 12 +++++ src/main/java/graphql/parser/Parser.java | 39 ++++++++++++++- .../java/graphql/parser/ParserOptions.java | 49 +++++++++++++++++++ .../java/graphql/schema/idl/SchemaParser.java | 33 +++++++++++-- .../groovy/graphql/parser/ParserTest.groovy | 33 +++++++++++++ .../schema/idl/SchemaParserTest.groovy | 21 ++++++++ 7 files changed, 185 insertions(+), 6 deletions(-) create mode 100644 src/main/java/graphql/parser/ParseCancelledException.java diff --git a/src/main/java/graphql/parser/GraphqlAntlrToLanguage.java b/src/main/java/graphql/parser/GraphqlAntlrToLanguage.java index c146b0924e..b1518ec83b 100644 --- a/src/main/java/graphql/parser/GraphqlAntlrToLanguage.java +++ b/src/main/java/graphql/parser/GraphqlAntlrToLanguage.java @@ -99,6 +99,10 @@ public GraphqlAntlrToLanguage(CommonTokenStream tokens, MultiSourceReader multiS this.parserOptions = parserOptions == null ? 
ParserOptions.getDefaultParserOptions() : parserOptions; } + public ParserOptions getParserOptions() { + return parserOptions; + } + //MARKER START: Here GraphqlOperation.g4 specific methods begin diff --git a/src/main/java/graphql/parser/ParseCancelledException.java b/src/main/java/graphql/parser/ParseCancelledException.java new file mode 100644 index 0000000000..c416c12504 --- /dev/null +++ b/src/main/java/graphql/parser/ParseCancelledException.java @@ -0,0 +1,12 @@ +package graphql.parser; + +import graphql.PublicApi; +import graphql.language.SourceLocation; + +@PublicApi +public class ParseCancelledException extends InvalidSyntaxException { + + public ParseCancelledException(String msg, SourceLocation sourceLocation, String offendingToken) { + super(sourceLocation, msg, null, offendingToken, null); + } +} diff --git a/src/main/java/graphql/parser/Parser.java b/src/main/java/graphql/parser/Parser.java index 2e79b92666..6272fe8153 100644 --- a/src/main/java/graphql/parser/Parser.java +++ b/src/main/java/graphql/parser/Parser.java @@ -5,6 +5,7 @@ import graphql.language.Node; import graphql.language.SourceLocation; import graphql.language.Value; +import graphql.parser.antlr.GraphqlBaseListener; import graphql.parser.antlr.GraphqlLexer; import graphql.parser.antlr.GraphqlParser; import org.antlr.v4.runtime.BaseErrorListener; @@ -16,6 +17,8 @@ import org.antlr.v4.runtime.Recognizer; import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.atn.PredictionMode; +import org.antlr.v4.runtime.tree.ParseTreeListener; +import org.antlr.v4.runtime.tree.TerminalNode; import java.io.IOException; import java.io.Reader; @@ -144,7 +147,7 @@ public Document parseDocument(Reader reader, ParserOptions parserOptions) throws return parseDocumentImpl(reader, parserOptions); } - private Document parseDocumentImpl(Reader reader, ParserOptions parserOptions) throws InvalidSyntaxException { + private Document parseDocumentImpl(Reader reader, ParserOptions parserOptions) throws 
InvalidSyntaxException, ParseCancelledException { BiFunction nodeFunction = (parser, toLanguage) -> { GraphqlParser.DocumentContext documentContext = parser.document(); Document doc = toLanguage.createDocument(documentContext); @@ -188,7 +191,7 @@ private Node parseImpl(Reader reader, BiFunction recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e) { SourceLocation sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, line, charPositionInLine); String preview = AntlrHelper.createPreview(multiSourceReader, line); - throw new InvalidSyntaxException(sourceLocation, "Invalid syntax: " + msg, preview, null, null); + throw new InvalidSyntaxException(sourceLocation, msg, preview, null, null); } }); @@ -206,6 +209,13 @@ public void syntaxError(Recognizer recognizer, Object offendingSymbol, int if (toLanguage == null) { toLanguage = getAntlrToLanguage(tokens, multiSourceReader, parserOptions); } + + setupParserListener(multiSourceReader, parser, toLanguage); + + + // + // parsing starts ...... now! + // Object[] contextAndNode = nodeFunction.apply(parser, toLanguage); ParserRuleContext parserRuleContext = (ParserRuleContext) contextAndNode[0]; Node node = (Node) contextAndNode[1]; @@ -227,6 +237,31 @@ public void syntaxError(Recognizer recognizer, Object offendingSymbol, int return node; } + private void setupParserListener(MultiSourceReader multiSourceReader, GraphqlParser parser, GraphqlAntlrToLanguage toLanguage) { + int maxTokens = toLanguage.getParserOptions().getMaxTokens(); + // prevent a billion laugh attacks by restricting how many tokens we allow + ParseTreeListener listener = new GraphqlBaseListener() { + int count = 0; + + @Override + public void visitTerminal(TerminalNode node) { + count++; + if (count > maxTokens) { + String msg = String.format("More than %d parse tokens have been presented. 
To prevent Denial Of Service attacks, parsing has been cancelled.", maxTokens); + SourceLocation sourceLocation = null; + String offendingToken = null; + if (node.getSymbol() != null) { + offendingToken = node.getText(); + sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, node.getSymbol().getLine(), node.getSymbol().getCharPositionInLine()); + } + + throw new ParseCancelledException(msg, sourceLocation, offendingToken); + } + } + }; + parser.addParseListener(listener); + } + /** * Allows you to override the ANTLR to AST code. * diff --git a/src/main/java/graphql/parser/ParserOptions.java b/src/main/java/graphql/parser/ParserOptions.java index dbf1dc4763..cfd1be903c 100644 --- a/src/main/java/graphql/parser/ParserOptions.java +++ b/src/main/java/graphql/parser/ParserOptions.java @@ -3,15 +3,30 @@ import graphql.Assert; import graphql.PublicApi; +import java.util.function.Consumer; + /** * Options that control how the {@link Parser} behaves. */ @PublicApi public class ParserOptions { + /** + * A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn + * memory representing a document that won't ever execute. To prevent this for most users, graphql-java + * sets this value to 15000. ANTLR parsing time is linear in the number of tokens presented. The more you + * allow, the longer it takes. + * + * If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this + * JVM wide. 
+ */ + public static int MAX_QUERY_TOKENS = 15000; + private static ParserOptions defaultJvmParserOptions = newParserOptions() .captureIgnoredChars(false) .captureSourceLocation(true) + .maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java + .build(); /** @@ -50,10 +65,12 @@ public static void setDefaultParserOptions(ParserOptions options) { private final boolean captureIgnoredChars; private final boolean captureSourceLocation; + private final int maxTokens; private ParserOptions(Builder builder) { this.captureIgnoredChars = builder.captureIgnoredChars; this.captureSourceLocation = builder.captureSourceLocation; + this.maxTokens = builder.maxTokens; } /** @@ -79,6 +96,23 @@ public boolean isCaptureSourceLocation() { return captureSourceLocation; } + /** + * A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn + * memory representing a document that won't ever execute. To prevent this you can set a maximum number of parse + * tokens that will be accepted before an exception is thrown and the parsing is stopped. + * + * @return the maximum number of raw tokens the parser will accept, after which an exception will be thrown. 
+ */ + public int getMaxTokens() { + return maxTokens; + } + + public ParserOptions transform(Consumer builderConsumer) { + Builder builder = new Builder(this); + builderConsumer.accept(builder); + return builder.build(); + } + public static Builder newParserOptions() { return new Builder(); } @@ -87,6 +121,16 @@ public static class Builder { private boolean captureIgnoredChars = false; private boolean captureSourceLocation = true; + private int maxTokens = MAX_QUERY_TOKENS; + + Builder() { + } + + Builder(ParserOptions parserOptions) { + this.captureIgnoredChars = parserOptions.captureIgnoredChars; + this.captureSourceLocation = parserOptions.captureSourceLocation; + this.maxTokens = parserOptions.maxTokens; + } public Builder captureIgnoredChars(boolean captureIgnoredChars) { this.captureIgnoredChars = captureIgnoredChars; @@ -98,6 +142,11 @@ public Builder captureSourceLocation(boolean captureSourceLocation) { return this; } + public Builder maxTokens(int maxTokens) { + this.maxTokens = maxTokens; + return this; + } + public ParserOptions build() { return new ParserOptions(this); } diff --git a/src/main/java/graphql/schema/idl/SchemaParser.java b/src/main/java/graphql/schema/idl/SchemaParser.java index 586782bc36..e49c47aaa3 100644 --- a/src/main/java/graphql/schema/idl/SchemaParser.java +++ b/src/main/java/graphql/schema/idl/SchemaParser.java @@ -8,13 +8,14 @@ import graphql.language.SDLDefinition; import graphql.parser.InvalidSyntaxException; import graphql.parser.Parser; +import graphql.parser.ParserOptions; import graphql.schema.idl.errors.NonSDLDefinitionError; import graphql.schema.idl.errors.SchemaProblem; -import java.io.InputStream; -import java.io.InputStreamReader; import java.io.File; import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; import java.nio.file.Files; @@ -71,8 +72,22 @@ public TypeDefinitionRegistry parse(InputStream inputStream) throws 
SchemaProble * @throws SchemaProblem if there are problems compiling the schema definitions */ public TypeDefinitionRegistry parse(Reader reader) throws SchemaProblem { + return parse(reader, null); + } + + /** + * Parse a reader of schema definitions and create a {@link TypeDefinitionRegistry} + * + * @param reader the reader to parse + * @param parserOptions the parse options to use while parsing + * + * @return registry of type definitions + * + * @throws SchemaProblem if there are problems compiling the schema definitions + */ + public TypeDefinitionRegistry parse(Reader reader, ParserOptions parserOptions) throws SchemaProblem { try (Reader input = reader) { - return parseImpl(input); + return parseImpl(input, parserOptions); } catch (IOException e) { throw new RuntimeException(e); } @@ -92,9 +107,19 @@ public TypeDefinitionRegistry parse(String schemaInput) throws SchemaProblem { } public TypeDefinitionRegistry parseImpl(Reader schemaInput) { + // why is this public - (head shake) + return parseImpl(schemaInput, null); + } + + private TypeDefinitionRegistry parseImpl(Reader schemaInput, ParserOptions parseOptions) { try { + if (parseOptions == null) { + // for SDL we don't limit how many parser tokens there are - it's not the attack vector + // to be prevented compared to queries + parseOptions = ParserOptions.getDefaultParserOptions().transform(opts -> opts.maxTokens(Integer.MAX_VALUE)); + } Parser parser = new Parser(); - Document document = parser.parseDocument(schemaInput); + Document document = parser.parseDocument(schemaInput, parseOptions); return buildRegistry(document); } catch (InvalidSyntaxException e) { diff --git a/src/test/groovy/graphql/parser/ParserTest.groovy b/src/test/groovy/graphql/parser/ParserTest.groovy index fe817a1b90..3662390ccc 100644 --- a/src/test/groovy/graphql/parser/ParserTest.groovy +++ b/src/test/groovy/graphql/parser/ParserTest.groovy @@ -1,6 +1,7 @@ package graphql.parser +import graphql.TestUtil import 
graphql.language.Argument import graphql.language.ArrayValue import graphql.language.AstComparator @@ -1071,4 +1072,36 @@ triple3 : """edge cases \\""" "" " \\"" \\" edge cases""" document.getSourceLocation() == SourceLocation.EMPTY document.getDefinitions()[0].getSourceLocation() == SourceLocation.EMPTY } + + def "a billion laughs attack will be prevented by default"() { + def lol = "@lol" * 10000 // two tokens = 20000+ tokens + def text = "query { f $lol }" + when: + Parser.parse(text) + + then: + def e = thrown(ParseCancelledException) + e.getMessage().contains("parsing has been cancelled") + + when: "integration test to prove it cancels by default" + + def sdl = """type Query { f : ID} """ + def graphQL = TestUtil.graphQL(sdl).build() + def er = graphQL.execute(text) + then: + er.errors.size() == 1 + er.errors[0].message.contains("parsing has been cancelled") + } + + def "they can shoot themselves if they want to with large documents"() { + def lol = "@lol" * 10000 // two tokens = 20000+ tokens + def text = "query { f $lol }" + + def options = ParserOptions.newParserOptions().maxTokens(30000).build() + when: + def doc = new Parser().parseDocument(text, options) + + then: + doc != null + } } diff --git a/src/test/groovy/graphql/schema/idl/SchemaParserTest.groovy b/src/test/groovy/graphql/schema/idl/SchemaParserTest.groovy index 8bd5caacf3..0dd515e1ae 100644 --- a/src/test/groovy/graphql/schema/idl/SchemaParserTest.groovy +++ b/src/test/groovy/graphql/schema/idl/SchemaParserTest.groovy @@ -4,6 +4,7 @@ import graphql.language.EnumTypeDefinition import graphql.language.InterfaceTypeDefinition import graphql.language.ObjectTypeDefinition import graphql.language.ScalarTypeDefinition +import graphql.parser.ParserOptions import graphql.schema.idl.errors.SchemaProblem import spock.lang.Specification import spock.lang.Unroll @@ -338,5 +339,25 @@ class SchemaParserTest extends Specification { schemaProblem.getErrors()[2].getMessage().contains("OperationDefinition") } + 
def "large schema files can be parsed - there is no limit"() { + def sdl = "type Query {\n" + for (int i = 0; i < 30000; i++) { + sdl += " f" + i + " : ID\n" + } + sdl += "}" + + when: + def typeDefinitionRegistry = new SchemaParser().parse(sdl) + then: + typeDefinitionRegistry != null + + when: "options are used they will be respected" + def options = ParserOptions.defaultParserOptions.transform({ it.maxTokens(100) }) + new SchemaParser().parse(new StringReader(sdl), options) + then: + def e = thrown(SchemaProblem) + e.errors[0].message.contains("parsing has been cancelled") + + } } From b6878dbad389d9ad5a9300e36fcc2c101a76f6e9 Mon Sep 17 00:00:00 2001 From: Brad Baker Date: Wed, 15 Sep 2021 18:40:45 +1000 Subject: [PATCH 2/3] Fixed test --- src/test/groovy/graphql/parser/ParserTest.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/groovy/graphql/parser/ParserTest.groovy b/src/test/groovy/graphql/parser/ParserTest.groovy index 3662390ccc..34f641d313 100644 --- a/src/test/groovy/graphql/parser/ParserTest.groovy +++ b/src/test/groovy/graphql/parser/ParserTest.groovy @@ -797,7 +797,7 @@ triple3 : """edge cases \\""" "" " \\"" \\" edge cases""" println document then: def e = thrown(InvalidSyntaxException) - e.message.contains("Invalid syntax") + e.message.contains("Invalid Syntax") e.sourcePreview == input + "\n" e.location.line == 3 e.location.column == 20 From ddecadcb77fe1689f5738d8b1ea640fad19eee7f Mon Sep 17 00:00:00 2001 From: Brad Baker Date: Fri, 17 Sep 2021 14:57:40 +1000 Subject: [PATCH 3/3] Update src/main/java/graphql/parser/Parser.java Co-authored-by: Jordie <30464310+jord1e@users.noreply.github.com> --- src/main/java/graphql/parser/Parser.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/graphql/parser/Parser.java b/src/main/java/graphql/parser/Parser.java index 6272fe8153..3a560344b4 100644 --- a/src/main/java/graphql/parser/Parser.java +++ b/src/main/java/graphql/parser/Parser.java @@ 
-247,7 +247,7 @@ private void setupParserListener(MultiSourceReader multiSourceReader, GraphqlPar public void visitTerminal(TerminalNode node) { count++; if (count > maxTokens) { - String msg = String.format("More than %d parse tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxTokens); + String msg = String.format("More than %d parse tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxTokens); SourceLocation sourceLocation = null; String offendingToken = null; if (node.getSymbol() != null) {