webkit  2cdf99a9e3038c7e01b3c37e8ad903ecbe5eecf1
https://github.com/WebKit/webkit
json_stream_parser.h
Go to the documentation of this file.
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 #ifndef GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__
32 #define GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__
33 
34 #include <stack>
35 #include <string>
36 
40 
41 namespace google {
42 namespace util {
43 class Status;
44 } // namespace util
45 
46 namespace protobuf {
47 namespace util {
48 namespace converter {
49 
50 class ObjectWriter;
51 
52 // A JSON parser that can parse a stream of JSON chunks rather than needing the
53 // entire JSON string up front. It is a modified version of the parser in
54 // //net/proto/json/json-parser.h that has been changed in the following ways:
55 // - Changed from recursion to an explicit stack to allow resumption
56 // - Added support for int64 and uint64 numbers
57 // - Removed support for octal and decimal escapes
58 // - Removed support for numeric keys
59 // - Removed support for functions (javascript)
60 // - Removed some lax-comma support (but kept trailing comma support)
61 // - Writes directly to an ObjectWriter rather than using subclassing
62 //
63 // Here is an example usage:
64 // JsonStreamParser parser(ow_.get());
65 // util::Status result = parser.Parse(chunk1);
66 // result.Update(parser.Parse(chunk2));
67 // result.Update(parser.FinishParse());
68 // GOOGLE_DCHECK(result.ok()) << "Failed to parse JSON";
69 //
70 // This parser is thread-compatible as long as only one thread is calling a
71 // Parse() method at a time.
73  public:
74  // Creates a JsonStreamParser that will write to the given ObjectWriter.
75  explicit JsonStreamParser(ObjectWriter* ow);
76  virtual ~JsonStreamParser();
77 
78  // Parses a UTF-8 encoded JSON string from a StringPiece.
80 
81 
82  // Finish parsing the JSON string.
83  util::Status FinishParse();
84 
85 
86  private:
87  enum TokenType {  // Kind of the next token at p_, as reported by GetNextTokenType().
88  BEGIN_STRING, // Start of a string: " or '
89  BEGIN_NUMBER, // Start of a number: - or a digit
90  BEGIN_TRUE, // Start of the literal true
91  BEGIN_FALSE, // Start of the literal false
92  BEGIN_NULL, // Start of the literal null
93  BEGIN_OBJECT, // Start of an object: {
94  END_OBJECT, // End of an object: }
95  BEGIN_ARRAY, // Start of an array: [
96  END_ARRAY, // End of an array: ]
97  ENTRY_SEPARATOR, // Entry separator: :
98  VALUE_SEPARATOR, // Value separator: ,
99  BEGIN_KEY, // Start of a key: letter, _, $ or digit. A key must begin with a non-digit.
100  UNKNOWN // Unknown token or we ran out of the stream.
101  };
102 
103  enum ParseType {  // Parse states held on stack_; each names what the parser expects next.
104  VALUE, // Expects a value: {, [, true, false, null, string or number
105  OBJ_MID, // Expects a ',' or }
106  ENTRY, // Expects a key or }
107  ENTRY_MID, // Expects a :
108  ARRAY_VALUE, // Expects a value or ]
109  ARRAY_MID // Expects a ',' or ]
110  };
111 
112  // Holds the result of parsing a number; filled in through the out-parameter of ParseNumberHelper().
113  struct NumberResult {
114  enum Type { DOUBLE, INT, UINT };  // The numeric representations a parsed number can take.
115  Type type;  // NOTE(review): presumably selects which union member below is valid - confirm in the .cc file.
116  union {  // Anonymous union: the three values share the same storage.
117  double double_val;
118  int64 int_val;
119  uint64 uint_val;
120  };
121  };
122 
123  // Parses a single chunk of JSON, returning an error if the JSON was invalid.
124  util::Status ParseChunk(StringPiece json);
125 
126  // Runs the parser based on stack_ and p_, until the stack is empty or p_ runs
127  // out of data. If we unexpectedly run out of p_ we push the latest stack entry
128  // back onto the stack and return.
129  util::Status RunParser();
130 
131  // Parses a value from p_ and writes it to ow_.
132  // A value may be an object, array, true, false, null, string or number.
133  util::Status ParseValue(TokenType type);
134 
135  // Parses a string and writes it out to the ow_.
136  util::Status ParseString();
137 
138  // Parses a string, storing the result in parsed_.
139  util::Status ParseStringHelper();
140 
141  // This function parses unicode escape sequences in strings. It returns an
142  // error when there's a parsing error: either the size is not the expected
143  // size or a character is not a hex digit. When it returns, str will contain
144  // what has been successfully parsed so far.
145  util::Status ParseUnicodeEscape();
146 
147  // Expects p_ to point to a JSON number, writes the number to the writer using
148  // the appropriate Render method based on the type of number.
149  util::Status ParseNumber();
150 
151  // Parse a number into a NumberResult, reporting an error if no number could
152  // be parsed. This method will try to parse into a uint64, int64, or double
153  // based on whether the number was positive or negative or had a decimal
154  // component.
155  util::Status ParseNumberHelper(NumberResult* result);
156 
157  // Handles a { during parsing of a value.
158  util::Status HandleBeginObject();
159 
160  // Parses from the ENTRY state.
161  util::Status ParseEntry(TokenType type);
162 
163  // Parses from the ENTRY_MID state.
164  util::Status ParseEntryMid(TokenType type);
165 
166  // Parses from the OBJ_MID state.
167  util::Status ParseObjectMid(TokenType type);
168 
169  // Handles a [ during parsing of a value.
170  util::Status HandleBeginArray();
171 
172  // Parses from the ARRAY_VALUE state.
173  util::Status ParseArrayValue(TokenType type);
174 
175  // Parses from the ARRAY_MID state.
176  util::Status ParseArrayMid(TokenType type);
177 
178  // Expects p_ to point to an unquoted literal
179  util::Status ParseTrue();
180  util::Status ParseFalse();
181  util::Status ParseNull();
182 
183  // Report a failure as a util::Status.
184  util::Status ReportFailure(StringPiece message);
185 
186  // Report a failure due to an UNKNOWN token type. We check if we hit the
187  // end of the stream and if we're finishing or not to detect what type of
188  // status to return in this case.
189  util::Status ReportUnknown(StringPiece message);
190 
191  // Advance p_ past all whitespace or until the end of the string.
192  void SkipWhitespace();
193 
194  // Advance p_ one UTF-8 character
195  void Advance();
196 
197  // Expects p_ to point to the beginning of a key.
198  util::Status ParseKey();
199 
200  // Return the type of the next token at p_.
201  TokenType GetNextTokenType();
202 
203  // The object writer to write parse events to.
204  ObjectWriter* ow_;
205 
206  // The stack of parsing we still need to do. When the stack runs empty we will
207  // have parsed a single value from the root (e.g. an object or list).
208  std::stack<ParseType> stack_;
209 
210  // Contains any leftover text from a previous chunk that we weren't able to
211  // fully parse, for example the start of a key or number.
212  string leftover_;
213 
214  // The current chunk of JSON being parsed. Primarily used for providing
215  // context during error reporting.
216  StringPiece json_;
217 
218  // A pointer within the current JSON being parsed, used to track location.
219  StringPiece p_;
220 
221  // Stores the last key read, as we separate parsing of keys and values.
222  StringPiece key_;
223 
224  // Storage for key_ if we need to keep ownership, for example between chunks
225  // or if the key was unescaped from a JSON string.
226  string key_storage_;
227 
228  // True during the FinishParse() call, so we know that any errors are fatal.
229  // For example, an unterminated string will normally result in cancelling and
230  // retrying during the next chunk, but during FinishParse() it is an error.
231  bool finishing_;
232 
233  // String we parsed during a call to ParseStringHelper().
234  StringPiece parsed_;
235 
236  // Storage for the string we parsed. This may be empty if the string was able
237  // to be parsed directly from the input.
238  string parsed_storage_;
239 
240  // The character that opened the string, either ' or ".
241  // A value of 0 indicates that string parsing is not in process.
242  char string_open_;
243 
244  // Storage for the chunk that is being parsed in ParseChunk().
245  string chunk_storage_;
246 
247  // Whether to allow non UTF-8 encoded input and replace invalid code points.
248  bool coerce_to_utf8_;
249 
251 };
252 
253 } // namespace converter
254 } // namespace util
255 } // namespace protobuf
256 
257 } // namespace google
258 #endif // GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__
Definition: util.py:1
Definition: json_stream_parser.h:72
Definition: stringpiece.h:178
#define GOOGLE_DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName)
Definition: macros.h:45
void SkipWhitespace(InputStream &is)
Skip the JSON white spaces in a stream.
Definition: reader.h:253
unrestricted float DOUBLE
Definition: TestTypedefs.idl:71
Definition: __init__.py:1
uint64_t uint64
Definition: port.h:136
Definition: type.pb.h:133
EGLenum type
Definition: eglext.h:63
result
Definition: target-blank-opener-post-window.php:5
int64_t int64
Definition: port.h:131
#define LIBPROTOBUF_EXPORT
Definition: port.h:97
Definition: gflags_completions.h:115
Definition: XMLHttpRequest.idl:38
def Parse(text, message)
Definition: json_format.py:298
GLuint GLsizei const GLchar * message
Definition: gl2ext.h:137
Definition: status.h:69