webkit  2cdf99a9e3038c7e01b3c37e8ad903ecbe5eecf1
https://github.com/WebKit/webkit
text_format.h
Go to the documentation of this file.
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: jschorr@google.com (Joseph Schorr)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // Utilities for printing and parsing protocol messages in a human-readable,
36 // text-based format.
37 
38 #ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__
39 #define GOOGLE_PROTOBUF_TEXT_FORMAT_H__
40 
41 #include <map>
42 #include <memory>
43 #ifndef _SHARED_PTR_H
45 #endif
46 #include <string>
47 #include <vector>
48 
52 
53 namespace google {
54 namespace protobuf {
55 
56 namespace io {
57  class ErrorCollector; // tokenizer.h
58 }
59 
60 // This class implements protocol buffer text format. Printing and parsing
61 // protocol messages in text format is useful for debugging and human editing
62 // of messages.
63 //
64 // This class is really a namespace that contains only static methods.
66  public:
67  // Outputs a textual representation of the given message to the given
68  // output stream.
69  static bool Print(const Message& message, io::ZeroCopyOutputStream* output);
70 
71  // Print the fields in an UnknownFieldSet. They are printed by tag number
72  // only. Embedded messages are heuristically identified by attempting to
73  // parse them.
74  static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
75  io::ZeroCopyOutputStream* output);
76 
77  // Like Print(), but outputs directly to a string.
78  static bool PrintToString(const Message& message, string* output);
79 
80  // Like PrintUnknownFields(), but outputs directly to a string.
81  static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
82  string* output);
83 
84  // Outputs a textual representation of the value of the field supplied on
85  // the message supplied. For non-repeated fields, an index of -1 must
86  // be supplied. Note that this method will print the default value for a
87  // field if it is not set.
88  static void PrintFieldValueToString(const Message& message,
89  const FieldDescriptor* field,
90  int index,
91  string* output);
92 
93  // The default printer that converts scalar values from fields into
94  // their string representation.
95  // Derive from FieldValuePrinter if you want fields to be printed in a
96  // different way, and register the derived printer with a Printer
97  // (see SetDefaultFieldValuePrinter() and RegisterFieldValuePrinter()).
99  public:
101  virtual ~FieldValuePrinter();
102  virtual string PrintBool(bool val) const;
103  virtual string PrintInt32(int32 val) const;
104  virtual string PrintUInt32(uint32 val) const;
105  virtual string PrintInt64(int64 val) const;
106  virtual string PrintUInt64(uint64 val) const;
107  virtual string PrintFloat(float val) const;
108  virtual string PrintDouble(double val) const;
109  virtual string PrintString(const string& val) const;
110  virtual string PrintBytes(const string& val) const;
111  virtual string PrintEnum(int32 val, const string& name) const;
112  virtual string PrintFieldName(const Message& message,
113  const Reflection* reflection,
114  const FieldDescriptor* field) const;
115  virtual string PrintMessageStart(const Message& message,
116  int field_index,
117  int field_count,
118  bool single_line_mode) const;
119  virtual string PrintMessageEnd(const Message& message,
120  int field_index,
121  int field_count,
122  bool single_line_mode) const;
123 
124  private:
126  };
127 
128  // Class for those users who require more fine-grained control over how
129  // a protocol buffer message is printed out.
131  public:
132  Printer();
133  ~Printer();
134 
135  // Like TextFormat::Print
136  bool Print(const Message& message, io::ZeroCopyOutputStream* output) const;
137  // Like TextFormat::PrintUnknownFields
138  bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
139  io::ZeroCopyOutputStream* output) const;
140  // Like TextFormat::PrintToString
141  bool PrintToString(const Message& message, string* output) const;
142  // Like TextFormat::PrintUnknownFieldsToString
143  bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
144  string* output) const;
145  // Like TextFormat::PrintFieldValueToString
146  void PrintFieldValueToString(const Message& message,
147  const FieldDescriptor* field,
148  int index,
149  string* output) const;
150 
151  // Adjusts the initial indent level of all output. Each indent level is
152  // equal to two spaces. The value is stored as given (no range check here).
153  void SetInitialIndentLevel(int indent_level) {
154  initial_indent_level_ = indent_level;
155  }
156 
157  // If printing in single line mode, then the entire message will be output
158  // on a single line with no line breaks.
159  void SetSingleLineMode(bool single_line_mode) {
160  single_line_mode_ = single_line_mode;
161  }
162 
164  return single_line_mode_;
165  }
166 
167  // If use_field_number is true, uses field number instead of field name.
168  void SetUseFieldNumber(bool use_field_number) {
169  use_field_number_ = use_field_number;
170  }
171 
172  // Set true to print repeated primitives in a format like:
173  // field_name: [1, 2, 3, 4]
174  // instead of printing each value on its own line. Short format applies
175  // only to primitive values -- i.e. everything except strings and
176  // sub-messages/groups.
177  void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) {
178  use_short_repeated_primitives_ = use_short_repeated_primitives;
179  }
180 
181  // Set true to output UTF-8 instead of ASCII. The only difference
182  // is that bytes >= 0x80 in string fields will not be escaped,
183  // because they are assumed to be part of UTF-8 multi-byte
184  // sequences. This will change the default FieldValuePrinter.
185  void SetUseUtf8StringEscaping(bool as_utf8);
186 
187  // Set the default FieldValuePrinter that is used for all fields that
188  // don't have a field-specific printer registered.
189  // Takes ownership of the printer.
190  void SetDefaultFieldValuePrinter(const FieldValuePrinter* printer);
191 
192  // Sets whether we want to hide unknown fields or not.
193  // Usually unknown fields are printed in a generic way that includes the
194  // tag number of the field instead of field name. However, sometimes it
195  // is useful to be able to print the message without unknown fields (e.g.
196  // for the python protobuf version to maintain consistency between its pure
197  // python and c++ implementations).
198  void SetHideUnknownFields(bool hide) {
199  hide_unknown_fields_ = hide;
200  }
201 
202  // If print_message_fields_in_index_order is true, print fields of a proto
203  // message using the order defined in source code instead of the field
204  // number. By default, use the field number order.
206  bool print_message_fields_in_index_order) {
207  print_message_fields_in_index_order_ =
208  print_message_fields_in_index_order;
209  }
210 
211  // If expand==true, expand google.protobuf.Any payloads. The output
212  // will be of form
213  // [type_url] { <value_printed_in_text> }
214  //
215  // If expand==false, print Any using the default printer. The output will
216  // look like
217  // type_url: "<type_url>" value: "serialized_content"
218  void SetExpandAny(bool expand) {
219  expand_any_ = expand;
220  }
221 
222  // If non-zero, we truncate all string fields that are longer than this
223  // threshold. This is useful when the proto message has very long strings,
224  // e.g., dump of encoded image file.
225  //
226  // NOTE(hfgong): Setting a non-zero value breaks round-trip safe
227  // property of TextFormat::Printer. That is, from the printed message, we
228  // cannot fully recover the original string field any more.
230  const int64 truncate_string_field_longer_than) {
231  truncate_string_field_longer_than_ = truncate_string_field_longer_than;
232  }
233 
234  // Register a custom field-specific FieldValuePrinter for fields
235  // with a particular FieldDescriptor.
236  // Returns "true" if the registration succeeded, or "false", if there is
237  // already a printer for that FieldDescriptor.
238  // Takes ownership of the printer on successful registration.
239  bool RegisterFieldValuePrinter(const FieldDescriptor* field,
240  const FieldValuePrinter* printer);
241 
242  private:
243  // Forward declaration of an internal class used to print the text
244  // output to the OutputStream (see text_format.cc for implementation).
245  class TextGenerator;
246 
247  // Internal Print method, used for writing to the OutputStream via
248  // the TextGenerator class.
249  void Print(const Message& message,
250  TextGenerator& generator) const;
251 
252  // Print a single field.
253  void PrintField(const Message& message,
254  const Reflection* reflection,
255  const FieldDescriptor* field,
256  TextGenerator& generator) const;
257 
258  // Print a repeated primitive field in short form.
259  void PrintShortRepeatedField(const Message& message,
260  const Reflection* reflection,
261  const FieldDescriptor* field,
262  TextGenerator& generator) const;
263 
264  // Print the name of a field -- i.e. everything that comes before the
265  // ':' for a single name/value pair.
266  void PrintFieldName(const Message& message,
267  const Reflection* reflection,
268  const FieldDescriptor* field,
269  TextGenerator& generator) const;
270 
271  // Outputs a textual representation of the value of the field supplied on
272  // the message supplied or the default value if not set.
273  void PrintFieldValue(const Message& message,
274  const Reflection* reflection,
275  const FieldDescriptor* field,
276  int index,
277  TextGenerator& generator) const;
278 
279  // Print the fields in an UnknownFieldSet. They are printed by tag number
280  // only. Embedded messages are heuristically identified by attempting to
281  // parse them.
282  void PrintUnknownFields(const UnknownFieldSet& unknown_fields,
283  TextGenerator& generator) const;
284 
285  bool PrintAny(const Message& message, TextGenerator& generator) const;
286 
287  int initial_indent_level_;
288 
289  bool single_line_mode_;
290 
291  bool use_field_number_;
292 
293  bool use_short_repeated_primitives_;
294 
295  bool hide_unknown_fields_;
296 
297  bool print_message_fields_in_index_order_;
298 
299  bool expand_any_;
300 
301  int64 truncate_string_field_longer_than_;
302 
303  google::protobuf::scoped_ptr<const FieldValuePrinter> default_field_value_printer_;
304  typedef map<const FieldDescriptor*,
305  const FieldValuePrinter*> CustomPrinterMap;
306  CustomPrinterMap custom_printers_;
307  };
308 
309  // Parses a text-format protocol message from the given input stream to
310  // the given message object. This function parses the human-readable format
311  // written by Print(). Returns true on success. The message is cleared first,
312  // even if the function fails -- See Merge() to avoid this behavior.
313  //
314  // Example input: "user {\n id: 123 extra { gender: MALE language: 'en' }\n}"
315  //
316  // One use for this function is parsing handwritten strings in test code.
317  // Another use is to parse the output from google::protobuf::Message::DebugString()
318  // (or ShortDebugString()), because these functions output using
319  // google::protobuf::TextFormat::Print().
320  //
321  // If you would like to read a protocol buffer serialized in the
322  // (non-human-readable) binary wire format, see
323  // google::protobuf::MessageLite::ParseFromString().
324  static bool Parse(io::ZeroCopyInputStream* input, Message* output);
325  // Like Parse(), but reads directly from a string.
326  static bool ParseFromString(const string& input, Message* output);
327 
328  // Like Parse(), but the data is merged into the given message, as if
329  // using Message::MergeFrom().
330  static bool Merge(io::ZeroCopyInputStream* input, Message* output);
331  // Like Merge(), but reads directly from a string.
332  static bool MergeFromString(const string& input, Message* output);
333 
334  // Parse the given text as a single field value and store it into the
335  // given field of the given message. If the field is a repeated field,
336  // the new value will be added to the end.
337  static bool ParseFieldValueFromString(const string& input,
338  const FieldDescriptor* field,
339  Message* message);
340 
341  // Interface that TextFormat::Parser can use to find extensions.
342  // This class may be extended in the future to find more information
343  // like fields, etc.
345  public:
346  virtual ~Finder();
347 
348  // Try to find an extension of *message by fully-qualified field
349  // name. Returns NULL if no extension is known for this name or number.
350  virtual const FieldDescriptor* FindExtension(
351  Message* message,
352  const string& name) const = 0;
353  };
354 
355  // A location in the parsed text, expressed as a line/column pair.
356  struct ParseLocation {
357  int line;  // -1 when the location is unset (see the default constructor).
358  int column;  // -1 when the location is unset.
359 
360  ParseLocation() : line(-1), column(-1) {}  // Creates an unset location.
361  ParseLocation(int line_param, int column_param)
362  : line(line_param), column(column_param) {}
363  };
364 
365  // Data structure which is populated with the locations of each field
366  // value parsed from the text.
368  public:
369  ParseInfoTree();
370  ~ParseInfoTree();
371 
372  // Returns the parse location for the index-th value of the field in the
373  // parsed text. If none exists, returns a location with line = -1. Index
374  // should be -1 for non-repeated fields.
375  ParseLocation GetLocation(const FieldDescriptor* field, int index) const;
376 
377  // Returns the parse info tree for the given field, which must be a message
378  // type. The nested information tree is owned by the root tree and will be
379  // deleted when it is deleted.
380  ParseInfoTree* GetTreeForNested(const FieldDescriptor* field,
381  int index) const;
382 
383  private:
384  // Allow the text format parser to record information into the tree.
385  friend class TextFormat;
386 
387  // Records the starting location of a single value for a field.
388  void RecordLocation(const FieldDescriptor* field, ParseLocation location);
389 
390  // Creates and records a nested tree for a nested message field.
391  ParseInfoTree* CreateNested(const FieldDescriptor* field);
392 
393  // Maps each field descriptor to the parse locations recorded for its values.
394  typedef map<const FieldDescriptor*, vector<ParseLocation> > LocationMap;
395 
396  // Maps each field descriptor to the nested parse info trees created
397  // for it.
398  typedef map<const FieldDescriptor*, vector<ParseInfoTree*> > NestedMap;
399 
400  LocationMap locations_;
401  NestedMap nested_;
402 
404  };
405 
406  // For more control over parsing, use this class.
408  public:
409  Parser();
410  ~Parser();
411 
412  // Like TextFormat::Parse().
413  bool Parse(io::ZeroCopyInputStream* input, Message* output);
414  // Like TextFormat::ParseFromString().
415  bool ParseFromString(const string& input, Message* output);
416  // Like TextFormat::Merge().
417  bool Merge(io::ZeroCopyInputStream* input, Message* output);
418  // Like TextFormat::MergeFromString().
419  bool MergeFromString(const string& input, Message* output);
420 
421  // Sets where parse errors are reported. If NULL (the default), errors will
422  // be printed to stderr. Only the pointer is stored by this setter.
423  void RecordErrorsTo(io::ErrorCollector* error_collector) {
424  error_collector_ = error_collector;  // NOTE(review): presumably must outlive later parse calls -- confirm in text_format.cc.
425  }
426 
427  // Sets how the parser finds extensions. If NULL (the default), the
428  // parser will use the standard Reflection object associated with
429  // the message being parsed. Only the pointer is stored by this setter.
430  void SetFinder(Finder* finder) {
431  finder_ = finder;  // NOTE(review): presumably must outlive later parse calls -- confirm in text_format.cc.
432  }
433 
434  // Sets where location information about the parse will be written. If NULL
435  // (the default), then no location will be written.
437  parse_info_tree_ = tree;
438  }
439 
440  // Normally parsing fails if, after parsing, output->IsInitialized()
441  // returns false. Call AllowPartialMessage(true) to skip this check.
442  void AllowPartialMessage(bool allow) {
443  allow_partial_ = allow;
444  }
445 
446  // Allow field names to be matched case-insensitively.
447  // This is not advisable if there are fields that only differ in case, or
448  // if you want to enforce writing in the canonical form.
449  // This is 'false' by default.
450  void AllowCaseInsensitiveField(bool allow) {
451  allow_case_insensitive_field_ = allow;
452  }
453 
454  // Like TextFormat::ParseFieldValueFromString
455  bool ParseFieldValueFromString(const string& input,
456  const FieldDescriptor* field,
457  Message* output);
458 
459  // Stores `allow` in allow_field_number_. NOTE(review): presumably lets the parser accept field numbers in place of field names -- confirm in text_format.cc.
460  void AllowFieldNumber(bool allow) {
461  allow_field_number_ = allow;
462  }
463 
464  private:
465  // Forward declaration of an internal class used to parse text
466  // representations (see text_format.cc for implementation).
467  class ParserImpl;
468 
469  // Like TextFormat::Merge(). The provided implementation is used
470  // to do the parsing.
471  bool MergeUsingImpl(io::ZeroCopyInputStream* input,
472  Message* output,
473  ParserImpl* parser_impl);
474 
476  Finder* finder_;
477  ParseInfoTree* parse_info_tree_;
478  bool allow_partial_;
479  bool allow_case_insensitive_field_;
480  bool allow_unknown_field_;
481  bool allow_unknown_enum_;
482  bool allow_field_number_;
483  bool allow_relaxed_whitespace_;
484  bool allow_singular_overwrites_;
485  };
486 
487 
488  private:
489  // Hack: ParseInfoTree declares TextFormat as a friend which should extend
490  // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some
491  // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide
492  // helpers for ParserImpl to call methods of ParseInfoTree.
493  static inline void RecordLocation(ParseInfoTree* info_tree,
494  const FieldDescriptor* field,
496  static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree,
497  const FieldDescriptor* field);
498 
500 };
501 
502 inline void TextFormat::RecordLocation(ParseInfoTree* info_tree,
503  const FieldDescriptor* field,
505  info_tree->RecordLocation(field, location);
506 }
507 
508 // Forwards to ParseInfoTree::CreateNested() and returns its result; info_tree is dereferenced without a null check.
509 inline TextFormat::ParseInfoTree* TextFormat::CreateNested(
510  ParseInfoTree* info_tree, const FieldDescriptor* field) {
511  return info_tree->CreateNested(field);
512 }
513 
514 } // namespace protobuf
515 
516 } // namespace google
517 #endif // GOOGLE_PROTOBUF_TEXT_FORMAT_H__
Definition: text_format.h:407
def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False, pointy_brackets=False, use_index_order=False, float_format=None)
Definition: text_format.py:154
void AllowCaseInsensitiveField(bool allow)
Definition: text_format.h:450
Definition: message.h:179
void WriteLocationsTo(ParseInfoTree *tree)
Definition: text_format.h:436
const FieldDescriptor * field
Definition: parser_unittest.cc:2279
Definition: message.h:401
void SetExpandAny(bool expand)
Definition: text_format.h:218
void SetFinder(Finder *finder)
Definition: text_format.h:430
ParseLocation(int line_param, int column_param)
Definition: text_format.h:361
#define GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TypeName)
Definition: macros.h:40
void SetPrintMessageFieldsInIndexOrder(bool print_message_fields_in_index_order)
Definition: text_format.h:205
typename detail::make_map< Ts... >::type map
Definition: Brigand.h:223
Definition: text_format.h:344
int line
Definition: text_format.h:357
void SetTruncateStringFieldLongerThan(const int64 truncate_string_field_longer_than)
Definition: text_format.h:229
bool IsInSingleLineMode()
Definition: text_format.h:163
Definition: text_format.h:356
def PrintFieldValue(field, value, out, indent=0, as_utf8=False, as_one_line=False, pointy_brackets=False, use_index_order=False, float_format=None)
Definition: text_format.py:164
#define output
Definition: wire_format_lite.h:418
Definition: descriptor.h:439
GLint location
Definition: gl2.h:455
int32_t int32
Definition: port.h:130
void AllowPartialMessage(bool allow)
Definition: text_format.h:442
void AllowFieldNumber(bool allow)
Definition: text_format.h:460
def Merge(text, message, allow_unknown_extension=False, allow_field_number=False)
Definition: text_format.py:349
Definition: zero_copy_stream.h:181
void SetUseFieldNumber(bool use_field_number)
Definition: text_format.h:168
GLuint index
Definition: gl2.h:383
uint32_t uint32
Definition: port.h:135
EGLImageKHR EGLint * name
Definition: eglext.h:851
ParseLocation()
Definition: text_format.h:360
Definition: __init__.py:1
struct node * tree
Definition: float-mm.c:92
uint64_t uint64
Definition: port.h:136
void RecordErrorsTo(io::ErrorCollector *error_collector)
Definition: text_format.h:423
void SetSingleLineMode(bool single_line_mode)
Definition: text_format.h:159
Definition: unknown_field_set.h:75
Definition: scoped_ptr.h:48
Definition: text_format.h:130
void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives)
Definition: text_format.h:177
int64_t int64
Definition: port.h:131
Definition: zero_copy_stream.h:124
Definition: generator.py:1
void SetInitialIndentLevel(int indent_level)
Definition: text_format.h:153
MockErrorCollector error_collector_
Definition: importer_unittest.cc:136
#define LIBPROTOBUF_EXPORT
Definition: port.h:97
Definition: gflags_completions.h:115
string input
Definition: tokenizer_unittest.cc:198
def Parse(text, message)
Definition: json_format.py:298
Definition: tokenizer.h:64
GLuint GLsizei const GLchar * message
Definition: gl2ext.h:137
void SetHideUnknownFields(bool hide)
Definition: text_format.h:198
MergeFromString
Definition: python_message.py:1090
GLuint GLsizei GLsizei GLfloat * val
Definition: gl2ext.h:3301
int column
Definition: text_format.h:358
Definition: text_format.h:367
Definition: text_format.h:65