webkit  2cdf99a9e3038c7e01b3c37e8ad903ecbe5eecf1
https://github.com/WebKit/webkit
encodedstream.h
Go to the documentation of this file.
1 // Copyright (C) 2011 Milo Yip
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef RAPIDJSON_ENCODEDSTREAM_H_
22 #define RAPIDJSON_ENCODEDSTREAM_H_
23 
24 #include "rapidjson.h"
25 
26 #ifdef __GNUC__
27 RAPIDJSON_DIAG_PUSH
28 RAPIDJSON_DIAG_OFF(effc++)
29 #endif
30 
32 
34 
// Input byte stream wrapper with a statically bound encoding.
//
// Characters of type Encoding::Ch are produced from an InputByteStream through
// the static functions Encoding::TakeBOM and Encoding::Take. One decoded
// character is always held in current_, so Peek() is a plain member read.
38 template <typename Encoding, typename InputByteStream>
39 class EncodedInputStream {
    // The wrapped stream has to deliver single bytes.
40  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
41 public:
42  typedef typename Encoding::Ch Ch;
43 
    // Binds to `is` (kept by reference, so it must outlive this object) and
    // loads the first character via Encoding::TakeBOM.
    // NOTE(review): TakeBOM presumably skips a leading byte order mark before
    // returning the first character -- confirm in encodings.h.
44  EncodedInputStream(InputByteStream& is) : is_(is) {
45  current_ = Encoding::TakeBOM(is_);
46  }
47 
    // Returns the buffered character without consuming it.
48  Ch Peek() const { return current_; }
    // Returns the buffered character and decodes the next one into the buffer.
49  Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; }
    // Position as reported by the underlying byte stream.
50  size_t Tell() const { return is_.Tell(); }
51 
52  // Write operations are not supported on an input stream; each one asserts.
53  void Put(Ch) { RAPIDJSON_ASSERT(false); }
54  void Flush() { RAPIDJSON_ASSERT(false); }
55  Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
56  size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
57 
58 private:
    // Declared private and not defined in this listing: assignment is disabled.
60  EncodedInputStream& operator=(const EncodedInputStream&);
61 
62  InputByteStream& is_;  // wrapped byte stream
63  Ch current_;           // character returned by the next Peek()/Take()
64 };
65 
67 
// Output byte stream wrapper with statically bound encoding.
//
// Every character passed to Put() is handed to Encoding::Put together with the
// wrapped OutputByteStream.
71 template <typename Encoding, typename OutputByteStream>
72 class EncodedOutputStream {
    // The wrapped stream has to accept single bytes.
73  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
74 public:
75  typedef typename Encoding::Ch Ch;
76 
    // Binds to `os` (kept by reference, so it must outlive this object).
    // When putBOM is true (the default) Encoding::PutBOM(os_) is called first.
77  EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) {
78  if (putBOM)
79  Encoding::PutBOM(os_);
80  }
81 
    // Encodes one character into the wrapped stream.
82  void Put(Ch c) { Encoding::Put(os_, c); }
    // Forwards to the wrapped stream's Flush().
83  void Flush() { os_.Flush(); }
84 
85  // Read and in-place write operations are not supported; each one asserts.
    // Peek() and Take() return 0 explicitly so that control never runs off the
    // end of a non-void function when RAPIDJSON_ASSERT expands to nothing.
86  Ch Peek() const { RAPIDJSON_ASSERT(false); return 0; }
87  Ch Take() { RAPIDJSON_ASSERT(false); return 0; }
88  size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
89  Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
90  size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
91 
92 private:
    // Declared private and not defined in this listing: assignment is disabled.
94  EncodedOutputStream& operator=(const EncodedOutputStream&);
95 
96  OutputByteStream& os_;  // wrapped byte stream
97 };
98 
// Expands to the comma-separated list of the static member `x` of each of the
// five UTF encodings instantiated for the enclosing class's Ch type. It is
// used as the initializer of function-pointer tables that are indexed with a
// UTFType value (f[type_]).
// NOTE(review): the order here presumably mirrors the UTFType enumerators in
// encodings.h -- keep the two in sync.
99 #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
100 
102 
106 template <typename CharType, typename InputByteStream>
108  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
109 public:
110  typedef CharType Ch;
111 
113 
117  AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) {
118  DetectType();
119  static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) };
120  takeFunc_ = f[type_];
121  current_ = takeFunc_(*is_);
122  }
123 
124  UTFType GetType() const { return type_; }
125  bool HasBOM() const { return hasBOM_; }
126 
127  Ch Peek() const { return current_; }
128  Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; }
129  size_t Tell() const { return is_->Tell(); }
130 
131  // Not implemented
132  void Put(Ch) { RAPIDJSON_ASSERT(false); }
133  void Flush() { RAPIDJSON_ASSERT(false); }
134  Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
135  size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
136 
137 private:
139  AutoUTFInputStream& operator=(const AutoUTFInputStream&);
140 
141  // Detect encoding type with BOM or RFC 4627
142  void DetectType() {
143  // BOM (Byte Order Mark):
144  // 00 00 FE FF UTF-32BE
145  // FF FE 00 00 UTF-32LE
146  // FE FF UTF-16BE
147  // FF FE UTF-16LE
148  // EF BB BF UTF-8
149 
150  const unsigned char* c = (const unsigned char *)is_->Peek4();
151  if (!c)
152  return;
153 
154  unsigned bom = c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24);
155  hasBOM_ = false;
156  if (bom == 0xFFFE0000) { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
157  else if (bom == 0x0000FEFF) { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
158  else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take(); }
159  else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take(); }
160  else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); }
161 
162  // RFC 4627: Section 3
163  // "Since the first two characters of a JSON text will always be ASCII
164  // characters [RFC0020], it is possible to determine whether an octet
165  // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
166  // at the pattern of nulls in the first four octets."
167  // 00 00 00 xx UTF-32BE
168  // 00 xx 00 xx UTF-16BE
169  // xx 00 00 00 UTF-32LE
170  // xx 00 xx 00 UTF-16LE
171  // xx xx xx xx UTF-8
172 
173  if (!hasBOM_) {
174  unsigned pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0);
175  switch (pattern) {
176  case 0x08: type_ = kUTF32BE; break;
177  case 0x0A: type_ = kUTF16BE; break;
178  case 0x01: type_ = kUTF32LE; break;
179  case 0x05: type_ = kUTF16LE; break;
180  case 0x0F: type_ = kUTF8; break;
181  default: break; // Use type defined by user.
182  }
183  }
184 
185  // Runtime check whether the size of character type is sufficient. It only perform checks with assertion.
186  switch (type_) {
187  case kUTF8:
188  // Do nothing
189  break;
190  case kUTF16LE:
191  case kUTF16BE:
192  RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
193  break;
194  case kUTF32LE:
195  case kUTF32BE:
196  RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
197  break;
198  default:
199  RAPIDJSON_ASSERT(false); // Invalid type
200  }
201  }
202 
203  typedef Ch (*TakeFunc)(InputByteStream& is);
204  InputByteStream* is_;
205  UTFType type_;
206  Ch current_;
207  TakeFunc takeFunc_;
208  bool hasBOM_;
209 };
210 
212 
// Output stream wrapper with dynamically bound encoding.
//
// The UTF type is supplied by the caller at construction time and selects the
// Put (and, on request, PutBOM) function of the matching encoding through a
// lookup table.
216 template <typename CharType, typename OutputByteStream>
217 class AutoUTFOutputStream {
     // The wrapped stream has to accept single bytes.
218  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
219 public:
220  typedef CharType Ch;
221 
223 
     // os:     byte stream to write to; it is stored as a pointer and must
     //         outlive this object.
     // type:   UTF type used for all output.
     // putBOM: when true, the byte order mark for `type` is written first.
228  AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) {
229  // Runtime check whether the size of the character type is sufficient. It only performs checks with assertions.
230  switch (type_) {
231  case kUTF16LE:
232  case kUTF16BE:
233  RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
234  break;
235  case kUTF32LE:
236  case kUTF32BE:
237  RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
238  break;
239  case kUTF8:
240  // Do nothing
241  break;
242  default:
243  RAPIDJSON_ASSERT(false); // Invalid UTFType
244  }
245 
     // Table is indexed by type_.
246  static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) };
247  putFunc_ = f[type_];
248 
249  if (putBOM)
250  PutBOM();
251  }
252 
     // UTF type chosen at construction.
253  UTFType GetType() const { return type_; }
254 
     // Encodes one character into the wrapped stream with the selected encoder.
255  void Put(Ch c) { putFunc_(*os_, c); }
     // Forwards to the wrapped stream's Flush().
256  void Flush() { os_->Flush(); }
257 
258  // Read and in-place write operations are not supported; each one asserts.
     // Peek() and Take() return 0 explicitly so that control never runs off the
     // end of a non-void function when RAPIDJSON_ASSERT expands to nothing.
259  Ch Peek() const { RAPIDJSON_ASSERT(false); return 0; }
260  Ch Take() { RAPIDJSON_ASSERT(false); return 0; }
261  size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
262  Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
263  size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
264 
265 private:
     // Declared private and not defined in this listing: assignment is disabled.
267  AutoUTFOutputStream& operator=(const AutoUTFOutputStream&);
268 
     // Writes the byte order mark of type_ to the wrapped stream.
269  void PutBOM() {
270  typedef void (*PutBOMFunc)(OutputByteStream&);
271  static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) };
272  f[type_](*os_);
273  }
274 
     // Signature shared by the Put functions of all encodings.
275  typedef void (*PutFunc)(OutputByteStream&, Ch);
276 
277  OutputByteStream* os_;  // wrapped byte stream
278  UTFType type_;          // encoding chosen by the caller
279  PutFunc putFunc_;       // encoder selected for type_
280 };
281 
282 #undef RAPIDJSON_ENCODINGS_FUNC
283 
285 
286 #ifdef __GNUC__
287 RAPIDJSON_DIAG_POP
288 #endif
289 
290 #endif // RAPIDJSON_ENCODEDSTREAM_H_
#define RAPIDJSON_ENCODINGS_FUNC(x)
Definition: encodedstream.h:99
UTFType GetType() const
Definition: encodedstream.h:124
CharType Ch
Definition: encodedstream.h:220
UTFType
Runtime-specified UTF encoding type of a stream.
Definition: encodings.h:543
Ch Peek() const
Definition: encodedstream.h:86
AutoUTFInputStream(InputByteStream &is, UTFType type=kUTF8)
Constructor.
Definition: encodedstream.h:117
int c
Definition: cpp_unittests.cpp:275
Output byte stream wrapper with statically bound encoding.
Definition: encodedstream.h:72
Input stream wrapper with dynamically bound encoding and automatic encoding detection.
Definition: encodedstream.h:107
#define Ch(x, y, z)
Definition: sha256.c:217
UTF-32 big endian.
Definition: encodings.h:548
UTF-16 little endian.
Definition: encodings.h:545
UTF-8.
Definition: encodings.h:544
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition: rapidjson.h:346
Ch Take()
Definition: encodedstream.h:260
size_t PutEnd(Ch *)
Definition: encodedstream.h:90
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition: rapidjson.h:91
UTF-16 big endian.
Definition: encodings.h:546
Ch Take()
Definition: encodedstream.h:49
size_t Tell() const
Definition: encodedstream.h:88
EncodedInputStream(InputByteStream &is)
Definition: encodedstream.h:44
void Put(Ch c)
Definition: encodedstream.h:82
Definition: GetPutInfo.h:232
void
Definition: AVFoundationCFSoftLinking.h:81
Encoding::Ch Ch
Definition: encodedstream.h:42
Ch * PutBegin()
Definition: encodedstream.h:55
size_t PutEnd(Ch *)
Definition: encodedstream.h:263
void Put(Ch)
Definition: encodedstream.h:53
size_t Tell() const
Definition: encodedstream.h:261
size_t PutEnd(Ch *)
Definition: encodedstream.h:135
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition: rapidjson.h:94
Ch Peek() const
Definition: encodedstream.h:127
Output stream wrapper with dynamically bound encoding and automatic encoding detection.
Definition: encodedstream.h:217
GLfloat f
Definition: gl2.h:417
Ch * PutBegin()
Definition: encodedstream.h:134
CharType Ch
Definition: encodedstream.h:110
AutoUTFOutputStream(OutputByteStream &os, UTFType type, bool putBOM)
Constructor.
Definition: encodedstream.h:228
void Flush()
Definition: encodedstream.h:133
EGLenum type
Definition: eglext.h:63
void Flush()
Definition: encodedstream.h:256
bool is(Ref< ArgType > &source)
Definition: Ref.h:220
EncodedOutputStream(OutputByteStream &os, bool putBOM=true)
Definition: encodedstream.h:77
UTFType GetType() const
Definition: encodedstream.h:253
common definitions and configuration
Ch Take()
Definition: encodedstream.h:87
#define false
Definition: float-mm.c:5
Ch Take()
Definition: encodedstream.h:128
bool HasBOM() const
Definition: encodedstream.h:125
void Flush()
Definition: encodedstream.h:83
UTF-32 little endian.
Definition: encodings.h:547
Ch * PutBegin()
Definition: encodedstream.h:89
Ch Peek() const
Definition: encodedstream.h:48
Input byte stream wrapper with a statically bound encoding.
Definition: encodedstream.h:39
void Flush()
Definition: encodedstream.h:54
Ch * PutBegin()
Definition: encodedstream.h:262
size_t Tell() const
Definition: encodedstream.h:129
void Put(Ch)
Definition: encodedstream.h:132
size_t Tell() const
Definition: encodedstream.h:50
Encoding::Ch Ch
Definition: encodedstream.h:75
size_t PutEnd(Ch *)
Definition: encodedstream.h:56
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:315
void Put(Ch c)
Definition: encodedstream.h:255
Ch Peek() const
Definition: encodedstream.h:259