WebSocket++  0.8.3-dev
C++ websocket client/server library
parser.hpp
1 /*
2  * Copyright (c) 2014, Peter Thorson. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright
7  * notice, this list of conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright
9  * notice, this list of conditions and the following disclaimer in the
10  * documentation and/or other materials provided with the distribution.
11  * * Neither the name of the WebSocket++ Project nor the
12  * names of its contributors may be used to endorse or promote products
13  * derived from this software without specific prior written permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL PETER THORSON BE LIABLE FOR ANY
19  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  */
27 
28 #ifndef HTTP_PARSER_HPP
29 #define HTTP_PARSER_HPP
30 
31 #include <algorithm>
32 #include <map>
33 #include <string>
34 #include <utility>
35 
36 #include <websocketpp/utilities.hpp>
37 #include <websocketpp/http/constants.hpp>
38 
39 namespace websocketpp {
40 namespace http {
41 namespace parser {
42 
/// Parser state identifiers
/**
 * Names the parts of an HTTP message head that the parser distinguishes:
 * method, resource, version and headers.
 */
namespace state {
    enum value {
        method   = 0,
        resource = 1,
        version  = 2,
        headers  = 3
    };
}
51 
/// Body encoding identifiers
/**
 * Tags the encoding of a message body. `unknown` is the value a freshly
 * constructed parser starts with.
 */
namespace body_encoding {
    enum value {
        unknown = 0,
        plain   = 1,
        chunked = 2
    };
}
59 
/// Header storage: maps a header name to its value. Keys are ordered by
/// utility::ci_less (presumably a case-insensitive comparison -- confirm in
/// websocketpp/utilities.hpp).
60 typedef std::map<std::string, std::string, utility::ci_less > header_list;
61 
62 /// Read and return the next token in the stream
63 /**
64  * Read until a non-token character is found and then return the token and
65  * iterator to the next character to read
66  *
67  * @param begin An iterator to the beginning of the sequence
68  * @param end An iterator to the end of the sequence
69  * @return A pair containing the token and an iterator to the next character in
70  * the stream
71  */
72 template <typename InputIterator>
73 std::pair<std::string,InputIterator> extract_token(InputIterator begin,
74  InputIterator end)
75 {
76  InputIterator it = std::find_if(begin,end,&is_not_token_char);
77  return std::make_pair(std::string(begin,it),it);
78 }
79 
/// Read and return the next quoted string in the stream
/**
 * Read a double quoted string starting at `begin`. The surrounding quotes are
 * stripped and every quoted-pair (a backslash followed by any character) is
 * replaced by the character following the backslash, so `\"` yields `"` and
 * `\\` yields `\`. The value is returned along with an iterator to the
 * character after the closing quote.
 *
 * Failure is signalled by returning an empty string together with `begin`.
 * This happens when the range is empty, when it does not start with a double
 * quote, or when no unescaped closing quote is found before `end` (including
 * a dangling backslash at the very end). Note that a successfully parsed
 * empty quoted string (`""`) also yields an empty value, but with an iterator
 * that differs from `begin`.
 *
 * @param begin An iterator to the beginning of the sequence
 * @param end An iterator to the end of the sequence
 * @return A pair containing the string read and an iterator to the next
 * character in the stream
 */
template <typename InputIterator>
std::pair<std::string,InputIterator> extract_quoted_string(InputIterator begin,
    InputIterator end)
{
    std::string s;

    if (begin == end || *begin != '"') {
        return std::make_pair(s,begin);
    }

    InputIterator cursor = begin;
    ++cursor;

    // Single left-to-right pass. Consuming the escaped character together
    // with its backslash is what makes `\\"` parse correctly: the second
    // backslash is taken as data, so the quote that follows is seen as the
    // terminator rather than as an escaped quote.
    while (cursor != end) {
        if (*cursor == '\\') {
            ++cursor;
            if (cursor == end) {
                // dangling escape: the string can not be terminated
                break;
            }
            s.append(1,*cursor);
        } else if (*cursor == '"') {
            ++cursor;
            return std::make_pair(s,cursor);
        } else {
            s.append(1,*cursor);
        }

        ++cursor;
    }

    // ran out of input before finding the closing quote
    return std::make_pair(std::string(),begin);
}
128 
129 /// Read and discard one unit of linear whitespace
130 /**
131  * Read one unit of linear white space and return the iterator to the character
132  * afterwards. If `begin` is returned, no whitespace was extracted.
133  *
134  * @param begin An iterator to the beginning of the sequence
135  * @param end An iterator to the end of the sequence
136  * @return An iterator to the character after the linear whitespace read
137  */
138 template <typename InputIterator>
139 InputIterator extract_lws(InputIterator begin, InputIterator end) {
140  InputIterator it = begin;
141 
142  // strip leading CRLF
143  if (end-begin > 2 && *begin == '\r' && *(begin+1) == '\n' &&
144  is_whitespace_char(static_cast<unsigned char>(*(begin+2))))
145  {
146  it+=3;
147  }
148 
149  it = std::find_if(it,end,&is_not_whitespace_char);
150  return it;
151 }
152 
/// Skip every consecutive unit of linear whitespace
/**
 * Applies `extract_lws` repeatedly until a pass makes no progress or the end
 * of the range is reached. If the returned iterator equals `begin`, nothing
 * was skipped.
 *
 * @param begin An iterator to the beginning of the sequence
 * @param end An iterator to the end of the sequence
 * @return An iterator to the first character after the whitespace skipped
 */
template <typename InputIterator>
InputIterator extract_all_lws(InputIterator begin, InputIterator end) {
    InputIterator cursor = begin;

    for (;;) {
        InputIterator const next = extract_lws(cursor,end);
        bool const stalled = (next == cursor);

        cursor = next;

        // stop once a pass consumed nothing or there is nothing left to read
        if (stalled || cursor == end) {
            break;
        }
    }

    return cursor;
}
178 
179 /// Extract HTTP attributes
180 /**
181  * An http attributes list is a semicolon delimited list of key value pairs in
182  * the format: *( ";" attribute "=" value ) where attribute is a token and value
183  * is a token or quoted string.
184  *
185  * Attributes extracted are appended to the supplied attributes list
186  * `attributes`.
187  *
188  * @param [in] begin An iterator to the beginning of the sequence
189  * @param [in] end An iterator to the end of the sequence
190  * @param [out] attributes A reference to the attributes list to append
191  * attribute/value pairs extracted to
192  * @return An iterator to the character after the last atribute read
193  */
194 template <typename InputIterator>
195 InputIterator extract_attributes(InputIterator begin, InputIterator end,
196  attribute_list & attributes)
197 {
198  InputIterator cursor;
199  bool first = true;
200 
201  if (begin == end) {
202  return begin;
203  }
204 
205  cursor = begin;
206  std::pair<std::string,InputIterator> ret;
207 
208  while (cursor != end) {
209  std::string name;
210 
211  cursor = http::parser::extract_all_lws(cursor,end);
212  if (cursor == end) {
213  break;
214  }
215 
216  if (first) {
217  // ignore this check for the very first pass
218  first = false;
219  } else {
220  if (*cursor == ';') {
221  // advance past the ';'
222  ++cursor;
223  } else {
224  // non-semicolon in this position indicates end end of the
225  // attribute list, break and return.
226  break;
227  }
228  }
229 
230  cursor = http::parser::extract_all_lws(cursor,end);
231  ret = http::parser::extract_token(cursor,end);
232 
233  if (ret.first.empty()) {
234  // error: expected a token
235  return begin;
236  } else {
237  name = ret.first;
238  cursor = ret.second;
239  }
240 
241  cursor = http::parser::extract_all_lws(cursor,end);
242  if (cursor == end || *cursor != '=') {
243  // if there is an equals sign, read the attribute value. Otherwise
244  // record a blank value and continue
245  attributes[name].clear();
246  continue;
247  }
248 
249  // advance past the '='
250  ++cursor;
251 
252  cursor = http::parser::extract_all_lws(cursor,end);
253  if (cursor == end) {
254  // error: expected a token or quoted string
255  return begin;
256  }
257 
258  ret = http::parser::extract_quoted_string(cursor,end);
259  if (ret.second != cursor) {
260  attributes[name] = ret.first;
261  cursor = ret.second;
262  continue;
263  }
264 
265  ret = http::parser::extract_token(cursor,end);
266  if (ret.first.empty()) {
267  // error : expected token or quoted string
268  return begin;
269  } else {
270  attributes[name] = ret.first;
271  cursor = ret.second;
272  }
273  }
274 
275  return cursor;
276 }
277 
278 /// Extract HTTP parameters
279 /**
280  * An http parameters list is a comma delimited list of tokens followed by
281  * optional semicolon delimited attributes lists.
282  *
283  * Parameters extracted are appended to the supplied parameters list
284  * `parameters`.
285  *
286  * @param [in] begin An iterator to the beginning of the sequence
287  * @param [in] end An iterator to the end of the sequence
288  * @param [out] parameters A reference to the parameters list to append
289  * paramter values extracted to
290  * @return An iterator to the character after the last parameter read
291  */
292 template <typename InputIterator>
293 InputIterator extract_parameters(InputIterator begin, InputIterator end,
294  parameter_list &parameters)
295 {
296  InputIterator cursor;
297 
298  if (begin == end) {
299  // error: expected non-zero length range
300  return begin;
301  }
302 
303  cursor = begin;
304  std::pair<std::string,InputIterator> ret;
305 
306  /**
307  * LWS
308  * token
309  * LWS
310  * *(";" method-param)
311  * LWS
312  * ,=loop again
313  */
314  while (cursor != end) {
315  std::string parameter_name;
316  attribute_list attributes;
317 
318  // extract any stray whitespace
319  cursor = http::parser::extract_all_lws(cursor,end);
320  if (cursor == end) {break;}
321 
322  ret = http::parser::extract_token(cursor,end);
323 
324  if (ret.first.empty()) {
325  // error: expected a token
326  return begin;
327  } else {
328  parameter_name = ret.first;
329  cursor = ret.second;
330  }
331 
332  // Safe break point, insert parameter with blank attributes and exit
333  cursor = http::parser::extract_all_lws(cursor,end);
334  if (cursor == end) {
335  //parameters[parameter_name] = attributes;
336  parameters.push_back(std::make_pair(parameter_name,attributes));
337  break;
338  }
339 
340  // If there is an attribute list, read it in
341  if (*cursor == ';') {
342  InputIterator acursor;
343 
344  ++cursor;
345  acursor = http::parser::extract_attributes(cursor,end,attributes);
346 
347  if (acursor == cursor) {
348  // attribute extraction ended in syntax error
349  return begin;
350  }
351 
352  cursor = acursor;
353  }
354 
355  // insert parameter into output list
356  //parameters[parameter_name] = attributes;
357  parameters.push_back(std::make_pair(parameter_name,attributes));
358 
359  cursor = http::parser::extract_all_lws(cursor,end);
360  if (cursor == end) {break;}
361 
362  // if next char is ',' then read another parameter, else stop
363  if (*cursor != ',') {
364  break;
365  }
366 
367  // advance past comma
368  ++cursor;
369 
370  if (cursor == end) {
371  // expected more bytes after a comma
372  return begin;
373  }
374  }
375 
376  return cursor;
377 }
378 
379 inline std::string strip_lws(std::string const & input) {
380  std::string::const_iterator begin = extract_all_lws(input.begin(),input.end());
381  if (begin == input.end()) {
382  return std::string();
383  }
384 
385  std::string::const_reverse_iterator rbegin = extract_all_lws(input.rbegin(),input.rend());
386  if (rbegin == input.rend()) {
387  return std::string();
388  }
389 
390  return std::string(begin,rbegin.base());
391 }
392 
393 /// Base HTTP parser
394 /**
395  * Includes methods and data elements common to all types of HTTP messages such
396  * as headers, versions, bodies, etc.
397  */
398 class parser {
399 public:
400  parser()
401  : m_header_bytes(0)
402  , m_body_bytes_needed(0)
403  , m_body_bytes_max(max_body_size)
404  , m_body_encoding(body_encoding::unknown) {}
405 
406  /// Get the HTTP version string
407  /**
408  * @return The version string for this parser
409  */
410  std::string const & get_version() const {
411  return m_version;
412  }
413 
414  /// Set HTTP parser Version
415  /**
416  * Input should be in format: HTTP/x.y where x and y are positive integers.
417  * @todo Does this method need any validation?
418  *
419  * @param [in] version The value to set the HTTP version to.
420  */
421  void set_version(std::string const & version);
422 
423  /// Get the value of an HTTP header
424  /**
425  * Note: per HTTP specs header values are compared case insensitively.
426  *
427  * @param [in] key The name/key of the header to get.
428  * @return The value associated with the given HTTP header key.
429  */
430  std::string const & get_header(std::string const & key) const;
431 
432  /// Extract an HTTP parameter list from a parser header.
433  /**
434  * If the header requested doesn't exist or exists and is empty the
435  * parameter list is valid (but empty).
436  *
437  * @param [in] key The name/key of the HTTP header to use as input.
438  * @param [out] out The parameter list to store extracted parameters in.
439  * @return Whether or not the input was a valid parameter list.
440  */
441  bool get_header_as_plist(std::string const & key, parameter_list & out)
442  const;
443 
444  /// Return a list of all HTTP headers
445  /**
446  * Return a list of all HTTP headers
447  *
448  * @since 0.8.0
449  *
450  * @return A list of all HTTP headers
451  */
452  header_list const & get_headers() const;
453 
454  /// Append a value to an existing HTTP header
455  /**
456  * This method will set the value of the HTTP header `key` with the
457  * indicated value. If a header with the name `key` already exists, `val`
458  * will be appended to the existing value.
459  *
460  * Note: per HTTP specs header values are compared case insensitively.
461  *
462  * @todo Should there be any restrictions on which keys are allowed?
463  *
464  * @see replace_header
465  *
466  * @param [in] key The name/key of the header to append to.
467  * @param [in] val The value to append.
468  * @param [out] ec A status code describing the outcome of the operation.
469  */
470  void append_header(std::string const & key, std::string const & val,
471  lib::error_code & ec);
472 
473  /// Set a value for an HTTP header, replacing an existing value
474  /**
475  * This method will set the value of the HTTP header `key` with the
476  * indicated value. If a header with the name `key` already exists, `val`
477  * will replace the existing value.
478  *
479  * Note: per HTTP specs header values are compared case insensitively.
480  *
481  * @see append_header
482  *
483  * @param [in] key The name/key of the header to append to.
484  * @param [in] val The value to append.
485  * @param [out] ec A status code describing the outcome of the operation.
486  */
487  void replace_header(std::string const & key, std::string const & val,
488  lib::error_code & ec);
489 
490  /// Remove a header from the parser
491  /**
492  * Removes the header entirely from the parser. This is different than
493  * setting the value of the header to blank.
494  *
495  * Note: per HTTP specs header values are compared case insensitively.
496  *
497  * @param [in] key The name/key of the header to remove.
498  * @param [out] ec A status code describing the outcome of the operation.
499  */
500  void remove_header(std::string const & key, lib::error_code & ec);
501 
502  // todo exception varients for the above 3?
503 
504  /// Get HTTP body
505  /**
506  * Gets the body of the HTTP object
507  *
508  * @return The body of the HTTP message.
509  */
510  std::string const & get_body() const {
511  return m_body;
512  }
513 
514  /// Set body content
515  /**
516  * Set the body content of the HTTP response to the parameter string. Note
517  * set_body will also set the Content-Length HTTP header to the appropriate
518  * value. If you want the Content-Length header to be something else, do so
519  * via replace_header("Content-Length") after calling set_body()
520  *
521  * @param value String data to include as the body content.
522  * @param [out] ec A status code describing the outcome of the operation.
523  */
524  void set_body(std::string const & value, lib::error_code & ec);
525 
526  /// Get body size limit
527  /**
528  * Retrieves the maximum number of bytes to parse & buffer before canceling
529  * a request.
530  *
531  * @since 0.5.0
532  *
533  * @return The maximum length of a message body.
534  */
536  return m_body_bytes_max;
537  }
538 
539  /// Set body size limit
540  /**
541  * Set the maximum number of bytes to parse and buffer before canceling a
542  * request.
543  *
544  * @since 0.5.0
545  *
546  * @param value The size to set the max body length to.
547  */
548  void set_max_body_size(size_t value) {
549  m_body_bytes_max = value;
550  }
551 
552  /// Extract an HTTP parameter list from a string.
553  /**
554  * @param [in] in The input string.
555  * @param [out] out The parameter list to store extracted parameters in.
556  * @return Whether or not the input was a valid parameter list.
557  */
558  bool parse_parameter_list(std::string const & in, parameter_list & out)
559  const;
560 protected:
561  /// Process a header line
562  /**
563  * @todo Update this method to be exception free.
564  *
565  * @param [in] begin An iterator to the beginning of the sequence.
566  * @param [in] end An iterator to the end of the sequence.
567  */
568  void process_header(std::string::iterator begin, std::string::iterator end);
569 
570  /// Prepare the parser to begin parsing body data
571  /**
572  * Inspects headers to determine if the message has a body that needs to be
573  * read. If so, sets up the necessary state, otherwise returns false. If
574  * this method returns true and loading the message body is desired call
575  * `process_body` until it returns zero bytes or an error.
576  *
577  * Must not be called until after all headers have been processed.
578  *
579  * @since 0.5.0 (no parameters)
580  * @since 0.9.0 (the ec parameter)
581  *
582  * @param [out] ec A status code describing the outcome of the operation.
583  * @return True if more bytes are needed to load the body, false otherwise.
584  */
585  bool prepare_body(lib::error_code & ec);
586 
587  /// Process body data
588  /**
589  * Parses body data.
590  *
591  * @since 0.5.0
592  *
593  * @param [in] begin An iterator to the beginning of the sequence.
594  * @param [in] end An iterator to the end of the sequence.
595  * @return The number of bytes processed
596  */
597  size_t process_body(char const * buf, size_t len);
598 
599  /// Check if the parser is done parsing the body
600  /**
601  * Behavior before a call to `prepare_body` is undefined.
602  *
603  * @since 0.5.0
604  *
605  * @return True if the message body has been completed loaded.
606  */
607  bool body_ready() const {
608  return (m_body_bytes_needed == 0);
609  }
610 
611  /// Generate and return the HTTP headers as a string
612  /**
613  * Each headers will be followed by the \r\n sequence including the last one.
614  * A second \r\n sequence (blank header) is not appended by this method
615  *
616  * @return The HTTP headers as a string.
617  */
618  std::string raw_headers() const;
619 
620  std::string m_version;
621  header_list m_headers;
622 
623  size_t m_header_bytes;
624 
625  std::string m_body;
626  size_t m_body_bytes_needed;
627  size_t m_body_bytes_max;
628  body_encoding::value m_body_encoding;
629 };
630 
631 } // namespace parser
632 } // namespace http
633 } // namespace websocketpp
634 
635 #include <websocketpp/http/impl/parser.hpp>
636 
637 #endif // HTTP_PARSER_HPP
websocketpp::http::parser::parser::set_body
void set_body(std::string const &value, lib::error_code &ec)
Set body content.
Definition: parser.hpp:109
websocketpp::http::parser::parser::body_ready
bool body_ready() const
Check if the parser is done parsing the body.
Definition: parser.hpp:607
websocketpp::http::parser::parser::process_body
size_t process_body(char const *buf, size_t len)
Process body data.
Definition: parser.hpp:170
websocketpp::http::parser::parser::get_max_body_size
size_t get_max_body_size() const
Get body size limit.
Definition: parser.hpp:535
websocketpp::http::parser::parser::set_max_body_size
void set_max_body_size(size_t value)
Set body size limit.
Definition: parser.hpp:548
websocketpp::http::parser::parser::get_headers
header_list const & get_headers() const
Return a list of all HTTP headers.
Definition: parser.hpp:204
websocketpp::http::parser::parser::get_version
std::string const & get_version() const
Get the HTTP version string.
Definition: parser.hpp:410
websocketpp::http::parser::parser::get_header_as_plist
bool get_header_as_plist(std::string const &key, parameter_list &out) const
Extract an HTTP parameter list from a parser header.
Definition: parser.hpp:57
websocketpp::http::parser::parser::prepare_body
bool prepare_body(lib::error_code &ec)
Prepare the parser to begin parsing body data.
Definition: parser.hpp:144
websocketpp::versions_supported
static std::vector< int > const versions_supported(helper, helper+4)
Container that stores the list of protocol versions supported.
websocketpp::http
HTTP handling support.
Definition: constants.hpp:39
websocketpp::http::parser::parser::raw_headers
std::string raw_headers() const
Generate and return the HTTP headers as a string.
Definition: parser.hpp:208
websocketpp::http::parser::parser::append_header
void append_header(std::string const &key, std::string const &val, lib::error_code &ec)
Append a value to an existing HTTP header.
Definition: parser.hpp:69
websocketpp::http::parser::parser::get_body
std::string const & get_body() const
Get HTTP body.
Definition: parser.hpp:510
websocketpp::http::parser::parser::parse_parameter_list
bool parse_parameter_list(std::string const &in, parameter_list &out) const
Extract an HTTP parameter list from a string.
Definition: parser.hpp:132
websocketpp::http::parser::parser::remove_header
void remove_header(std::string const &key, lib::error_code &ec)
Remove a header from the parser.
Definition: parser.hpp:97
websocketpp::http::parser::parser::process_header
void process_header(std::string::iterator begin, std::string::iterator end)
Process a header line.
Definition: parser.hpp:186
websocketpp::http::parser::parser
Base HTTP parser.
Definition: parser.hpp:398
websocketpp::http::parser::parser::get_header
std::string const & get_header(std::string const &key) const
Get the value of an HTTP header.
Definition: parser.hpp:45
websocketpp::http::parser::parser::set_version
void set_version(std::string const &version)
Set HTTP parser Version.
Definition: parser.hpp:41
websocketpp::http::parser::parser::replace_header
void replace_header(std::string const &key, std::string const &val, lib::error_code &ec)
Set a value for an HTTP header, replacing an existing value.
Definition: parser.hpp:85