WebSocket++  0.8.0-dev
C++ websocket client/server library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
parser.hpp
1 /*
2  * Copyright (c) 2014, Peter Thorson. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright
7  * notice, this list of conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright
9  * notice, this list of conditions and the following disclaimer in the
10  * documentation and/or other materials provided with the distribution.
11  * * Neither the name of the WebSocket++ Project nor the
12  * names of its contributors may be used to endorse or promote products
13  * derived from this software without specific prior written permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL PETER THORSON BE LIABLE FOR ANY
19  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  */
27 
28 #ifndef HTTP_PARSER_HPP
29 #define HTTP_PARSER_HPP
30 
31 #include <algorithm>
32 #include <map>
33 #include <string>
34 #include <utility>
35 
36 #include <websocketpp/utilities.hpp>
37 #include <websocketpp/http/constants.hpp>
38 
39 namespace websocketpp {
40 namespace http {
41 namespace parser {
42 
/// Parsing stages for an HTTP message
/**
 * NOTE(review): the enumerator names suggest the parts of a message in the
 * order they are read (method, resource, version, then headers). Nothing in
 * this header consumes the enum, so confirm against the users of
 * `state::value`.
 */
namespace state {
    enum value {
        method = 0,
        resource = 1,
        version = 2,
        headers = 3
    };
}
51 
/// How the body of an HTTP message is encoded
/**
 * A freshly constructed `parser` starts out with `unknown`.
 *
 * NOTE(review): `plain` and `chunked` presumably distinguish a body with a
 * known length from one sent using chunked transfer coding; confirm against
 * the code that assigns them.
 */
namespace body_encoding {
    enum value {
        unknown = 0,
        plain = 1,
        chunked = 2
    };
}
59 
60 typedef std::map<std::string, std::string, utility::ci_less > header_list;
61 
62 /// Read and return the next token in the stream
63 /**
64  * Read until a non-token character is found and then return the token and
65  * iterator to the next character to read
66  *
67  * @param begin An iterator to the beginning of the sequence
68  * @param end An iterator to the end of the sequence
69  * @return A pair containing the token and an iterator to the next character in
70  * the stream
71  */
72 template <typename InputIterator>
73 std::pair<std::string,InputIterator> extract_token(InputIterator begin,
74  InputIterator end)
75 {
76  InputIterator it = std::find_if(begin,end,&is_not_token_char);
77  return std::make_pair(std::string(begin,it),it);
78 }
79 
/// Read and return the next quoted string in the stream
/**
 * Read a double quoted string starting at `begin`. The surrounding quotes are
 * stripped and every backslash escape (quoted-pair) is replaced by the
 * character that follows the backslash, so `\"` yields `"` and `\\` yields a
 * single backslash. A closing quote that follows an escaped backslash is
 * therefore recognized as the end of the string.
 *
 * Failure is signalled by returning `begin` as the iterator (together with an
 * empty string). This happens when the range is empty, when it does not start
 * with a double quote, or when no unescaped closing quote is found before
 * `end`.
 *
 * Only forward iteration is required of `InputIterator`.
 *
 * @param begin An iterator to the beginning of the sequence
 * @param end An iterator to the end of the sequence
 * @return A pair containing the string read and an iterator to the character
 * after the closing quote, or an empty string and `begin` on failure
 */
template <typename InputIterator>
std::pair<std::string,InputIterator> extract_quoted_string(InputIterator begin,
    InputIterator end)
{
    std::string s;

    // A quoted string has to open with a double quote.
    if (begin == end || *begin != '"') {
        return std::make_pair(s,begin);
    }

    InputIterator cursor = begin;
    ++cursor;

    // `marker` is the start of the run of literal characters that has been
    // scanned but not yet copied into `s`.
    InputIterator marker = cursor;

    while (cursor != end) {
        if (*cursor == '\\') {
            // Flush what precedes the backslash and drop the backslash.
            s.append(marker,cursor);
            ++cursor;
            if (cursor == end) {
                // dangling escape: the string is unterminated
                break;
            }
            // The escaped character opens the next literal run. Stepping
            // over it here keeps it from being read as a quote or as another
            // escape.
            marker = cursor;
            ++cursor;
        } else if (*cursor == '"') {
            // unescaped quote: end of the quoted string
            s.append(marker,cursor);
            ++cursor;
            return std::make_pair(s,cursor);
        } else {
            ++cursor;
        }
    }

    // ran out of input before the closing quote
    return std::make_pair(std::string(),begin);
}
128 
129 /// Read and discard one unit of linear whitespace
130 /**
131  * Read one unit of linear white space and return the iterator to the character
132  * afterwards. If `begin` is returned, no whitespace was extracted.
133  *
134  * @param begin An iterator to the beginning of the sequence
135  * @param end An iterator to the end of the sequence
136  * @return An iterator to the character after the linear whitespace read
137  */
138 template <typename InputIterator>
139 InputIterator extract_lws(InputIterator begin, InputIterator end) {
140  InputIterator it = begin;
141 
142  // strip leading CRLF
143  if (end-begin > 2 && *begin == '\r' && *(begin+1) == '\n' &&
144  is_whitespace_char(static_cast<unsigned char>(*(begin+2))))
145  {
146  it+=3;
147  }
148 
149  it = std::find_if(it,end,&is_not_whitespace_char);
150  return it;
151 }
152 
/// Skip every consecutive unit of linear whitespace
/**
 * Applies `extract_lws` repeatedly until a pass makes no progress or the end
 * of the range is reached. Getting `begin` back means no whitespace was
 * skipped.
 *
 * @param begin An iterator to the beginning of the sequence
 * @param end An iterator to the end of the sequence
 * @return An iterator to the first character after the skipped whitespace
 */
template <typename InputIterator>
InputIterator extract_all_lws(InputIterator begin, InputIterator end) {
    InputIterator current = begin;

    for (;;) {
        InputIterator next = extract_lws(current,end);

        // Stop once the input is exhausted or the last pass consumed nothing.
        if (next == end || next == current) {
            return next;
        }

        current = next;
    }
}
178 
179 /// Extract HTTP attributes
180 /**
181  * An http attributes list is a semicolon delimited list of key value pairs in
182  * the format: *( ";" attribute "=" value ) where attribute is a token and value
183  * is a token or quoted string.
184  *
185  * Attributes extracted are appended to the supplied attributes list
186  * `attributes`.
187  *
188  * @param [in] begin An iterator to the beginning of the sequence
189  * @param [in] end An iterator to the end of the sequence
190  * @param [out] attributes A reference to the attributes list to append
191  * attribute/value pairs extracted to
192  * @return An iterator to the character after the last atribute read
193  */
194 template <typename InputIterator>
195 InputIterator extract_attributes(InputIterator begin, InputIterator end,
196  attribute_list & attributes)
197 {
198  InputIterator cursor;
199  bool first = true;
200 
201  if (begin == end) {
202  return begin;
203  }
204 
205  cursor = begin;
206  std::pair<std::string,InputIterator> ret;
207 
208  while (cursor != end) {
209  std::string name;
210 
211  cursor = http::parser::extract_all_lws(cursor,end);
212  if (cursor == end) {
213  break;
214  }
215 
216  if (first) {
217  // ignore this check for the very first pass
218  first = false;
219  } else {
220  if (*cursor == ';') {
221  // advance past the ';'
222  ++cursor;
223  } else {
224  // non-semicolon in this position indicates end end of the
225  // attribute list, break and return.
226  break;
227  }
228  }
229 
230  cursor = http::parser::extract_all_lws(cursor,end);
231  ret = http::parser::extract_token(cursor,end);
232 
233  if (ret.first.empty()) {
234  // error: expected a token
235  return begin;
236  } else {
237  name = ret.first;
238  cursor = ret.second;
239  }
240 
241  cursor = http::parser::extract_all_lws(cursor,end);
242  if (cursor == end || *cursor != '=') {
243  // if there is an equals sign, read the attribute value. Otherwise
244  // record a blank value and continue
245  attributes[name].clear();
246  continue;
247  }
248 
249  // advance past the '='
250  ++cursor;
251 
252  cursor = http::parser::extract_all_lws(cursor,end);
253  if (cursor == end) {
254  // error: expected a token or quoted string
255  return begin;
256  }
257 
258  ret = http::parser::extract_quoted_string(cursor,end);
259  if (ret.second != cursor) {
260  attributes[name] = ret.first;
261  cursor = ret.second;
262  continue;
263  }
264 
265  ret = http::parser::extract_token(cursor,end);
266  if (ret.first.empty()) {
267  // error : expected token or quoted string
268  return begin;
269  } else {
270  attributes[name] = ret.first;
271  cursor = ret.second;
272  }
273  }
274 
275  return cursor;
276 }
277 
278 /// Extract HTTP parameters
279 /**
280  * An http parameters list is a comma delimited list of tokens followed by
281  * optional semicolon delimited attributes lists.
282  *
283  * Parameters extracted are appended to the supplied parameters list
284  * `parameters`.
285  *
286  * @param [in] begin An iterator to the beginning of the sequence
287  * @param [in] end An iterator to the end of the sequence
288  * @param [out] parameters A reference to the parameters list to append
289  * paramter values extracted to
290  * @return An iterator to the character after the last parameter read
291  */
292 template <typename InputIterator>
293 InputIterator extract_parameters(InputIterator begin, InputIterator end,
294  parameter_list &parameters)
295 {
296  InputIterator cursor;
297 
298  if (begin == end) {
299  // error: expected non-zero length range
300  return begin;
301  }
302 
303  cursor = begin;
304  std::pair<std::string,InputIterator> ret;
305 
306  /**
307  * LWS
308  * token
309  * LWS
310  * *(";" method-param)
311  * LWS
312  * ,=loop again
313  */
314  while (cursor != end) {
315  std::string parameter_name;
316  attribute_list attributes;
317 
318  // extract any stray whitespace
319  cursor = http::parser::extract_all_lws(cursor,end);
320  if (cursor == end) {break;}
321 
322  ret = http::parser::extract_token(cursor,end);
323 
324  if (ret.first.empty()) {
325  // error: expected a token
326  return begin;
327  } else {
328  parameter_name = ret.first;
329  cursor = ret.second;
330  }
331 
332  // Safe break point, insert parameter with blank attributes and exit
333  cursor = http::parser::extract_all_lws(cursor,end);
334  if (cursor == end) {
335  //parameters[parameter_name] = attributes;
336  parameters.push_back(std::make_pair(parameter_name,attributes));
337  break;
338  }
339 
340  // If there is an attribute list, read it in
341  if (*cursor == ';') {
342  InputIterator acursor;
343 
344  ++cursor;
345  acursor = http::parser::extract_attributes(cursor,end,attributes);
346 
347  if (acursor == cursor) {
348  // attribute extraction ended in syntax error
349  return begin;
350  }
351 
352  cursor = acursor;
353  }
354 
355  // insert parameter into output list
356  //parameters[parameter_name] = attributes;
357  parameters.push_back(std::make_pair(parameter_name,attributes));
358 
359  cursor = http::parser::extract_all_lws(cursor,end);
360  if (cursor == end) {break;}
361 
362  // if next char is ',' then read another parameter, else stop
363  if (*cursor != ',') {
364  break;
365  }
366 
367  // advance past comma
368  ++cursor;
369 
370  if (cursor == end) {
371  // expected more bytes after a comma
372  return begin;
373  }
374  }
375 
376  return cursor;
377 }
378 
379 inline std::string strip_lws(std::string const & input) {
380  std::string::const_iterator begin = extract_all_lws(input.begin(),input.end());
381  if (begin == input.end()) {
382  return std::string();
383  }
384 
385  std::string::const_reverse_iterator rbegin = extract_all_lws(input.rbegin(),input.rend());
386  if (rbegin == input.rend()) {
387  return std::string();
388  }
389 
390  return std::string(begin,rbegin.base());
391 }
392 
393 /// Base HTTP parser
394 /**
395  * Includes methods and data elements common to all types of HTTP messages such
396  * as headers, versions, bodies, etc.
397  */
398 class parser {
399 public:
400  parser()
401  : m_header_bytes(0)
402  , m_body_bytes_needed(0)
403  , m_body_bytes_max(max_body_size)
404  , m_body_encoding(body_encoding::unknown) {}
405 
406  /// Get the HTTP version string
407  /**
408  * @return The version string for this parser
409  */
410  std::string const & get_version() const {
411  return m_version;
412  }
413 
414  /// Set HTTP parser Version
415  /**
416  * Input should be in format: HTTP/x.y where x and y are positive integers.
417  * @todo Does this method need any validation?
418  *
419  * @param [in] version The value to set the HTTP version to.
420  */
421  void set_version(std::string const & version);
422 
423  /// Get the value of an HTTP header
424  /**
425  * @todo Make this method case insensitive.
426  *
427  * @param [in] key The name/key of the header to get.
428  * @return The value associated with the given HTTP header key.
429  */
430  std::string const & get_header(std::string const & key) const;
431 
432  /// Extract an HTTP parameter list from a parser header.
433  /**
434  * If the header requested doesn't exist or exists and is empty the
435  * parameter list is valid (but empty).
436  *
437  * @param [in] key The name/key of the HTTP header to use as input.
438  * @param [out] out The parameter list to store extracted parameters in.
439  * @return Whether or not the input was a valid parameter list.
440  */
441  bool get_header_as_plist(std::string const & key, parameter_list & out)
442  const;
443 
444  /// Append a value to an existing HTTP header
445  /**
446  * This method will set the value of the HTTP header `key` with the
447  * indicated value. If a header with the name `key` already exists, `val`
448  * will be appended to the existing value.
449  *
450  * @todo Make this method case insensitive.
451  * @todo Should there be any restrictions on which keys are allowed?
452  * @todo Exception free varient
453  *
454  * @see replace_header
455  *
456  * @param [in] key The name/key of the header to append to.
457  * @param [in] val The value to append.
458  */
459  void append_header(std::string const & key, std::string const & val);
460 
461  /// Set a value for an HTTP header, replacing an existing value
462  /**
463  * This method will set the value of the HTTP header `key` with the
464  * indicated value. If a header with the name `key` already exists, `val`
465  * will replace the existing value.
466  *
467  * @todo Make this method case insensitive.
468  * @todo Should there be any restrictions on which keys are allowed?
469  * @todo Exception free varient
470  *
471  * @see append_header
472  *
473  * @param [in] key The name/key of the header to append to.
474  * @param [in] val The value to append.
475  */
476  void replace_header(std::string const & key, std::string const & val);
477 
478  /// Remove a header from the parser
479  /**
480  * Removes the header entirely from the parser. This is different than
481  * setting the value of the header to blank.
482  *
483  * @todo Make this method case insensitive.
484  *
485  * @param [in] key The name/key of the header to remove.
486  */
487  void remove_header(std::string const & key);
488 
489  /// Get HTTP body
490  /**
491  * Gets the body of the HTTP object
492  *
493  * @return The body of the HTTP message.
494  */
495  std::string const & get_body() const {
496  return m_body;
497  }
498 
499  /// Set body content
500  /**
501  * Set the body content of the HTTP response to the parameter string. Note
502  * set_body will also set the Content-Length HTTP header to the appropriate
503  * value. If you want the Content-Length header to be something else, do so
504  * via replace_header("Content-Length") after calling set_body()
505  *
506  * @param value String data to include as the body content.
507  */
508  void set_body(std::string const & value);
509 
510  /// Get body size limit
511  /**
512  * Retrieves the maximum number of bytes to parse & buffer before canceling
513  * a request.
514  *
515  * @since 0.5.0
516  *
517  * @return The maximum length of a message body.
518  */
519  size_t get_max_body_size() const {
520  return m_body_bytes_max;
521  }
522 
523  /// Set body size limit
524  /**
525  * Set the maximum number of bytes to parse and buffer before canceling a
526  * request.
527  *
528  * @since 0.5.0
529  *
530  * @param value The size to set the max body length to.
531  */
532  void set_max_body_size(size_t value) {
533  m_body_bytes_max = value;
534  }
535 
536  /// Extract an HTTP parameter list from a string.
537  /**
538  * @param [in] in The input string.
539  * @param [out] out The parameter list to store extracted parameters in.
540  * @return Whether or not the input was a valid parameter list.
541  */
542  bool parse_parameter_list(std::string const & in, parameter_list & out)
543  const;
544 protected:
545  /// Process a header line
546  /**
547  * @todo Update this method to be exception free.
548  *
549  * @param [in] begin An iterator to the beginning of the sequence.
550  * @param [in] end An iterator to the end of the sequence.
551  */
552  void process_header(std::string::iterator begin, std::string::iterator end);
553 
554  /// Prepare the parser to begin parsing body data
555  /**
556  * Inspects headers to determine if the message has a body that needs to be
557  * read. If so, sets up the necessary state, otherwise returns false. If
558  * this method returns true and loading the message body is desired call
559  * `process_body` until it returns zero bytes or an error.
560  *
561  * Must not be called until after all headers have been processed.
562  *
563  * @since 0.5.0
564  *
565  * @return True if more bytes are needed to load the body, false otherwise.
566  */
567  bool prepare_body();
568 
569  /// Process body data
570  /**
571  * Parses body data.
572  *
573  * @since 0.5.0
574  *
575  * @param [in] begin An iterator to the beginning of the sequence.
576  * @param [in] end An iterator to the end of the sequence.
577  * @return The number of bytes processed
578  */
579  size_t process_body(char const * buf, size_t len);
580 
581  /// Check if the parser is done parsing the body
582  /**
583  * Behavior before a call to `prepare_body` is undefined.
584  *
585  * @since 0.5.0
586  *
587  * @return True if the message body has been completed loaded.
588  */
589  bool body_ready() const {
590  return (m_body_bytes_needed == 0);
591  }
592 
593  /// Generate and return the HTTP headers as a string
594  /**
595  * Each headers will be followed by the \r\n sequence including the last one.
596  * A second \r\n sequence (blank header) is not appended by this method
597  *
598  * @return The HTTP headers as a string.
599  */
600  std::string raw_headers() const;
601 
602  std::string m_version;
603  header_list m_headers;
604 
605  size_t m_header_bytes;
606 
607  std::string m_body;
608  size_t m_body_bytes_needed;
609  size_t m_body_bytes_max;
610  body_encoding::value m_body_encoding;
611 };
612 
613 } // namespace parser
614 } // namespace http
615 } // namespace websocketpp
616 
617 #include <websocketpp/http/impl/parser.hpp>
618 
619 #endif // HTTP_PARSER_HPP
size_t get_max_body_size() const
Get body size limit.
Definition: parser.hpp:519
bool get_header_as_plist(std::string const &key, parameter_list &out) const
Extract an HTTP parameter list from a parser header.
Definition: parser.hpp:55
std::string const & get_body() const
Get HTTP body.
Definition: parser.hpp:495
void set_body(std::string const &value)
Set body content.
Definition: parser.hpp:91
bool parse_parameter_list(std::string const &in, parameter_list &out) const
Extract an HTTP parameter list from a string.
Definition: parser.hpp:107
size_t process_body(char const *buf, size_t len)
Process body data.
Definition: parser.hpp:145
HTTP handling support.
Definition: constants.hpp:39
bool prepare_body()
Prepare the parser to begin parsing body data.
Definition: parser.hpp:119
void process_header(std::string::iterator begin, std::string::iterator end)
Process a header line.
Definition: parser.hpp:161
std::string const & get_header(std::string const &key) const
Get the value of an HTTP header.
Definition: parser.hpp:45
std::string const & get_version() const
Get the HTTP version string.
Definition: parser.hpp:410
void append_header(std::string const &key, std::string const &val)
Append a value to an existing HTTP header.
Definition: parser.hpp:67
void handle_accept(connection_ptr con, lib::error_code const &ec)
Handler callback for start_accept.
bool body_ready() const
Check if the parser is done parsing the body.
Definition: parser.hpp:589
void replace_header(std::string const &key, std::string const &val)
Set a value for an HTTP header, replacing an existing value.
Definition: parser.hpp:81
std::string raw_headers() const
Generate and return the HTTP headers as a string.
Definition: parser.hpp:179
void set_version(std::string const &version)
Set HTTP parser Version.
Definition: parser.hpp:41
void remove_header(std::string const &key)
Remove a header from the parser.
Definition: parser.hpp:87
void set_max_body_size(size_t value)
Set body size limit.
Definition: parser.hpp:532