CustomParser.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 /* DEBUG: section 86 ESI processing */
10 
11 #include "squid.h"
12 #include "Debug.h"
13 #include "esi/CustomParser.h"
14 #include "fatal.h"
15 #include "libTrie/Trie.h"
16 #include "libTrie/TrieCharTransform.h"
17 
18 #include <vector>
19 
21 
23 
24 Trie *
26 {
27  if (SearchTrie)
28  return SearchTrie;
29 
30  SearchTrie = new Trie(new TrieCaseless);
31 
32  static const ESITAG_t ESITAG_value = ESITAG;
33 
34  assert (SearchTrie->add
35  ("<esi:",5,(void *)&ESITAG_value));
36 
37  static const ESITAG_t ESIENDTAG_value = ESIENDTAG;
38 
39  assert (SearchTrie->add
40  ("</esi:",6,(void *)&ESIENDTAG_value));
41 
42  static const ESITAG_t ESICOMMENT_value = ESICOMMENT;
43 
44  assert (SearchTrie->add
45  ("<!--",4,(void *)&ESICOMMENT_value));
46 
47  return SearchTrie;
48 }
49 
51  theClient(aClient),
52  lastTag(ESITAG)
53 {}
54 
56 {
57  theClient = NULL;
58 }
59 
60 char const *
61 ESICustomParser::findTag(char const *buffer, size_t bufferLength)
62 {
63  size_t myOffset (0);
64  ESITAG_t *resulttype = NULL;
65 
66  while (myOffset < bufferLength &&
67  (resulttype = static_cast<ESITAG_t *>(GetTrie()->findPrefix (buffer + myOffset, bufferLength - myOffset)))
68  == NULL)
69  ++myOffset;
70 
71  if (myOffset == bufferLength)
72  return NULL;
73 
74  debugs(86, 9, "ESICustomParser::findTag: found " << *resulttype);
75 
76  lastTag = *resulttype;
77 
78  return buffer + myOffset;
79 }
80 
81 bool
82 ESICustomParser::parse(char const *dataToParse, size_t const lengthOfData, bool const endOfStream)
83 {
84  debugs(86, 9, "ESICustomParser::parse: Appending data to internal buffer");
85  content.append (dataToParse, lengthOfData);
86 
87  if (!endOfStream) {
88  return true;
89  }
90 
91  size_t openESITags (0);
92  // TODO: convert to Tokenizer parse
93  // erring on the safe side for now. Probably rawContent would be ok too
94  // note that operations below do *X='\0' ... altering the 'const' buffer content.
95  char const *currentPos = content.c_str();
96  SBuf::size_type remainingCount = content.length();
97  char const *tag = NULL;
98 
99  while ((tag = findTag(currentPos, remainingCount))) {
100  if (tag - currentPos)
101  theClient->parserDefault (currentPos,tag - currentPos);
102 
103  switch (lastTag) {
104 
105  case ESITAG: {
106  ++openESITags;
107  char *tagEnd = strchr(const_cast<char *>(tag), '>');
108 
109  if (!tagEnd) {
110  error = "Could not find end ('>') of tag";
111  return false;
112  }
113 
114  if (tagEnd - tag > (ssize_t)remainingCount) {
115  error = "Tag ends beyond the parse buffer.";
116  return false;
117  }
118 
119  if (*(tagEnd - 1) == '/')
120  --openESITags;
121 
122  char * endofName = strpbrk(const_cast<char *>(tag), w_space);
123 
124  if (endofName > tagEnd)
125  endofName = const_cast<char *>(tagEnd);
126 
127  *endofName = '\0';
128 
129  *tagEnd = '\0';
130 
131  std::vector<char *>attributes;
132 
133  char *attribute = const_cast<char *>(endofName + 1);
134 
135  while (attribute > tag && attribute < tagEnd) {
136  /* leading spaces */
137 
138  while (attribute < tagEnd && (xisspace(*attribute) || (*attribute == '/')))
139  ++attribute;
140 
141  if (! (attribute < tagEnd))
142  break;
143 
144  /* attribute name */
145  attributes.push_back(attribute);
146 
147  char *nextSpace = strpbrk(attribute, w_space);
148 
149  char *equals = strchr(attribute, '=');
150 
151  if (!equals) {
152  error = "Missing attribute value.";
153  return false;
154  }
155 
156  if (nextSpace && nextSpace < equals)
157  *nextSpace = '\0';
158  else
159  *equals = '\0';
160 
161  ++equals;
162 
163  while (equals < tagEnd && xisspace(*equals))
164  ++equals;
165 
166  char sep = *equals;
167 
168  if (sep != '\'' && sep != '"') {
169  error = "Unknown identifier (";
170  error.append (sep);
171  error.append (")");
172  return false;
173  }
174 
175  char *value = equals + 1;
176  char *end = strchr(value, sep);
177 
178  if (!end) {
179  error = "Missing attribute ending separator (";
180  error.append(sep);
181  error.append(")");
182  return false;
183  }
184  attributes.push_back(value);
185  *end = '\0';
186  attribute = end + 1;
187  }
188 
189  // TODO: after c++11, replace &attributes.front() with attributes.data()
190  theClient->start (tag + 1, const_cast<const char **>(&attributes.front()), attributes.size() >> 1);
191  /* TODO: attributes */
192 
193  if (*(tagEnd - 1) == '/')
194  theClient->end (tag + 1);
195 
196  remainingCount -= tagEnd - currentPos + 1;
197 
198  currentPos = tagEnd + 1;
199  }
200 
201  break;
202 
203  case ESIENDTAG: {
204  if (!openESITags)
205  return false;
206 
207  char const *tagEnd = strchr(tag, '>');
208 
209  if (!tagEnd)
210  return false;
211 
212  if (tagEnd - tag > (ssize_t)remainingCount)
213  return false;
214 
215  char * endofName = strpbrk(const_cast<char *>(tag), w_space);
216 
217  if (endofName > tagEnd)
218  endofName = const_cast<char *>(tagEnd);
219 
220  *endofName = '\0';
221 
222  theClient->end (tag + 2);
223 
224  --openESITags;
225 
226  remainingCount -= tagEnd - currentPos + 1;
227 
228  currentPos = tagEnd + 1;
229  }
230 
231  break;
232 
233  case ESICOMMENT: {
234  /* Further optimisation potential:
235  * 1) recognize end comments for esi and don't callback on
236  * comments.
237  * 2) provide the comment length to the caller.
238  */
239  /* Comments must not be nested, without CDATA
240  * and we don't support CDATA
241  */
242  char *commentEnd = strstr (const_cast<char *>(tag), "-->");
243 
244  if (!commentEnd) {
245  error = "missing end of comment";
246  return false;
247  }
248 
249  if (commentEnd - tag > (ssize_t)remainingCount) {
250  error = "comment ends beyond parse buffer";
251  return false;
252  }
253 
254  *commentEnd = '\0';
255  theClient->parserComment (tag + 4);
256  remainingCount -= commentEnd - currentPos + 3;
257  currentPos = commentEnd + 3;
258  }
259 
260  break;
261  break;
262 
263  default:
264  fatal ("unknown ESI tag type found");
265  };
266 
267  /*
268  * Find next esi tag (open or closing) or comment
269  * send tag, or full comment text
270  * rinse
271  */
272  }
273 
274  if (remainingCount)
275  theClient->parserDefault (currentPos,remainingCount);
276 
277  debugs(86, 5, "ESICustomParser::parse: Finished parsing, will return " << !openESITags);
278 
279  if (openESITags)
280  error = "ESI Tags still open";
281 
282  return !openESITags;
283 }
284 
285 long int
287 {
288  /* We don't track lines in the body */
289  return 0;
290 }
291 
292 char const *
294 {
295  if (error.size())
296  return error.termedBuf();
297  else
298  return "Parsing error strings not implemented";
299 }
300 
ESIParserClient * theClient
Definition: CustomParser.h:46
long int lineNumber() const
#define assert(EX)
Definition: assert.h:17
static Trie * GetTrie()
Definition: CustomParser.cc:25
ESICustomParser(ESIParserClient *)
Definition: CustomParser.cc:50
SBuf & append(const SBuf &S)
Definition: SBuf.cc:207
char const * errorString() const
virtual void parserComment(const char *s)=0
#define xisspace(x)
Definition: xis.h:17
EsiParserDefinition(ESICustomParser)
size_type size() const
Definition: SquidString.h:71
void append(char const *buf, int len)
Definition: String.cc:161
#define w_space
size_type length() const
Returns the number of bytes stored in SBuf.
Definition: SBuf.h:405
#define debugs(SECTION, LEVEL, CONTENT)
Definition: Debug.h:123
char const * termedBuf() const
Definition: SquidString.h:90
char const * findTag(char const *a, size_t b)
Definition: CustomParser.cc:61
void fatal(const char *message)
Definition: fatal.cc:39
ESITAG_t lastTag
Definition: CustomParser.h:51
const char * c_str()
Definition: SBuf.cc:546
virtual void start(const char *el, const char **attr, size_t attrCount)=0
virtual void end(const char *el)=0
static Trie * SearchTrie
Definition: CustomParser.h:37
bool parse(char const *dataToParse, size_t const lengthOfData, bool const endOfStream)
Definition: CustomParser.cc:82
MemBlob::size_type size_type
Definition: SBuf.h:90
#define NULL
Definition: types.h:166
virtual void parserDefault(const char *s, int len)=0

 

Introduction

Documentation

Support

Miscellaneous

Web Site Translations

Mirrors