00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00034 #include "blocxx/PosixRegEx.hpp"
00035 #ifdef BLOCXX_HAVE_REGEX
00036 #ifdef BLOCXX_HAVE_REGEX_H
00037
#include "blocxx/ExceptionIds.hpp"
#include "blocxx/Assertion.hpp"
#include "blocxx/Format.hpp"

#include <vector>
00041
00042
00043 namespace BLOCXX_NAMESPACE
00044 {
00045
// Fallback: on BLOCXX_DARWIN builds where the regex header did not provide
// REG_NOERROR, define it as 0 so the comparisons below still compile.
#if defined(BLOCXX_DARWIN)
#ifndef REG_NOERROR
#define REG_NOERROR 0
#endif
#endif
00049
00050
00051 static String
00052 substitute_caps(const PosixRegEx::MatchArray &sub,
00053 const String &str, const String &rep)
00054 {
00055 static const char *cap_refs[] = {
00056 NULL, "\\1", "\\2", "\\3", "\\4",
00057 "\\5", "\\6", "\\7", "\\8", "\\9", NULL
00058 };
00059
00060 String res( rep);
00061 size_t pos;
00062
00063 for(size_t i=1; cap_refs[i] != NULL; i++)
00064 {
00065 String cap;
00066
00067 if( i < sub.size() && sub[i].rm_so >= 0 && sub[i].rm_eo >= 0)
00068 {
00069 cap = str.substring(sub[i].rm_so, sub[i].rm_eo
00070 - sub[i].rm_so);
00071 }
00072
00073 pos = res.indexOf(cap_refs[i]);
00074 while( pos != String::npos)
00075 {
00076 size_t quotes = 0;
00077 size_t at = pos;
00078
00079 while( at > 0 && res.charAt(--at) == '\\')
00080 quotes++;
00081
00082 if( quotes % 2)
00083 {
00084 quotes = (quotes + 1) / 2;
00085
00086 res = res.erase(pos - quotes, quotes);
00087
00088 pos = res.indexOf(cap_refs[i],
00089 pos + 2 - quotes);
00090 }
00091 else
00092 {
00093 quotes = quotes / 2;
00094
00095 res = res.substring(0, pos - quotes) +
00096 cap +
00097 res.substring(pos + 2);
00098
00099 pos = res.indexOf(cap_refs[i],
00100 pos + cap.length() - quotes);
00101 }
00102 }
00103 }
00104 return res;
00105 }
00106
00107
00108
00109 static inline String
00110 getError(const regex_t *preg, const int code)
00111 {
00112 char err[256] = { '\0'};
00113 ::regerror(code, preg, err, sizeof(err));
00114 return String(err);
00115 }
00116
00117
00118
00119 PosixRegEx::PosixRegEx()
00120 : compiled(false)
00121 , m_flags(0)
00122 , m_ecode(REG_NOERROR)
00123 {
00124 }
00125
00126
00127
00128 PosixRegEx::PosixRegEx(const String ®ex, int cflags)
00129 : compiled(false)
00130 , m_flags(0)
00131 , m_ecode(REG_NOERROR)
00132 {
00133 if( !compile(regex, cflags))
00134 {
00135 BLOCXX_THROW_ERR(RegExCompileException,
00136 errorString().c_str(), m_ecode);
00137 }
00138 }
00139
00140
00141
00142 PosixRegEx::PosixRegEx(const PosixRegEx &ref)
00143 : compiled(false)
00144 , m_flags(ref.m_flags)
00145 , m_ecode(REG_NOERROR)
00146 , m_rxstr(ref.m_rxstr)
00147 {
00148 if( ref.compiled && !compile(ref.m_rxstr, ref.m_flags))
00149 {
00150 BLOCXX_THROW_ERR(RegExCompileException,
00151 errorString().c_str(), m_ecode);
00152 }
00153 }
00154
00155
00156
00157 PosixRegEx::~PosixRegEx()
00158 {
00159 if( compiled)
00160 {
00161 regfree(&m_regex);
00162 }
00163 }
00164
00165
00166
00167 PosixRegEx &
00168 PosixRegEx::operator = (const PosixRegEx &ref)
00169 {
00170 if( !ref.compiled)
00171 {
00172 m_ecode = REG_NOERROR;
00173 m_error.erase();
00174 m_flags = ref.m_flags;
00175 m_rxstr = ref.m_rxstr;
00176 if( compiled)
00177 {
00178 regfree(&m_regex);
00179 compiled = false;
00180 }
00181 }
00182 else if( !compile(ref.m_rxstr, ref.m_flags))
00183 {
00184 BLOCXX_THROW_ERR(RegExCompileException,
00185 errorString().c_str(), m_ecode);
00186 }
00187 return *this;
00188 }
00189
00190
00191
00192 bool
00193 PosixRegEx::compile(const String ®ex, int cflags)
00194 {
00195 if( compiled)
00196 {
00197 regfree(&m_regex);
00198 compiled = false;
00199 }
00200
00201 m_rxstr = regex;
00202 m_flags = cflags;
00203 m_ecode = ::regcomp(&m_regex, regex.c_str(), cflags);
00204 if( m_ecode == REG_NOERROR)
00205 {
00206 compiled = true;
00207 m_error.erase();
00208 return true;
00209 }
00210 else
00211 {
00212 m_error = getError(&m_regex, m_ecode);
00213 return false;
00214 }
00215 }
00216
00217
00218
00219 int
00220 PosixRegEx::errorCode()
00221 {
00222 return m_ecode;
00223 }
00224
00225
00226
00227 String
00228 PosixRegEx::errorString() const
00229 {
00230 return m_error;
00231 }
00232
00233
00234
00235 String
00236 PosixRegEx::patternString() const
00237 {
00238 return m_rxstr;
00239 }
00240
00241
00242
00243 int
00244 PosixRegEx::compileFlags() const
00245 {
00246 return m_flags;
00247 }
00248
00249
00250
00251 bool
00252 PosixRegEx::isCompiled() const
00253 {
00254 return compiled;
00255 }
00256
00257
00258
00259 bool
00260 PosixRegEx::execute(MatchArray &sub, const String &str,
00261 size_t index, size_t count, int eflags)
00262 {
00263 if( !compiled)
00264 {
00265 BLOCXX_THROW(RegExCompileException,
00266 "Regular expression is not compiled");
00267 }
00268
00269 if( index > str.length())
00270 {
00271 BLOCXX_THROW(OutOfBoundsException,
00272 Format("String index out of bounds ("
00273 "length = %1, index = %2).",
00274 str.length(), index
00275 ).c_str());
00276 }
00277
00278 if( count == 0)
00279 {
00280 count = m_regex.re_nsub + 1;
00281 }
00282 regmatch_t rsub[count];
00283 rsub[0].rm_so = -1;
00284 rsub[0].rm_eo = -1;
00285
00286 sub.clear();
00287 m_ecode = ::regexec(&m_regex, str.c_str() + index,
00288 count, rsub, eflags);
00289 if( m_ecode == REG_NOERROR)
00290 {
00291 m_error.erase();
00292 if( m_flags & REG_NOSUB)
00293 {
00294 return true;
00295 }
00296
00297 sub.resize(count);
00298 for(size_t n = 0; n < count; n++)
00299 {
00300 if( rsub[n].rm_so < 0 || rsub[n].rm_eo < 0)
00301 {
00302 sub[n] = rsub[n];
00303 }
00304 else
00305 {
00306 rsub[n].rm_so += index;
00307 rsub[n].rm_eo += index;
00308 sub[n] = rsub[n];
00309 }
00310 }
00311 return true;
00312 }
00313 else
00314 {
00315 m_error = getError(&m_regex, m_ecode);
00316 return false;
00317 }
00318 }
00319
00320
00321
00322 StringArray
00323 PosixRegEx::capture(const String &str, size_t index, size_t count, int eflags)
00324 {
00325 if( !compiled)
00326 {
00327 BLOCXX_THROW(RegExCompileException,
00328 "Regular expression is not compiled");
00329 }
00330
00331 MatchArray rsub;
00332 StringArray ssub;
00333
00334 bool match = execute(rsub, str, index, count, eflags);
00335 if( match)
00336 {
00337 if( rsub.empty())
00338 {
00339 BLOCXX_THROW(RegExCompileException,
00340 "Non-capturing regular expression");
00341 }
00342
00343 MatchArray::const_iterator i=rsub.begin();
00344 for( ; i != rsub.end(); ++i)
00345 {
00346 if( i->rm_so >= 0 && i->rm_eo >= 0)
00347 {
00348 ssub.push_back(str.substring(i->rm_so,
00349 i->rm_eo - i->rm_so));
00350 }
00351 else
00352 {
00353 ssub.push_back(String(""));
00354 }
00355 }
00356 }
00357 else if(m_ecode != REG_NOMATCH)
00358 {
00359 BLOCXX_THROW_ERR(RegExExecuteException,
00360 errorString().c_str(), m_ecode);
00361 }
00362 return ssub;
00363 }
00364
00365
00366
00367 blocxx::String
00368 PosixRegEx::replace(const String &str, const String &rep,
00369 bool global, int eflags)
00370 {
00371 if( !compiled)
00372 {
00373 BLOCXX_THROW(RegExCompileException,
00374 "Regular expression is not compiled");
00375 }
00376
00377 MatchArray rsub;
00378 bool match;
00379 size_t off = 0;
00380 String out = str;
00381
00382 do
00383 {
00384 match = execute(rsub, out, off, 0, eflags);
00385 if( match)
00386 {
00387 if( rsub.empty() ||
00388 rsub[0].rm_so < 0 ||
00389 rsub[0].rm_eo < 0)
00390 {
00391
00392 BLOCXX_THROW(RegExCompileException,
00393 "Non-capturing regular expression");
00394 }
00395
00396 String res = substitute_caps(rsub, out, rep);
00397
00398 out = out.substring(0, rsub[0].rm_so) +
00399 res + out.substring(rsub[0].rm_eo);
00400
00401 off = rsub[0].rm_so + res.length();
00402 }
00403 else if(m_ecode == REG_NOMATCH)
00404 {
00405 m_ecode = REG_NOERROR;
00406 m_error.erase();
00407 }
00408 else
00409 {
00410 BLOCXX_THROW_ERR(RegExExecuteException,
00411 errorString().c_str(), m_ecode);
00412 }
00413 } while(global && match && out.length() > off);
00414
00415 return out;
00416 }
00417
00418
00419 StringArray
00420 PosixRegEx::split(const String &str, bool empty, int eflags)
00421 {
00422 if( !compiled)
00423 {
00424 BLOCXX_THROW(RegExCompileException,
00425 "Regular expression is not compiled");
00426 }
00427
00428 MatchArray rsub;
00429 StringArray ssub;
00430 bool match;
00431 size_t off = 0;
00432 size_t len = str.length();
00433
00434 do
00435 {
00436 match = execute(rsub, str, off, 1, eflags);
00437 if( match)
00438 {
00439 if( rsub.empty() ||
00440 rsub[0].rm_so < 0 ||
00441 rsub[0].rm_eo < 0)
00442 {
00443 BLOCXX_THROW(RegExCompileException,
00444 "Non-capturing regular expression");
00445 }
00446
00447 if( empty || ((size_t)rsub[0].rm_so > off))
00448 {
00449 ssub.push_back(str.substring(off,
00450 rsub[0].rm_so - off));
00451 }
00452 off = rsub[0].rm_eo;
00453 }
00454 else if(m_ecode == REG_NOMATCH)
00455 {
00456 String tmp = str.substring(off);
00457 if( empty || !tmp.empty())
00458 {
00459 ssub.push_back(tmp);
00460 }
00461 m_ecode = REG_NOERROR;
00462 m_error.erase();
00463 }
00464 else
00465 {
00466 BLOCXX_THROW_ERR(RegExExecuteException,
00467 errorString().c_str(), m_ecode);
00468 }
00469 } while(match && len > off);
00470
00471 return ssub;
00472 }
00473
00474
00475
00476 StringArray
00477 PosixRegEx::grep(const StringArray &src, int eflags)
00478 {
00479 if( !compiled)
00480 {
00481 BLOCXX_THROW(RegExCompileException,
00482 "Regular expression is not compiled");
00483 }
00484
00485 m_ecode = REG_NOERROR;
00486 m_error.erase();
00487
00488 StringArray out;
00489 if( !src.empty())
00490 {
00491 StringArray::const_iterator i=src.begin();
00492 for( ; i != src.end(); ++i)
00493 {
00494 int ret = ::regexec(&m_regex, i->c_str(),
00495 0, NULL, eflags);
00496 if( ret == REG_NOERROR)
00497 {
00498 out.push_back(*i);
00499 }
00500 else if(ret != REG_NOMATCH)
00501 {
00502 m_ecode = ret;
00503 m_error = getError(&m_regex, m_ecode);
00504 BLOCXX_THROW_ERR(RegExExecuteException,
00505 errorString().c_str(), m_ecode);
00506 }
00507 }
00508 }
00509
00510 return out;
00511 }
00512
00513
00514
00515 bool
00516 PosixRegEx::match(const String &str, size_t index, int eflags) const
00517 {
00518 if( !compiled)
00519 {
00520 BLOCXX_THROW(RegExCompileException,
00521 "Regular expression is not compiled");
00522 }
00523
00524 if( index > str.length())
00525 {
00526 BLOCXX_THROW(OutOfBoundsException,
00527 Format("String index out of bounds ("
00528 "length = %1, index = %2).",
00529 str.length(), index
00530 ).c_str());
00531 }
00532
00533 m_ecode = ::regexec(&m_regex, str.c_str() + index,
00534 0, NULL, eflags);
00535
00536 if( m_ecode == REG_NOERROR)
00537 {
00538 m_error.erase();
00539 return true;
00540 }
00541 else if(m_ecode == REG_NOMATCH)
00542 {
00543 m_error = getError(&m_regex, m_ecode);
00544 return false;
00545 }
00546 else
00547 {
00548 m_error = getError(&m_regex, m_ecode);
00549 BLOCXX_THROW_ERR(RegExExecuteException,
00550 errorString().c_str(), m_ecode);
00551 }
00552 }
00553
00554
00555
00556 }
00557
00558 #endif // BLOCXX_HAVE_REGEX_H
00559 #endif // BLOCXX_HAVE_REGEX
00560
00561
00562