PosixRegEx.cpp

Go to the documentation of this file.
00001 /*******************************************************************************
00002 * Copyright (C) 2005 Novell, Inc. All rights reserved.
00003 *
00004 * Redistribution and use in source and binary forms, with or without
00005 * modification, are permitted provided that the following conditions are met:
00006 *
00007 *  - Redistributions of source code must retain the above copyright notice,
00008 *    this list of conditions and the following disclaimer.
00009 *
00010 *  - Redistributions in binary form must reproduce the above copyright notice,
00011 *    this list of conditions and the following disclaimer in the documentation
00012 *    and/or other materials provided with the distribution.
00013 *
00014 *  - Neither the name of Vintela, Inc., Novell, Inc., nor the names of its
00015 *    contributors may be used to endorse or promote products derived from this
00016 *    software without specific prior written permission.
00017 *
00018 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
00019 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00020 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00021 * ARE DISCLAIMED. IN NO EVENT SHALL Vintela, Inc., Novell, Inc., OR THE 
00022 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
00023 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
00024 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
00025 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
00026 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
00027 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
00028 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029 *******************************************************************************/
00034 #include "blocxx/PosixRegEx.hpp"
00035 #ifdef BLOCXX_HAVE_REGEX
00036 #ifdef BLOCXX_HAVE_REGEX_H
00037 
#include "blocxx/ExceptionIds.hpp"
#include "blocxx/Assertion.hpp"
#include "blocxx/Format.hpp"

#include <vector>
00041 
00042 
00043 namespace BLOCXX_NAMESPACE
00044 {
00045 
00046 #if defined(BLOCXX_DARWIN) && !defined(REG_NOERROR)
00047 #define REG_NOERROR 0
00048 #endif
00049 
00050 // -------------------------------------------------------------------
00051 static String
00052 substitute_caps(const PosixRegEx::MatchArray &sub,
00053                 const String &str, const String &rep)
00054 {
00055    static const char *cap_refs[] = {
00056       NULL,  "\\1", "\\2", "\\3", "\\4",
00057       "\\5", "\\6", "\\7", "\\8", "\\9", NULL
00058    };
00059 
00060    String res( rep);
00061    size_t pos;
00062 
00063    for(size_t i=1; cap_refs[i] != NULL; i++)
00064    {
00065       String cap;
00066 
00067       if( i < sub.size() && sub[i].rm_so >= 0 && sub[i].rm_eo >= 0)
00068       {
00069          cap = str.substring(sub[i].rm_so, sub[i].rm_eo
00070                                          - sub[i].rm_so);
00071       }
00072 
00073       pos = res.indexOf(cap_refs[i]);
00074       while( pos != String::npos)
00075       {
00076          size_t quotes = 0;
00077          size_t at = pos;
00078 
00079          while( at > 0 && res.charAt(--at) == '\\')
00080             quotes++;
00081 
00082          if( quotes % 2)
00083          {
00084             quotes = (quotes + 1) / 2;
00085 
00086             res = res.erase(pos - quotes, quotes);
00087 
00088             pos = res.indexOf(cap_refs[i],
00089                               pos + 2 - quotes);
00090          }
00091          else
00092          {
00093             quotes = quotes / 2;
00094 
00095             res = res.substring(0, pos - quotes) +
00096                   cap +
00097                   res.substring(pos + 2);
00098 
00099             pos = res.indexOf(cap_refs[i],
00100                               pos + cap.length() - quotes);
00101          }
00102       }
00103    }
00104    return res;
00105 }
00106 
00107 
00108 // -------------------------------------------------------------------
00109 static inline String
00110 getError(const regex_t *preg, const int code)
00111 {
00112    char err[256] = { '\0'};
00113    ::regerror(code, preg, err, sizeof(err));
00114    return String(err);
00115 }
00116 
00117 
00118 // -------------------------------------------------------------------
00119 PosixRegEx::PosixRegEx()
00120    : compiled(false)
00121    , m_flags(0)
00122    , m_ecode(REG_NOERROR)
00123 {
00124 }
00125 
00126 
00127 // -------------------------------------------------------------------
00128 PosixRegEx::PosixRegEx(const String &regex, int cflags)
00129    : compiled(false)
00130    , m_flags(0)
00131    , m_ecode(REG_NOERROR)
00132 {
00133    if( !compile(regex, cflags))
00134    {
00135       BLOCXX_THROW_ERR(RegExCompileException,
00136          errorString().c_str(), m_ecode);
00137    }
00138 }
00139 
00140 
00141 // -------------------------------------------------------------------
00142 PosixRegEx::PosixRegEx(const PosixRegEx &ref)
00143    : compiled(false)
00144    , m_flags(ref.m_flags)
00145    , m_ecode(REG_NOERROR)
00146    , m_rxstr(ref.m_rxstr)
00147 {
00148    if( ref.compiled && !compile(ref.m_rxstr, ref.m_flags))
00149    {
00150       BLOCXX_THROW_ERR(RegExCompileException,
00151          errorString().c_str(), m_ecode);
00152    }
00153 }
00154 
00155 
00156 // -------------------------------------------------------------------
00157 PosixRegEx::~PosixRegEx()
00158 {
00159    if( compiled)
00160    {
00161       regfree(&m_regex);
00162    }
00163 }
00164 
00165 
00166 // -------------------------------------------------------------------
00167 PosixRegEx &
00168 PosixRegEx::operator = (const PosixRegEx &ref)
00169 {
00170    if( !ref.compiled)
00171    {
00172       m_ecode = REG_NOERROR;
00173       m_error.erase();
00174       m_flags = ref.m_flags;
00175       m_rxstr = ref.m_rxstr;
00176       if( compiled)
00177       {
00178          regfree(&m_regex);
00179          compiled = false;
00180       }
00181    }
00182    else if( !compile(ref.m_rxstr, ref.m_flags))
00183    {
00184       BLOCXX_THROW_ERR(RegExCompileException,
00185          errorString().c_str(), m_ecode);
00186    }
00187    return *this;
00188 }
00189 
00190 
00191 // -------------------------------------------------------------------
00192 bool
00193 PosixRegEx::compile(const String &regex, int cflags)
00194 {
00195    if( compiled)
00196    {
00197       regfree(&m_regex);
00198       compiled = false;
00199    }
00200 
00201    m_rxstr = regex;
00202    m_flags = cflags;
00203    m_ecode = ::regcomp(&m_regex, regex.c_str(), cflags);
00204    if( m_ecode == REG_NOERROR)
00205    {
00206       compiled = true;
00207       m_error.erase();
00208       return true;
00209    }
00210    else
00211    {
00212       m_error = getError(&m_regex, m_ecode);
00213       return false;
00214    }
00215 }
00216 
00217 
00218 // -------------------------------------------------------------------
00219 int
00220 PosixRegEx::errorCode()
00221 {
00222    return m_ecode;
00223 }
00224 
00225 
00226 // -------------------------------------------------------------------
00227 String
00228 PosixRegEx::errorString() const
00229 {
00230    return m_error;
00231 }
00232 
00233 
00234 // -------------------------------------------------------------------
00235 String
00236 PosixRegEx::patternString() const
00237 {
00238    return m_rxstr;
00239 }
00240 
00241 
00242 // -------------------------------------------------------------------
00243 int
00244 PosixRegEx::compileFlags() const
00245 {
00246    return m_flags;
00247 }
00248 
00249 
00250 // -------------------------------------------------------------------
00251 bool
00252 PosixRegEx::isCompiled() const
00253 {
00254    return compiled;
00255 }
00256 
00257 
00258 // -------------------------------------------------------------------
00259 bool
00260 PosixRegEx::execute(MatchArray &sub, const String &str,
00261                size_t index, size_t count, int eflags)
00262 {
00263    if( !compiled)
00264    {
00265       BLOCXX_THROW(RegExCompileException,
00266          "Regular expression is not compiled");
00267    }
00268 
00269    if( index > str.length())
00270    {
00271       BLOCXX_THROW(OutOfBoundsException,
00272          Format("String index out of bounds ("
00273                 "length = %1, index = %2).",
00274                 str.length(), index
00275          ).c_str());
00276    }
00277 
00278    if( count == 0)
00279    {
00280       count = m_regex.re_nsub + 1;
00281    }
00282    regmatch_t rsub[count];
00283    rsub[0].rm_so = -1;
00284    rsub[0].rm_eo = -1;
00285 
00286    sub.clear();
00287    m_ecode = ::regexec(&m_regex, str.c_str() + index,
00288                        count, rsub, eflags);
00289    if( m_ecode == REG_NOERROR)
00290    {
00291       m_error.erase();
00292       if( m_flags & REG_NOSUB)
00293       {
00294          return true;
00295       }
00296 
00297       sub.resize(count);
00298       for(size_t n = 0; n < count; n++)
00299       {
00300          if( rsub[n].rm_so < 0 || rsub[n].rm_eo < 0)
00301          {
00302             sub[n] = rsub[n];
00303          }
00304          else
00305          {
00306             rsub[n].rm_so += index;
00307             rsub[n].rm_eo += index;
00308             sub[n] = rsub[n];
00309          }
00310       }
00311       return true;
00312    }
00313    else
00314    {
00315       m_error = getError(&m_regex, m_ecode);
00316       return false;
00317    }
00318 }
00319 
00320 
00321 // -------------------------------------------------------------------
00322 StringArray
00323 PosixRegEx::capture(const String &str, size_t index, size_t count, int eflags)
00324 {
00325    if( !compiled)
00326    {
00327       BLOCXX_THROW(RegExCompileException,
00328          "Regular expression is not compiled");
00329    }
00330 
00331    MatchArray  rsub;
00332    StringArray ssub;
00333 
00334    bool match = execute(rsub, str, index, count, eflags);
00335    if( match)
00336    {
00337       if( rsub.empty())
00338       {
00339          BLOCXX_THROW(RegExCompileException,
00340             "Non-capturing regular expression");
00341       }
00342 
00343       MatchArray::const_iterator i=rsub.begin();
00344       for( ; i != rsub.end(); ++i)
00345       {
00346          if( i->rm_so >= 0 && i->rm_eo >= 0)
00347          {
00348             ssub.push_back(str.substring(i->rm_so,
00349                                 i->rm_eo - i->rm_so));
00350          }
00351          else
00352          {
00353             ssub.push_back(String(""));
00354          }
00355       }
00356    }
00357    else if(m_ecode != REG_NOMATCH)
00358    {
00359       BLOCXX_THROW_ERR(RegExExecuteException,
00360          errorString().c_str(), m_ecode);
00361    }
00362    return ssub;
00363 }
00364 
00365 
00366 // -------------------------------------------------------------------
00367 blocxx::String
00368 PosixRegEx::replace(const String &str, const String &rep,
00369                     bool global, int eflags)
00370 {
00371    if( !compiled)
00372    {
00373       BLOCXX_THROW(RegExCompileException,
00374          "Regular expression is not compiled");
00375    }
00376 
00377    MatchArray  rsub;
00378    bool        match;
00379    size_t      off = 0;
00380    String      out = str;
00381 
00382    do
00383    {
00384       match = execute(rsub, out, off, 0, eflags);
00385       if( match)
00386       {
00387          if( rsub.empty()      ||
00388              rsub[0].rm_so < 0 ||
00389              rsub[0].rm_eo < 0)
00390          {
00391             // only if empty (missused as guard).
00392             BLOCXX_THROW(RegExCompileException,
00393                "Non-capturing regular expression");
00394          }
00395 
00396          String res = substitute_caps(rsub, out, rep);
00397 
00398          out = out.substring(0, rsub[0].rm_so) +
00399                res + out.substring(rsub[0].rm_eo);
00400 
00401          off = rsub[0].rm_so + res.length();
00402       }
00403       else if(m_ecode == REG_NOMATCH)
00404       {
00405          m_ecode = REG_NOERROR;
00406          m_error.erase();
00407       }
00408       else
00409       {
00410          BLOCXX_THROW_ERR(RegExExecuteException,
00411             errorString().c_str(), m_ecode);
00412       }
00413    } while(global && match && out.length() > off);
00414 
00415    return out;
00416 }
00417 
00418 // -------------------------------------------------------------------
00419 StringArray
00420 PosixRegEx::split(const String &str, bool empty, int eflags)
00421 {
00422    if( !compiled)
00423    {
00424       BLOCXX_THROW(RegExCompileException,
00425          "Regular expression is not compiled");
00426    }
00427 
00428    MatchArray  rsub;
00429    StringArray ssub;
00430    bool        match;
00431    size_t      off = 0;
00432    size_t      len = str.length();
00433 
00434    do
00435    {
00436       match = execute(rsub, str, off, 1, eflags);
00437       if( match)
00438       {
00439          if( rsub.empty()      ||
00440              rsub[0].rm_so < 0 ||
00441              rsub[0].rm_eo < 0)
00442          {
00443             BLOCXX_THROW(RegExCompileException,
00444                "Non-capturing regular expression");
00445          }
00446 
00447          if( empty || ((size_t)rsub[0].rm_so > off))
00448          {
00449             ssub.push_back(str.substring(off,
00450                                rsub[0].rm_so - off));
00451          }
00452          off = rsub[0].rm_eo;
00453       }
00454       else if(m_ecode == REG_NOMATCH)
00455       {
00456          String tmp = str.substring(off);
00457          if( empty || !tmp.empty())
00458          {
00459             ssub.push_back(tmp);
00460          }
00461          m_ecode = REG_NOERROR;
00462          m_error.erase();
00463       }
00464       else
00465       {
00466          BLOCXX_THROW_ERR(RegExExecuteException,
00467             errorString().c_str(), m_ecode);
00468       }
00469    } while(match && len > off);
00470 
00471    return ssub;
00472 }
00473 
00474 
00475 // -------------------------------------------------------------------
00476 StringArray
00477 PosixRegEx::grep(const StringArray &src, int eflags)
00478 {
00479    if( !compiled)
00480    {
00481       BLOCXX_THROW(RegExCompileException,
00482          "Regular expression is not compiled");
00483    }
00484 
00485    m_ecode = REG_NOERROR;
00486    m_error.erase();
00487 
00488    StringArray out;
00489    if( !src.empty())
00490    {
00491       StringArray::const_iterator i=src.begin();
00492       for( ; i != src.end(); ++i)
00493       {
00494          int ret = ::regexec(&m_regex, i->c_str(),
00495                              0, NULL, eflags);
00496          if( ret == REG_NOERROR)
00497          {
00498             out.push_back(*i);
00499          }
00500          else if(ret != REG_NOMATCH)
00501          {
00502             m_ecode = ret;
00503             m_error = getError(&m_regex, m_ecode);
00504             BLOCXX_THROW_ERR(RegExExecuteException,
00505                errorString().c_str(), m_ecode);
00506          }
00507       }
00508    }
00509 
00510    return out;
00511 }
00512 
00513 
00514 // -------------------------------------------------------------------
00515 bool
00516 PosixRegEx::match(const String &str, size_t index, int eflags) const
00517 {
00518    if( !compiled)
00519    {
00520       BLOCXX_THROW(RegExCompileException,
00521          "Regular expression is not compiled");
00522    }
00523 
00524    if( index > str.length())
00525    {
00526       BLOCXX_THROW(OutOfBoundsException,
00527          Format("String index out of bounds ("
00528                 "length = %1, index = %2).",
00529                 str.length(), index
00530          ).c_str());
00531    }
00532 
00533    m_ecode = ::regexec(&m_regex, str.c_str() + index,
00534                        0, NULL, eflags);
00535 
00536    if( m_ecode == REG_NOERROR)
00537    {
00538       m_error.erase();
00539       return true;
00540    }
00541    else if(m_ecode == REG_NOMATCH)
00542    {
00543       m_error = getError(&m_regex, m_ecode);
00544       return false;
00545    }
00546    else
00547    {
00548       m_error = getError(&m_regex, m_ecode);
00549       BLOCXX_THROW_ERR(RegExExecuteException,
00550          errorString().c_str(), m_ecode);
00551    }
00552 }
00553 
00554 
00555 // -------------------------------------------------------------------
00556 } // namespace BLOCXX_NAMESPACE
00557 
00558 #endif // BLOCXX_HAVE_REGEX_H
00559 #endif // BLOCXX_HAVE_REGEX
00560 
00561 /* vim: set ts=8 sts=8 sw=8 ai noet: */
00562 

Generated on Fri Jun 16 15:39:08 2006 for blocxx by  doxygen 1.4.6