IConv.cpp

Go to the documentation of this file.
00001 /*******************************************************************************
00002 * Copyright (C) 2005 Novell, Inc. All rights reserved.
00003 *
00004 * Redistribution and use in source and binary forms, with or without
00005 * modification, are permitted provided that the following conditions are met:
00006 *
00007 *  - Redistributions of source code must retain the above copyright notice,
00008 *    this list of conditions and the following disclaimer.
00009 *
00010 *  - Redistributions in binary form must reproduce the above copyright notice,
00011 *    this list of conditions and the following disclaimer in the documentation
00012 *    and/or other materials provided with the distribution.
00013 *
00014 *  - Neither the name of Novell, Inc., nor the names of its
00015 *    contributors may be used to endorse or promote products derived from this
00016 *    software without specific prior written permission.
00017 *
00018 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
00019 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00020 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00021 * ARE DISCLAIMED. IN NO EVENT SHALL Novell, Inc., OR THE 
00022 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
00023 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
00024 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
00025 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
00026 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
00027 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
00028 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029 *******************************************************************************/
00034 #include "blocxx/IConv.hpp"
00035 
00036 #ifdef BLOCXX_HAVE_ICONV_H
00037 #include "blocxx/Assertion.hpp"
00038 #include "blocxx/Format.hpp"
00039 #include "blocxx/Exec.hpp"
00040 
00041 #include <cwchar>
00042 #include <cwctype>
00043 
00044 #include <errno.h>
00045 
00046 namespace BLOCXX_NAMESPACE
00047 {
00048 
00049 // -------------------------------------------------------------------
00050 IConv_t::IConv_t()
00051    : m_iconv(iconv_t(-1))
00052 {
00053 }
00054 
00055 
00056 // -------------------------------------------------------------------
00057 IConv_t::IConv_t(const String &fromEncoding, const String &toEncoding)
00058 {
00059    m_iconv = ::iconv_open(toEncoding.c_str(), fromEncoding.c_str());
00060    if( m_iconv == iconv_t(-1))
00061    {
00062       BLOCXX_THROW(StringConversionException,
00063                    Format("Unable to convert from \"%1\" to \"%2\"",
00064                           fromEncoding, toEncoding).c_str());
00065    }
00066 }
00067 
00068 
00069 // -------------------------------------------------------------------
00070 IConv_t::~IConv_t()
00071 {
00072    close();
00073 }
00074 
00075 
00076 // -------------------------------------------------------------------
00077 bool
00078 IConv_t::open(const String &fromEncoding, const String &toEncoding)
00079 {
00080    close();
00081    m_iconv = ::iconv_open(toEncoding.c_str(), fromEncoding.c_str());
00082    return ( m_iconv != iconv_t(-1));
00083 }
00084 
00085 
00086 // -------------------------------------------------------------------
00087 size_t
00088 IConv_t::convert(char **istr, size_t *ibytesleft,
00089                char **ostr, size_t *obytesleft)
00090 {
00091 #ifdef BLOCXX_DARWIN
00092    BLOCXX_ASSERT(istr != NULL); 
00093    const char *ptr = (char*)*istr; 
00094    int ret = ::iconv(m_iconv, &ptr, ibytesleft, ostr, obytesleft); 
00095    *istr = (char*)ptr; 
00096    return ret; 
00097 #else
00098    return ::iconv(m_iconv, istr, ibytesleft, ostr, obytesleft);
00099 #endif
00100 }
00101 
00102 
00103 // -------------------------------------------------------------------
00104 bool
00105 IConv_t::close()
00106 {
00107    bool ret = true;
00108    int  err = errno;
00109 
00110    if( m_iconv != iconv_t(-1))
00111    {
00112       if( ::iconv_close(m_iconv) == -1)
00113          ret = false;
00114       m_iconv = iconv_t(-1);
00115    }
00116 
00117    errno = err;
00118    return ret;
00119 }
00120 
00121 
00122 // *******************************************************************
00123 namespace IConv
00124 {
00125 
00126 // -------------------------------------------------------------------
00127 static inline void
00128 mayThrowStringConversionException()
00129 {
00130    switch( errno)
00131    {
00132       case E2BIG:
00133       break;
00134 
00135       case EILSEQ:
00136          BLOCXX_THROW(StringConversionException,
00137          "Invalid character or multibyte sequence in the input");
00138       break;
00139 
00140       case EINVAL:
00141       default:
00142          BLOCXX_THROW(StringConversionException,
00143          "Incomplete multibyte sequence in the input");
00144       break;
00145    }
00146 }
00147 
00148 // -------------------------------------------------------------------
00149 String
00150 fromByteString(const String &enc, const char *str, size_t len)
00151 {
00152    if( !str || len == 0)
00153       return String();
00154 
00155    IConv_t      iconv(enc, "UTF-8"); // throws error
00156    String       out;
00157    char         obuf[4097];
00158    char        *optr;
00159    size_t       olen;
00160 
00161    char        *sptr = (char *)str;
00162    size_t       slen = len;
00163 
00164    while( slen > 0)
00165    {
00166       obuf[0] = '\0';
00167       optr = (char *)obuf;
00168       olen = sizeof(obuf) - sizeof(obuf[0]);
00169 
00170       size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00171       if( ret == size_t(-1))
00172       {
00173          mayThrowStringConversionException();
00174       }
00175       *optr = '\0';
00176       out  += obuf;
00177    }
00178 
00179    return out;
00180 }
00181 
00182 
00183 // -------------------------------------------------------------------
00184 String
00185 fromByteString(const String &enc, const std::string  &str)
00186 {
00187    return fromByteString(enc, str.c_str(), str.length());
00188 }
00189 
00190 
00191 // -------------------------------------------------------------------
00192 String
00193 fromWideString(const String &enc, const std::wstring &str)
00194 {
00195    if( str.empty())
00196       return String();
00197 
00198    IConv_t      iconv(enc, "UTF-8"); // throws error
00199    String       out;
00200    char         obuf[4097];
00201    char        *optr;
00202    size_t       olen;
00203 
00204    char        *sptr = (char *)str.c_str();
00205    size_t       slen = str.length() * sizeof(wchar_t);
00206 
00207    while( slen > 0)
00208    {
00209       obuf[0] = '\0';
00210       optr = (char *)obuf;
00211       olen = sizeof(obuf) - sizeof(obuf[0]);
00212 
00213       size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00214       if( ret == size_t(-1))
00215       {
00216          mayThrowStringConversionException();
00217       }
00218       *optr = '\0';
00219       out  += obuf;
00220    }
00221 
00222    return out;
00223 }
00224 
00225 // -------------------------------------------------------------------
00226 std::string
00227 toByteString(const String &enc, const String &utf8)
00228 {
00229    if( utf8.empty())
00230       return std::string();
00231 
00232    IConv_t      iconv("UTF-8", enc); // throws error
00233    std::string  out;
00234    char         obuf[4097];
00235    char        *optr;
00236    size_t       olen;
00237 
00238    char        *sptr = (char *)utf8.c_str();
00239    size_t       slen = utf8.length();
00240 
00241    while( slen > 0)
00242    {
00243       obuf[0] = '\0';
00244       optr = (char *)obuf;
00245       olen = sizeof(obuf) - sizeof(obuf[0]);
00246 
00247       size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00248       if( ret == size_t(-1))
00249       {
00250          mayThrowStringConversionException();
00251       }
00252       *optr = '\0';
00253       out  += obuf;
00254    }
00255 
00256    return out;
00257 }
00258 
00259 // -------------------------------------------------------------------
00260 std::wstring
00261 toWideString(const String &enc, const String &utf8)
00262 {
00263    if( utf8.empty())
00264       return std::wstring();
00265 
00266    IConv_t      iconv("UTF-8", enc); // throws error
00267    std::wstring out;
00268    wchar_t      obuf[1025];
00269    char        *optr;
00270    size_t       olen;
00271 
00272    char        *sptr = (char *)utf8.c_str();
00273    size_t       slen = utf8.length();
00274 
00275    while( slen > 0)
00276    {
00277       obuf[0] = L'\0';
00278       optr = (char *)obuf;
00279       olen = sizeof(obuf) - sizeof(obuf[0]);
00280 
00281       size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00282       if( ret == size_t(-1))
00283       {
00284          mayThrowStringConversionException();
00285       }
00286       *((wchar_t *)optr) = L'\0';
00287       out += obuf;
00288    }
00289 
00290    return out;
00291 }
00292 
00293 
#if 0
// -------------------------------------------------------------------
/**
 * (Compiled out.)  Run "/usr/bin/iconv --list" and return its output
 * split on carriage returns and newlines.
 *
 * Any exception raised while running the command is swallowed; unless
 * the exit status ends up as 0, an empty array is returned.
 */
StringArray
encodings()
{
   StringArray   argv;
   argv.push_back("/usr/bin/iconv");
   argv.push_back("--list");

   String        listing;
   int           exitStatus = -1;

   try
   {
      Exec::executeProcessAndGatherOutput(argv, listing, exitStatus);
   }
   catch(...)
   {
      // Best effort: fall through and let the status check decide.
   }

   if(exitStatus != 0)
   {
      return StringArray();
   }
   return listing.tokenize("\r\n");
}
#endif
00321 
00322 
00323 }  // End of IConv namespace
00324 }  // End of BLOCXX_NAMESPACE
00325 
00326 #endif // BLOCXX_HAVE_ICONV_H
00327 
00328 /* vim: set ts=8 sts=8 sw=8 ai noet: */
00329 

Generated on Fri Jun 16 15:39:08 2006 for blocxx by  doxygen 1.4.6