00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00034 #include "blocxx/IConv.hpp"
00035
00036 #ifdef BLOCXX_HAVE_ICONV_H
00037 #include "blocxx/Assertion.hpp"
00038 #include "blocxx/Format.hpp"
00039 #include "blocxx/Exec.hpp"
00040
00041 #include <cwchar>
00042 #include <cwctype>
00043
00044 #include <errno.h>
00045
00046 namespace BLOCXX_NAMESPACE
00047 {
00048
00049
00050 IConv_t::IConv_t()
00051 : m_iconv(iconv_t(-1))
00052 {
00053 }
00054
00055
00056
00057 IConv_t::IConv_t(const String &fromEncoding, const String &toEncoding)
00058 {
00059 m_iconv = ::iconv_open(toEncoding.c_str(), fromEncoding.c_str());
00060 if( m_iconv == iconv_t(-1))
00061 {
00062 BLOCXX_THROW(StringConversionException,
00063 Format("Unable to convert from \"%1\" to \"%2\"",
00064 fromEncoding, toEncoding).c_str());
00065 }
00066 }
00067
00068
00069
00070 IConv_t::~IConv_t()
00071 {
00072 close();
00073 }
00074
00075
00076
00077 bool
00078 IConv_t::open(const String &fromEncoding, const String &toEncoding)
00079 {
00080 close();
00081 m_iconv = ::iconv_open(toEncoding.c_str(), fromEncoding.c_str());
00082 return ( m_iconv != iconv_t(-1));
00083 }
00084
00085
00086
00087 size_t
00088 IConv_t::convert(char **istr, size_t *ibytesleft,
00089 char **ostr, size_t *obytesleft)
00090 {
00091 #ifdef BLOCXX_DARWIN
00092 BLOCXX_ASSERT(istr != NULL);
00093 const char *ptr = (char*)*istr;
00094 int ret = ::iconv(m_iconv, &ptr, ibytesleft, ostr, obytesleft);
00095 *istr = (char*)ptr;
00096 return ret;
00097 #else
00098 return ::iconv(m_iconv, istr, ibytesleft, ostr, obytesleft);
00099 #endif
00100 }
00101
00102
00103
00104 bool
00105 IConv_t::close()
00106 {
00107 bool ret = true;
00108 int err = errno;
00109
00110 if( m_iconv != iconv_t(-1))
00111 {
00112 if( ::iconv_close(m_iconv) == -1)
00113 ret = false;
00114 m_iconv = iconv_t(-1);
00115 }
00116
00117 errno = err;
00118 return ret;
00119 }
00120
00121
00122
00123 namespace IConv
00124 {
00125
00126
00127 static inline void
00128 mayThrowStringConversionException()
00129 {
00130 switch( errno)
00131 {
00132 case E2BIG:
00133 break;
00134
00135 case EILSEQ:
00136 BLOCXX_THROW(StringConversionException,
00137 "Invalid character or multibyte sequence in the input");
00138 break;
00139
00140 case EINVAL:
00141 default:
00142 BLOCXX_THROW(StringConversionException,
00143 "Incomplete multibyte sequence in the input");
00144 break;
00145 }
00146 }
00147
00148
00149 String
00150 fromByteString(const String &enc, const char *str, size_t len)
00151 {
00152 if( !str || len == 0)
00153 return String();
00154
00155 IConv_t iconv(enc, "UTF-8");
00156 String out;
00157 char obuf[4097];
00158 char *optr;
00159 size_t olen;
00160
00161 char *sptr = (char *)str;
00162 size_t slen = len;
00163
00164 while( slen > 0)
00165 {
00166 obuf[0] = '\0';
00167 optr = (char *)obuf;
00168 olen = sizeof(obuf) - sizeof(obuf[0]);
00169
00170 size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00171 if( ret == size_t(-1))
00172 {
00173 mayThrowStringConversionException();
00174 }
00175 *optr = '\0';
00176 out += obuf;
00177 }
00178
00179 return out;
00180 }
00181
00182
00183
00184 String
00185 fromByteString(const String &enc, const std::string &str)
00186 {
00187 return fromByteString(enc, str.c_str(), str.length());
00188 }
00189
00190
00191
00192 String
00193 fromWideString(const String &enc, const std::wstring &str)
00194 {
00195 if( str.empty())
00196 return String();
00197
00198 IConv_t iconv(enc, "UTF-8");
00199 String out;
00200 char obuf[4097];
00201 char *optr;
00202 size_t olen;
00203
00204 char *sptr = (char *)str.c_str();
00205 size_t slen = str.length() * sizeof(wchar_t);
00206
00207 while( slen > 0)
00208 {
00209 obuf[0] = '\0';
00210 optr = (char *)obuf;
00211 olen = sizeof(obuf) - sizeof(obuf[0]);
00212
00213 size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00214 if( ret == size_t(-1))
00215 {
00216 mayThrowStringConversionException();
00217 }
00218 *optr = '\0';
00219 out += obuf;
00220 }
00221
00222 return out;
00223 }
00224
00225
00226 std::string
00227 toByteString(const String &enc, const String &utf8)
00228 {
00229 if( utf8.empty())
00230 return std::string();
00231
00232 IConv_t iconv("UTF-8", enc);
00233 std::string out;
00234 char obuf[4097];
00235 char *optr;
00236 size_t olen;
00237
00238 char *sptr = (char *)utf8.c_str();
00239 size_t slen = utf8.length();
00240
00241 while( slen > 0)
00242 {
00243 obuf[0] = '\0';
00244 optr = (char *)obuf;
00245 olen = sizeof(obuf) - sizeof(obuf[0]);
00246
00247 size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00248 if( ret == size_t(-1))
00249 {
00250 mayThrowStringConversionException();
00251 }
00252 *optr = '\0';
00253 out += obuf;
00254 }
00255
00256 return out;
00257 }
00258
00259
00260 std::wstring
00261 toWideString(const String &enc, const String &utf8)
00262 {
00263 if( utf8.empty())
00264 return std::wstring();
00265
00266 IConv_t iconv("UTF-8", enc);
00267 std::wstring out;
00268 wchar_t obuf[1025];
00269 char *optr;
00270 size_t olen;
00271
00272 char *sptr = (char *)utf8.c_str();
00273 size_t slen = utf8.length();
00274
00275 while( slen > 0)
00276 {
00277 obuf[0] = L'\0';
00278 optr = (char *)obuf;
00279 olen = sizeof(obuf) - sizeof(obuf[0]);
00280
00281 size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00282 if( ret == size_t(-1))
00283 {
00284 mayThrowStringConversionException();
00285 }
00286 *((wchar_t *)optr) = L'\0';
00287 out += obuf;
00288 }
00289
00290 return out;
00291 }
00292
00293
00294 #if 0
00295
00296 StringArray
00297 encodings()
00298 {
00299 StringArray command;
00300 String output;
00301 int status = -1;
00302
00303 command.push_back("/usr/bin/iconv");
00304 command.push_back("--list");
00305
00306 try
00307 {
00308 Exec::executeProcessAndGatherOutput(command, output, status);
00309 }
00310 catch(...)
00311 {
00312 }
00313
00314 if(status == 0)
00315 {
00316 return output.tokenize("\r\n");
00317 }
00318 return StringArray();
00319 }
00320 #endif
00321
00322
00323 }
00324 }
00325
00326 #endif // BLOCXX_HAVE_ICONV_H
00327
00328
00329