TagFileParser.cc

Go to the documentation of this file.
00001 /*---------------------------------------------------------------------\
00002 |                          ____ _   __ __ ___                          |
00003 |                         |__  / \ / / . \ . \                         |
00004 |                           / / \ V /|  _/  _/                         |
00005 |                          / /__ | | | | | |                           |
00006 |                         /_____||_| |_| |_|                           |
00007 |                                                                      |
00008 \---------------------------------------------------------------------*/
00012 #include <iostream>
00013 #include <fstream>
00014 #include <sstream>
00015 
00016 #include <boost/tokenizer.hpp>
00017 #include <boost/algorithm/string.hpp>
00018 
00019 #include "zypp/base/Logger.h"
00020 #include "zypp/base/PtrTypes.h"
00021 #include "zypp/base/String.h"
00022 
00023 
00024 #include "zypp/parser/tagfile/TagFileParser.h"
00025 #include "zypp/parser/tagfile/ParseException.h"
00026 
00027 
00028 #undef ZYPP_BASE_LOGGER_LOGGROUP
00029 #define ZYPP_BASE_LOGGER_LOGGROUP "TagFileParser"
00030 
00031 using namespace std;
00032 using namespace boost;
00033 
00035 namespace zypp
00036 { 
00037 
00038   namespace parser
00039   { 
00040 
00041     namespace tagfile
00042     { 
00043 
00044       void dumpRegexpResults( const boost::smatch &what )
00045       {
00046         for ( unsigned int k=0; k < what.size(); k++)
00047         {
00048           XXX << "[match "<< k << "] [" << what[k] << "]" << std::endl;
00049         }
00050       }
00051 
00052       void dumpRegexpResults2( const boost::smatch &what )
00053       {
00054         for ( unsigned int k=0; k < what.size(); k++)
00055         {
00056           DBG << "[match "<< k << "] [" << what[k] << "]" << std::endl;
00057         }
00058       }
00059 
00060       TagFileParser::TagFileParser()
00061       {
00062       }
00063 
00064       void TagFileParser::beginParse()
00065       {
00066       }
00067 
00068       void TagFileParser::endParse()
00069       {
00070       }
00071 
00072       void TagFileParser::consume( const SingleTag &tag )
00073       {
00074       }
00075 
00076       void TagFileParser::consume( const MultiTag &tag )
00077       {
00078       }
00079 
00081       //
00082       //        METHOD NAME : Parser::parse
00083       //        METHOD TYPE : void
00084       //
00085       void TagFileParser::parse( const Pathname & file_r)
00086       {
00087         // save parsed filename for debug
00088         _file_r = file_r;
00089         
00090         std::ifstream file(file_r.asString().c_str());
00091 
00092         boost::regex rxComment("^[[:space:]]*#(.*)$");
00093         boost::regex rxMStart("^\\+([^[:space:]^\\.]+)(\\.([^[:space:]]+))?:$");
00094         boost::regex rxMEnd("^\\-([^[:space:]^\\.]+)(\\.([^[:space:]]+))?:$");
00095         boost::regex rxSStart("^=([^[:space:]^\\.]+)(\\.([^[:space:]]+))?:[[:space:]]*(.*)$");
00096         boost::regex rxEmpty("^([[:space:]]*)$");
00097 
00098         if (!file) {
00099             ZYPP_THROW (ParseException( "Can't open " + file_r.asString() ) );
00100         }
00101 
00102         std::string buffer;
00103         // read vendor
00104         MIL << "Started parsing " << file_r << std::endl;
00105         beginParse();
00106         while(file && !file.eof())
00107         {
00108           getline(file, buffer);
00109           boost::smatch what;
00110           if(boost::regex_match(buffer, what, rxComment, boost::match_extra))
00111           {
00112             XXX << "comment" << std::endl;
00113             // comment # something
00114             // str::strtonum(buffer, entry_r.count);
00115             dumpRegexpResults(what);
00116           }
00117           else if(boost::regex_match(buffer, what, rxMStart, boost::match_extra))
00118           {
00119             MultiTag tag;
00120             tag.name = what[1];
00121             tag.modifier = what[3];
00122 
00123             XXX << "start list" << std::endl;
00124             dumpRegexpResults(what);
00125             // start of list +Something.lang:
00126             // lang is optional
00127             // str::strtonum(buffer, entry_r.count);
00128             std::string element;
00129             boost::smatch element_what;
00130             getline(file, element);
00131             // while we dont find the list terminator
00132             while(!file.eof())
00133             {
00134               // avoid regexping in most cases.
00135               if ( element[0] == '-' )
00136               {
00137                 if ( boost::regex_match(element, element_what, rxMEnd, boost::match_extra) )
00138                 {
00139                   // end list element? we check that it is the same as the opening tag, otherwise it is all broken!
00140                   if ( tag.name != element_what[1] )
00141                     ZYPP_THROW(ParseException("Expecting tag -" + tag.name + " for closing. Found -" + element_what[1]));
00142                   
00143                   // no problem, is a real close list tag
00144                   break;
00145                 }
00146               }
00147               
00148               // if we are in a multi tag (list), we cannot start a list inside a list, so if we find a
00149               // + sign, we check it. We dont just regexp every entry because it is very expensive
00150               if ( element[0] == '+' )
00151               {
00152                 if ( boost::regex_match(element, element_what, rxMStart, boost::match_extra) )
00153                 {
00154                   if ( tag.name != element_what[1] )
00155                     ZYPP_THROW(ParseException("MultiTag +" + element_what[1] + " started before closing +" + tag.name));
00156                   else
00157                     ZYPP_THROW(ParseException("MultiTag +" + tag.name + " duplicate opening tag"));
00158                 }
00159               }
00160               
00161               tag.values.push_back(element);
00162               XXX << element << std::endl;
00163               getline(file, element);
00164               //dumpRegexpResults(element_what);
00165             }
00166             XXX << "end list" << std::endl;
00167             consume(tag);
00168             // end of list
00169           }
00170           else if(boost::regex_match(buffer, what, rxSStart, boost::match_extra))
00171           {
00172             SingleTag tag;
00173             tag.name = what[1];
00174             tag.modifier = what[3];
00175             tag.value = what[4];
00176             XXX << "assign" << std::endl;
00177             // start of list
00178             // str::strtonum(buffer, entry_r.count);
00179             dumpRegexpResults(what);
00180             consume(tag);
00181           }
00182           else if(boost::regex_match(buffer, what, rxEmpty, boost::match_extra))
00183           {
00184             XXX << "empty line" << std::endl;
00185           }
00186           else
00187           {
00188             // https://bugzilla.novell.com/show_bug.cgi?id=160607
00189             // before we used to throw a parse error exception if we dont find
00190             // a key value line. But package descriptions usually are broken
00191             // and contain multiple lines for single line tags, etc.
00192             // so now we just skip those lines.
00193             //ZYPP_THROW(ParseException("parse error: " + buffer));
00194             ERR << "Parse error, unrecognized format [" << buffer << "]. Be sure " << _file_r << "does not contains a single tag with new lines." << std::endl;
00195           }
00196         }
00197         endParse();
00198         MIL << "Done parsing " << file_r << std::endl;
00199       }
00200 
00202     } // namespace tagfile
00205   } // namespace parser
00208 } // namespace zypp

Generated on Thu Jul 6 00:07:21 2006 for zypp by  doxygen 1.4.6