// Logo Search packages:
// Sourcecode: afnix version File versions
//
// Pattern.cpp

// ---------------------------------------------------------------------------
// - Pattern.cpp                                                             -
// - afnix:txt module - pattern description class implementation             -
// ---------------------------------------------------------------------------
// - This program is free software;  you can redistribute it  and/or  modify -
// - it provided that this copyright notice is kept intact.                  -
// -                                                                         -
// - This program  is  distributed in  the hope  that it will be useful, but -
// - without  any  warranty;  without  even   the   implied    warranty   of -
// - merchantability or fitness for a particular purpose.  In no event shall -
// - the copyright holder be liable for any  direct, indirect, incidental or -
// - special damages arising in any way out of the use of this software.     -
// ---------------------------------------------------------------------------
// - copyright (c) 1999-2007 amaury darsch                                   -
// ---------------------------------------------------------------------------

#include "Item.hpp"
#include "Vector.hpp"
#include "Pattern.hpp"
#include "Integer.hpp"
#include "Boolean.hpp"
#include "Character.hpp"
#include "QuarkZone.hpp"
#include "InputString.hpp"

namespace afnix {

  // -------------------------------------------------------------------------
  // - private section                                                       -
  // -------------------------------------------------------------------------

  // This structure reads characters from an input stream that is prefixed
  // by a string: the prefix is consumed first, then the stream. Characters
  // taken from the stream are recorded in a buffer so that they can be
  // pushed back with restore () when a match fails.
  struct s_pis {
    // the input stream (may be nil, in which case only the prefix is read)
    Input* p_is;
    // the prefix string
    String d_ps;
    // the prefix stream, loaded with the prefix string by reset ()
    InputString d_ip;
    // the escape character (nilq disables the escape detection)
    t_quad d_ec;
    // the read buffer holding the characters read from the input stream
    String d_buf;
    // construct by stream
    s_pis (Input* is) {
      p_is = is;
      d_ps = "";
      d_ec = nilq;
      reset ();
    }
    // construct by stream and prefix
    s_pis (Input* is, const String& ps) {
      p_is = is;
      d_ps = ps;
      d_ec = nilq;
      reset ();
    }
    // construct by stream, prefix and escape character
    s_pis (Input* is, const String& ps, const t_quad ec) {
      p_is = is;
      d_ps = ps;
      d_ec = ec;
      reset ();
    }
    // reset the prefixed stream: reload the prefix and clear the read buffer
    void reset (void) {
      d_ip.set (d_ps);
      d_buf = "";
    }
    // check for an escape character
    // returns false when no escape character is defined
    bool isesc (const t_quad c) const {
      if (d_ec == nilq) return false;
      return (d_ec == c);
    }
    // read a character from the prefix first and then from the stream
    // returns eofq when the prefix is exhausted and there is no stream or
    // the stream is at the end
    t_quad rduc (void) {
      // check first the prefix
      if (d_ip.iseof () == false) {
        return d_ip.rduc ();
      }
      // check the stream
      if (!p_is) return eofq;
      t_quad uc = p_is->rduc ();
      if (uc == eofq) return uc;
      // save in the pushback buffer
      d_buf = d_buf + uc;
      return uc;
    }
    // restore the input stream by pushing back what was read from it
    void restore (void) {
      if (!p_is) return;
      if (d_buf.length () != 0) p_is->pushback (d_buf);
    }
    // check for a pattern and eventually consume
    // on a mismatch or on an escape character, the characters read so far
    // are pushed back onto the prefix stream and false is returned; on a
    // match the characters are consumed, unless pflg is true in which case
    // they are pushed back onto the prefix stream as well
    bool check (const String& pat, const bool pflg) {
      // the check buffer
      String buf;
      // iterate in the pattern
      long  len = pat.length ();
      for (long  i = 0; i < len; i++) {
        // update string pattern
        t_quad uc = rduc ();
        buf = buf + uc;
        // check for escape
        if (isesc (uc) == true) {
          d_ip.pushback (buf);
          return false;
        }
        // check for position
        if (pat[i] != uc) {
          d_ip.pushback (buf);
          return false;
        }
      }
      if (pflg == true) d_ip.pushback (buf);
      return true;
    }
    // try to match with a start and end string patterns in balanced mode
    // returns the matched string with its delimiters, or an empty string
    // (after restoring the input stream) when there is no match
    String bmode (const String& sbs, const String& ebs) {
      // check for initial matching
      if (check (sbs, false) == false) {
        restore ();
        return "";
      }
      // initialize result
      String result = sbs;
      // accumulate until end match
      while (check (ebs, true) == false) {
        // get next character or escape
        t_quad uc = rduc ();
        // check for eof
        if (uc == eofq) {
          restore ();
          return "";
        }
        // check for escape: keep the escape character and take the
        // escaped character literally
        if (isesc (uc) == true) {
          t_quad nc = rduc ();
          if (nc == eofq) {
            restore ();
            return "";
          }
          result = result + uc;
          uc = nc;
        }
        // add the character
        result = result + uc;
      }
      // the match occurs
      result = result + ebs;
      return result;
    }
    // try to match with a start and end string patterns in recursive mode
    // nested start/end pairs are counted and the match ends when the
    // counter is back to 0; returns the matched string or an empty string
    // (after restoring the input stream) when there is no match
    String rmode (const String& sbs, const String& ebs) {
      // an empty start string always matches without consuming anything,
      // so the loop below would never terminate: report no match instead
      if (sbs.length () == 0) {
        restore ();
        return "";
      }
      // the recursive counter
      long count = 0;
      // check for initial matching
      if (check (sbs, false) == false) {
        restore ();
        return "";
      }
      count++;
      // initialize result
      String result = sbs;
      // loop for data
      while (true) {
        // check for start string
        if (check (sbs, false) == true) {
          result = result + sbs;
          count++;
          continue;
        }
        // check for end string
        if (check (ebs, false) == true) {
          result = result + ebs;
          count--;
          if (count == 0) break;
          continue;
        }
        // get next character or escape
        t_quad uc = rduc ();
        // check for eof
        if (uc == eofq) {
          restore ();
          return "";
        }
        // check for escape: keep the escape character and take the
        // escaped character literally
        if (isesc (uc) == true) {
          t_quad nc = rduc ();
          if (nc == eofq) {
            restore ();
            return "";
          }
          result = result + uc;
          uc = nc;
        }
        // add the character
        result = result + uc;
      }
      // the match occurs
      return result;
    }
  };

  // -------------------------------------------------------------------------
  // - class section                                                         -
  // -------------------------------------------------------------------------

  // create an empty pattern
  
00213   Pattern::Pattern (void) {
    d_mode  = REGEX;
    d_sbs   = "";
    d_ebs   = "";
    d_name  = "";
    d_rtag  = -1;
    d_escc  = nilq;
  }

  // create a regex pattern by string
  
00224   Pattern::Pattern (const String& re) {
    d_mode  = REGEX;
    d_regex = re;
    d_sbs   = "";
    d_ebs   = "";
    d_name  = "";
    d_rtag  = -1;
    d_escc  = nilq;
  }

  // create a regex pattern by name and string
  
00236   Pattern::Pattern (const String& name, const String& re) {
    d_mode  = REGEX;
    d_regex = re;
    d_sbs   = "";
    d_ebs   = "";
    d_name  = name;
    d_rtag  = -1;
    d_escc  = nilq;
  }

  // create a pattern by name and regex
  
00248   Pattern::Pattern (const String& name, const Regex& re) {
    d_mode  = REGEX;
    d_regex = re;
    d_sbs   = "";
    d_ebs   = "";
    d_name  = name;
    d_rtag  = -1;
    d_escc  = nilq;
  }

  // create a balanced pattern by name, control string and escape character
  
00260   Pattern::Pattern (const String& name, const String& cs, const t_quad escc) {
    d_mode  = BMODE;
    d_sbs   = cs;
    d_ebs   = cs;
    d_name  = name;
    d_rtag  = -1;
    d_escc  = escc;
  }

  // create a balanced pattern by name, control strings and escape character
  
00271   Pattern::Pattern (const String& name, const String& sbs, const String& ebs,
                const t_quad escc) {
    d_mode  = BMODE;
    d_sbs   = sbs;
    d_ebs   = ebs;
    d_name  = name;
    d_rtag  = -1;
    d_escc  = escc;
  }

  // create a balanced pattern by name and control strings
  
00283   Pattern::Pattern (const String& name, const String& sbs, const String& ebs) {
    d_mode  = BMODE;
    d_sbs   = sbs;
    d_ebs   = ebs;
    d_name  = name;
    d_rtag  = -1;
    d_escc  = nilq;
  }

  // create a pattern by name, control strings and flags
  
00294   Pattern::Pattern (const String& name, const String& sbs, const String& ebs,
                const bool rfl) {
    d_mode  = rfl ? RMODE : BMODE;
    d_sbs   = sbs;
    d_ebs   = ebs;
    d_name  = name;
    d_rtag  = -1;
    d_escc  = nilq;
  }

  // copy construct this regex element

00306   Pattern::Pattern (const Pattern& that) {
    that.rdlock ();
    d_mode  = that.d_mode;
    d_regex = that.d_regex;
    d_sbs   = that.d_sbs;
    d_ebs   = that.d_ebs;
    d_name  = that.d_name;
    d_rtag  = that.d_rtag;
    d_escc  = that.d_escc;
    unlock ();
  }

  // assign a pattern to this one

00320   Pattern& Pattern::operator = (const Pattern& that) {
    // make sure the regex elements are not equal
    if (this == &that) return *this;
    // lock this and that
    that.rdlock ();
    wrlock ();
    // assign the pattern to this one
    d_mode  = that.d_mode;
    d_regex = that.d_regex;
    d_sbs   = that.d_sbs;
    d_ebs   = that.d_ebs;
    d_name  = that.d_name;
    d_rtag  = that.d_rtag;
    d_escc  = that.d_escc;
    // unlock everything
    unlock ();
    that.unlock ();
    return *this;
  }

  // return the class name

00342   String Pattern::repr (void) const {
    return "Pattern";
  }

  // set the pattern with a regex string

00348   void Pattern::setregex (const String& re) {
    wrlock ();
    try {
      d_mode  = REGEX;
      d_regex = re;
      d_sbs   = "";
      d_ebs   = "";
      unlock ();
    } catch (...) {
      unlock ();
      throw;
    }
  }

  // set the pattern with a regex object

00364   void Pattern::setregex (const Regex& re) {
    wrlock ();
    try {
      d_mode  = REGEX;
      d_regex = re;
      d_sbs   = "";
      d_ebs   = "";
      unlock ();
    } catch (...) {
      unlock ();
      throw;
    }
  }

  // set the pattern with the balanced control strings

00380   void Pattern::setbcs (const String& sbs, const String& ebs) {
    wrlock ();
    d_mode  = BMODE;
    d_regex = "";
    d_sbs   = sbs;
    d_ebs   = sbs;
    unlock ();
  }

  // set the pattern with the escape character

00391   void Pattern::setesc (const t_quad escc) {
    wrlock ();
    d_escc = escc;
    unlock ();
  }

  // return the escape character

00399   t_quad Pattern::getesc (void) const {
    rdlock ();
    t_quad escc = d_escc;
    unlock ();
    return escc;
  }

  // set the pattern name
  
00408   void Pattern::setname (const String& name) {
    wrlock ();
    d_name = name;
    unlock ();
  }

  // return the pattern name
  
00416   String Pattern::getname (void) const {
    rdlock ();
    String result = d_name;
    unlock ();
    return result;
  }

  // set the pattern tag

00425   void Pattern::settag (const long rtag) {
    wrlock ();
    d_rtag = rtag;
    unlock ();
  }
   
  // return the pattern tag
  
00433   long Pattern::gettag (void) const {
    rdlock ();
    long result = d_rtag;
    unlock ();
    return result;
  }

  // check a string with this pattern

00442   bool Pattern::check (const String& s) const {
    // lock and check for recursive consistency mode
    rdlock ();
    if ((d_mode == RMODE) && (d_sbs == d_ebs)) {
      unlock ();
      throw Exception ("pattern-error", "recursive mode with same delimiters");
    }
    try {
      bool result = false;
      switch (d_mode) {
      case REGEX:
      result = (d_regex == s);
      break;
      case BMODE: 
      {
        s_pis pis (nilp, s, d_escc);
        result = (pis.bmode (d_sbs, d_ebs) == s);
      }
      break;
      case RMODE: 
      {
        s_pis pis (nilp, s, d_escc);
        result = (pis.rmode (d_sbs, d_ebs) == s);
      }
      break;
      }
      unlock ();
      return result;
    } catch (...) {
      unlock ();
      throw;
    }
  }

  // match an input stream with this pattern

00478   String Pattern::match (Input* is) const {
    return match (is, "");
  }

  // match an input stream with this pattern and a prefix

00484   String Pattern::match (Input* is, const String& ps) const {
    // lock and check for recursive consistency mode
    rdlock ();
    if ((d_mode == RMODE) && (d_sbs == d_ebs)) {
      unlock ();
      throw Exception ("pattern-error", "recursive mode with same delimiters");
    }
    try {
      String result = "";
      switch (d_mode) {
      case REGEX:
      result = d_regex.match (is, ps);
      break;
      case BMODE: 
      {
        s_pis pis (is, ps, d_escc);
        result = pis.bmode (d_sbs, d_ebs);
      }
      break;
      case RMODE: 
      {
        s_pis pis (is, ps, d_escc);
        result = pis.rmode (d_sbs, d_ebs);
      }
      break;
      }
      unlock ();
      return result;
    } catch (...) {
      unlock ();
      throw;
    }
  }

  // -------------------------------------------------------------------------
  // - object section                                                        -
  // -------------------------------------------------------------------------

  // the object eval quarks: the class name quark and the three mode quarks
  // used by meval to build the mode items
  static const long QUARK_REGEX     = String::intern ("REGEX");
  static const long QUARK_PATTERN   = String::intern ("Pattern");
  static const long QUARK_BALANCED  = String::intern ("BALANCED");
  static const long QUARK_RECURSIVE = String::intern ("RECURSIVE");

  // the quark zone: its length is the number of method quarks interned
  // below; the zone is defined before them since they call zone.intern
  static const long QUARK_ZONE_LENGTH = 10;
  static QuarkZone  zone (QUARK_ZONE_LENGTH);

  // the object supported quarks: one per method name dispatched by apply
  // and reported by isquark
  static const long QUARK_CHECK     = zone.intern ("check");
  static const long QUARK_MATCH     = zone.intern ("match");
  static const long QUARK_SETBCS    = zone.intern ("set-balanced");
  static const long QUARK_SETESC    = zone.intern ("set-escape");
  static const long QUARK_GETESC    = zone.intern ("get-escape");
  static const long QUARK_SETPTAG   = zone.intern ("set-tag");
  static const long QUARK_GETPTAG   = zone.intern ("get-tag");
  static const long QUARK_SETNAME   = zone.intern ("set-name");
  static const long QUARK_GETNAME   = zone.intern ("get-name");
  static const long QUARK_SETREGEX  = zone.intern ("set-regex");

  // evaluate a quark statically

00546   Object* Pattern::meval (Runnable* robj, Nameset* nset, const long quark) {
    if (quark == QUARK_REGEX) 
      return new Item (QUARK_PATTERN, QUARK_REGEX);
    if (quark == QUARK_BALANCED) 
      return new Item (QUARK_PATTERN, QUARK_BALANCED);
    if (quark == QUARK_RECURSIVE) 
      return new Item (QUARK_PATTERN, QUARK_RECURSIVE);
    throw Exception ("eval-error", "cannot evaluate member",
                 String::qmap (quark));
  }

  // create a new object in a generic way

00559   Object* Pattern::mknew (Vector* argv) {
    long argc = (argv == nilp) ? 0 : argv->length ();
    // check for 0 argument
    if (argc == 0) return new Pattern;
    // check for 1 argument
    if (argc == 1) {
      String re = argv->getstring (0);
      return new Pattern (re);
    }
    // check for 2 arguments
    if (argc == 2) {
      // get the regex name
      String name = argv->getstring (0);
      // get the object and dispatch
      Object* obj = argv->get (1);
      // check for a string
      String* sobj = dynamic_cast <String*> (obj);
      if (sobj != nilp) return new Pattern (name, *sobj);
      // check for a regex
      Regex* robj = dynamic_cast <Regex*> (obj);
      if (robj != nilp) return new Pattern (name, *robj);
      throw Exception ("argument-error", "invalid arguments with pattern");
    }
    // check for 3 arguments
    if (argc == 3) {
      String name = argv->getstring (0);    
      String cs   = argv->getstring (1);
      // get the object and select
      Object* obj = argv->get (2);
      // check for a string
      String* sobj = dynamic_cast <String*> (obj);
      if (sobj != nilp) return new Pattern (name, cs, *sobj);
      // check for a character
      Character* cobj = dynamic_cast <Character*> (obj);
      if (cobj != nilp) {
      t_quad escc = cobj->toquad ();
      return new Pattern (name, cs, escc);
      }
      throw Exception ("argument-error", "invalid arguments with pattern");
    }
    // check for 4 arguments
    if (argc == 4) {
      String name = argv->getstring (0);    
      String sbs  = argv->getstring (1);
      String ebs  = argv->getstring (2);
      // get the object and select
      Object* obj = argv->get (3);
      // check for a character
      Character* cobj = dynamic_cast <Character*> (obj);
      if (cobj != nilp) {
      t_quad escc = cobj->toquad ();
      return new Pattern (name, sbs, ebs, escc);
      }
      // check for a boolean
      Boolean* bobj = dynamic_cast <Boolean*> (obj);
      if (bobj != nilp) {
      bool rfl = bobj->toboolean ();
      return new Pattern (name, sbs, ebs, rfl);
      }
      throw Exception ("argument-error", "invalid arguments with pattern");
    }
    throw Exception ("argument-error", "too many arguments with pattern");
  }

  // return true if the given quark is defined

00625   bool Pattern::isquark (const long quark, const bool hflg) const {
    rdlock ();
    if (zone.exists (quark) == true) {
      unlock ();
      return true;
    }
    bool result = hflg ? Object::isquark (quark, hflg) : false;
    unlock ();
    return result;
  }

  // apply this object with a set of arguments and a quark
  
00638   Object* Pattern::apply (Runnable* robj, Nameset* nset, const long quark,
                    Vector* argv) {
    // get the number of arguments
    long argc = (argv == nilp) ? 0 : argv->length ();

    // dispatch 0 argument
    if (argc == 0) {
      if (quark == QUARK_GETESC)  return new Character (getesc ());
      if (quark == QUARK_GETNAME) return new String    (getname ());
      if (quark == QUARK_GETPTAG) return new Integer   (gettag  ());
    }
    // dispatch 1 argument
    if (argc == 1) {
      if (quark == QUARK_SETNAME) {
      String name = argv->getstring (0);
      setname (name);
      return nilp;
      }
      if (quark == QUARK_SETPTAG) {
      long tag = argv->getint (0);
      settag (tag);
      return nilp;
      }
      if (quark == QUARK_SETESC) {
      t_quad escc = argv->getchar (0);
      setesc (escc);
      return nilp;
      }
      if (quark == QUARK_CHECK) {
      String sval = argv->getstring (0);
      return new Boolean (check (sval));
      }
      if (quark == QUARK_MATCH) {
      Object*  obj = argv->get (0);
      // check for a stream
      Input*  iobj = dynamic_cast <Input*> (obj);
      if (iobj != nilp) return new String (match (iobj));
      // check for a string
      String* sobj = dynamic_cast <String*> (obj);
      if (sobj != nilp) return new String (match (nilp, *sobj));
      // argument error
      throw Exception ("type-error", "invalid object with match ", 
                   Object::repr (obj));
      }
      if (quark == QUARK_SETREGEX) {
      Object* obj = argv->get (0);
      String* sre = dynamic_cast <String*> (obj);
      if (sre != nilp) {
        setregex (*sre);
        return nilp;
      }
      Regex* ore = dynamic_cast <Regex*> (obj);
      if (ore != nilp) {
        setregex (*ore);
        return nilp;
      }
      throw Exception ("type-error", "invalid object with set-regex ", 
                   Object::repr (obj));
      }
      if (quark == QUARK_SETBCS) {
      String bs = argv->getstring (0);
      setbcs (bs, bs);
      return nilp;
      }
    }
    // dispatch 2 arguments
    if (argc == 2) {
      if (quark == QUARK_MATCH) {
      // get the input stream
      Object*  obj = argv->get (0);
      Input*  iobj = dynamic_cast <Input*> (obj);
      // get the prefix
      String ps = argv->getstring (1);
      if (iobj != nilp) return new String (match (iobj, ps));
      throw Exception ("type-error", "invalid object with match ", 
                   Object::repr (obj));
      }
      if (quark == QUARK_SETBCS) {
      String sbs = argv->getstring (0);
      String ebs = argv->getstring (1);
      setbcs (sbs, ebs);
      return nilp;
      }
    }
    // call the object method
    return Object::apply (robj, nset, quark, argv);
  }
}

// Generated by  Doxygen 1.6.0   Back to index