# Copyright 2006-2008 The FLWOR Foundation.
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
# http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

                       Zorba Cookbook
                       --------------
                        June 26, 2007
                        Revision 1.0

                        Paul Pedersen








Table of Contents
-----------------

	1. Evaluation data flow
	2. Example output
	3. Building an iterator
	4. Cookbook (arithmetics example)
		4.1 STEP 1 implement functor class
		4.2 STEP 2 implement iterator class
		4.3 STEP 3 verify codegen
		4.4 STEP 4 verify normalization
		4.5 STEP 5 verify parsing
	5. Zorba runtime
	6. Codegen: expression tree to iterator tree
	7. Normalization: Syntax tree to expression tree
	8. Parsing: input expression to syntax tree
	9. Function objects

 




1. Evaluation data flow
-----------------------

The zorba query evaluation data flow consists of
these steps:
 
          ___________________
         |                   |
         |   query source    |
         |___________________|
                  |
                  | PARSE  
                  |
          ________v__________
         |                   |
         |    syntax tree    |
         |___________________|
                  |
                  |
                  | NORMALIZE
                  |      __________
                  |     |          |
          ________v_____v____      |
         |                   |     | OPTIMIZE
         |  expression tree  |     |
         |___________________|     |
                  |     |          |
                  |     |__________|
                  |
                  | CODE GEN
                  |
          ________v__________
         |                   |
         |   iterator plan   |
         |___________________|
                  |
                  | EVALUATE
                  |
          ________v__________
         |                   |
         |   XDM instance    |
         |___________________|
                  |
                  | SERIALIZE
                  |
          ________v__________
         |                   |
         |    XML output     |
         |___________________|


The complete sequence of steps performed during evaluation
is as follows (from runtime/plan_visitor_test.cpp):

      /*
       *  Parse the contents of file 'argv'
       *  and pretty-print the syntax tree.
       */
      driver.parse(*argv);
      parsenode* n_p = driver.get_expr();
      cout << "\nSyntax tree:\n";
      n_p->put(cout) << endl;

      /*
       *  Normalize the syntax tree, creating
       *  the corresponding expression tree.
       *  ('zorp' is the zorba runtime context pointer.)
       */
      MainModule* mm_p;
      QueryBody* qb_p;
      Expr* ex_p;

      // strip away the syntax prolog wrappers
      if ((mm_p = dynamic_cast<MainModule*>(n_p))==NULL) {
        cout << "Parse error: expecting MainModule\n";
        return -1;
      }
      if ((qb_p = dynamic_cast<QueryBody*>(&*mm_p->get_query_body()))==NULL) {
        cout << "Parse error: expecting MainModule->QueryBody\n";
        return -1;
      }
      if ((ex_p = dynamic_cast<Expr*>(&*qb_p->get_expr()))==NULL) {
        cout << "Parse error: expecting MainModule->QueryBody->Expr\n";
        return -1;
      }

      // run the normalize visitor
      cout << "Expression tree:\n";
      ex_p->accept(nvs);
      rchandle<expr> e_h = nvs.pop_nodestack();

      // pretty-print the expression tree.
      cout << endl;
      if (e_h==NULL) { cout << "e_h==NULL\n"; return -1; }
      e_h->put(zorp,cout) << endl;

      /*
       *  Run the plan_visitor code gen
       */
      cout << "Codegen:\n";
      plan_visitor pvs(zorp);
      e_h->accept(pvs);
      iterator_t it_h = pvs.pop_itstack();
      cout << endl;

      /*
       *  Sanity check: print iterator rtti info 
       */
      cout << "iterator type = " << typeid(*it_h).name() << endl;

      /*
       *  Run the iterator
       */
      cout << "\nIterator run:\n";
      if (it_h==NULL) { cout << "it_h==NULL\n"; return -1; }
      it_h->open();
      while (!it_h->done()) {
        item_t i_p = it_h->next();
        if (i_p==NULL) continue;
        i_p->put(zorp,cout) << endl;
      }
      it_h->close();
    }


A simple command line shell can invoke this code with:

      while read c; do
        echo $c >test.xqy;
        Build/plan_visitor_test test.xqy
      done






2. Example output
-----------------

      $ ./cmdline
      1 + 2                             // input
      
      library.cpp:114::init :           // fo library function ref
        fn_doc_key = 3375376522981177514
      
      NumericLiteral [int]              // parser trace 
      Literal [numeric]                 //  (parser/xquery_parser.y)
      PrimaryExpr [literal]
      FilterExpr [primary]
      StepExpr [filter]
      RelativePathExpr [step]
      PathExpr [relative]
      ValueExpr [path]
      UnaryExpr [value]
      CastExpr [unary]
      CastableExpr [cast]
      TreatExpr [castable]
      InstanceofExpr [treat]
      IntersectExceptExpr [instanceof]
      UnionExpr [interexcept]
      MultiplicativeExpr [union]
      AdditiveExpr [mult]
      NumericLiteral [int]
      Literal [numeric]
      PrimaryExpr [literal]
      
      FilterExpr [primary]
      StepExpr [filter]
      RelativePathExpr [step]
      PathExpr [relative]
      ValueExpr [path]
      UnaryExpr [value]
      CastExpr [unary]
      CastableExpr [cast]
      TreatExpr [castable]
      InstanceofExpr [treat]
      IntersectExceptExpr [instanceof]
      UnionExpr [interexcept]
      MultiplicativeExpr [union]
      AdditiveExpr [mult+mult]
      RangeExpr [add]
      FTContainsExpr [range]
      ComparisonExpr [ftcontains]
      AndExpr [comp]
      OrExpr [and]
      ExprSingle [OrExpr]
      Expr [single]
      QueryBody [expr]
      MainModule [querybody]
      Module [main]
      
      Syntax tree:                     // pretty-print syntax tree
        MainModule[                    //  (parser/parsenodes.cpp)
          QueryBody[
            Expr[
              AdditiveExpr[
                NumericLiteral[num_integer=1]
                plus
                NumericLiteral[num_integer=2]
                ]
              ]
            ]
          ]
      
                                      // trace normalize visitor
      Expression tree:                //  (exprtree/normalize_visitor.cpp)
       -normalize_visitor.cpp:859::begin_visit: Expr
      |  -normalize_visitor.cpp:665::begin_visit: AdditiveExpr
      | |  -normalize_visitor.cpp:967::begin_visit: NumericLiteral
      | |  -normalize_visitor.cpp:2260::end_visit: NumericLiteral
      | |  -normalize_visitor.cpp:967::begin_visit: NumericLiteral
      | |  -normalize_visitor.cpp:2260::end_visit: NumericLiteral
      |  -normalize_visitor.cpp:2023::end_visit: AdditiveExpr
       -normalize_visitor.cpp:2159::end_visit: Expr
      
                                      // pretty-print expression-tree
                                      //  (exprtree/normalize_visitor.cpp)
      fo_expr[
        xs_qname([http://www.w3.org/2005/xpath-functions]op:op_add)
          integer[1      ]
          integer[2      ]
        ]
      
                                      // trace plan-visitor
      Codegen:                        //  (runtime/plan_visitor.cpp)
       -plan_visitor.cpp:87::begin_visit: fo_expr
      |  -plan_visitor.cpp:160::begin_visit: literal_expr
      |  -plan_visitor.cpp:339::end_visit: literal_expr
      |  -plan_visitor.cpp:160::begin_visit: literal_expr
      |  -plan_visitor.cpp:339::end_visit: literal_expr
       -plan_visitor.cpp:266::end_visit: fo_expr
      
                                      // print iterator RTTI info
      iterator type = N3xqp23op_numeric_add_iteratorE
      
      Iterator run:                   // run the iterator
                                      //  (runtime/item_iterator.h)
      integer(3)                      // (serialized) output
      





3. Building an iterator
-----------------------

Building an iterator involves

  1.  Writing code which transforms a syntax tree into
      the corresponding expression tree.

  2.  Writing code which transforms an expression tree
      into an iterator tree.

  3.  Writing the iterators

  4.  Testing the iterators by calling 'next' until 'done'.

In this document we trace the steps involved in building
one arithmetic iterator representing the plan for evaluating
arithmetic expressions like 1 + (2 + 3).   The current
codebase has many gaps and only some simple expressions are
available for complete evaluation.






4. Cookbook (arithmetics example)
---------------------------------
In the example of arithmetic expressions the basic steps are
as follows:


STEP 1
------
Overloading operator().  For example, the functor for op:add
(see functions/Numerics.{h,cpp}):

      // 6.2.1 op:numeric-add
      // --------------------
      class op_numeric_add : public function
      {
      public:
        op_numeric_add() {}
        op_numeric_add(const signature&);
        ~op_numeric_add() {}
      
      public:
        iterator_t operator()(zorba*,std::vector<iterator_t>&) const;
        sequence_type_t type_check(signature&) const;
        bool validate_args(std::vector<iterator_t>&) const;
      };
      

      // here is the implementation:
      iterator_t op_numeric_add::operator()(
        zorba* zorp,
        vector<iterator_t>& argv) const
      {
        if (!validate_args(argv)) return NULL;
        return new op_numeric_add_iterator(argv[0], argv[1]);
      }



STEP 2
------
Implement the iterator returned by the functor
(see functions/NumericsImpl.{h,cpp}):


      // binary iterator base class
      class op_numeric_binary_iterator : public basic_iterator
      {
      public:
        op_numeric_binary_iterator(iterator_t, iterator_t);
        virtual ~op_numeric_binary_iterator() {}
      
      public:  // iterator interface
        virtual void _open();
        virtual void _close();
        virtual item_t _next() = 0;
        virtual bool done() const;
      
      protected:
        iterator_t arg0;
        iterator_t arg1;
      };
      
      // 6.2.1 op:numeric-add
      // --------------------
      class op_numeric_add_iterator : public op_numeric_binary_iterator
      {
      public:
        op_numeric_add_iterator(iterator_t, iterator_t);
        ~op_numeric_add_iterator() {}
      
      public:  // iterator interface
        item_t _next();
      
      };

(Notice that _open(), _close(), and done() are all inherited by
op_numeric_add_iterator.)

The work is concentrated in the '_next()' implementation
(see functions/NumericsImpl.cpp):

      item_t op_numeric_add_iterator::_next()
      {
        const numericValue& n0 = dynamic_cast<const numericValue&>(*arg0->next());
        const numericValue& n1 = dynamic_cast<const numericValue&>(*arg1->next());
        return new numericValue(xs_decimal, n0.val() + n1.val());
      }

The implementation calls 'next' on each argument iterator, casts each result
to numericValue, and then returns a new numericValue item containing the sum.
In other words, as usual,

      eval(a '+' b) = eval(a) + eval(b).


STEP 3
------
Verify that 'plan_visitor' generates the iterators from
the corresponding exprtree nodes.


STEP 4
------
Verify that 'normalize_visitor' generates the correct expression
tree from the corresponding syntax nodes.


STEP 5
------
Verify that 'xquery_parser' generates the correct syntax tree
from the corresponding input expression string.






5. Zorba runtime
----------------

The zorba runtime outputs results by calling the 'next'
method for the top node of an iterator tree.

The iterator interface:

      class basic_iterator : public rcobject
      {
      protected:
        zorba* zorp;
        bool open_b;
      
      public:
        basic_iterator()
          : zorp(NULL), open_b(false) {}
        basic_iterator(zorba* _zorp)
          : zorp(_zorp), open_b(false) {}
        basic_iterator(const basic_iterator& it)
          : zorp(it.zorp), open_b(it.open_b) {}
        virtual ~basic_iterator() {}
      
      public:    // inline base logic
        void open()
        {
          assert(!open_b);
          open_b = true;
          _open();
        }
      
        item_t next()
        {
          assert(open_b);
          return _next(); 
        }
      
        void close()
        {
          assert(open_b);
          open_b = false;
          _close();
        }
      
        bool is_open() const
        {
          return open_b;
        }
      
        virtual bool done() const = 0;
      
      /*
       *  Dispatch to these methods in the
       *  implementation classes.
       */
      protected:
        virtual void   _open() = 0;
        virtual item_t _next() = 0;
        virtual void   _close() = 0;
      
      };


The methods open/close are used to acquire and release resources used 
by the iterator when it runs.   An iterator can be allocated and 
passed around before being run.   It acts like a "prepared statement"
and can be reused by calling "close" then "open" with new variable
bindings in the context.






6. Codegen: expression tree to iterator tree
--------------------------------------------

The transformation occurs in the 'plan_visitor' code
(runtime/plan_visitor.{h,cpp}):


  bool plan_visitor::begin_visit(const fo_expr& v)
  {
  #ifdef DEBUG
    cout << indent[++depth] << TRACE << ": fo_expr" << endl;
  #endif

    itstack.push(NULL);
    return true;
  }

  ...

  void plan_visitor::end_visit(const fo_expr& v)
  {
  #ifdef DEBUG
    cout << indent[depth--] << TRACE << ": fo_expr" << endl;
  #endif

    const function* func_p = v.get_func();
    assert(func_p!=NULL);
    const function& func = *func_p;
    
    vector<iterator_t> argv;
    while (true) {
      iterator_t it_h = pop_itstack();
      if (it_h==NULL) break;
        argv.push_back(it_h);
    }
    itstack.push(func(zorp,argv));
  }


The arithmetic expressions are all backed up by fo_operators,
therefore the plan for an arithmetic expression consists of
an iterator node for the function.

The plan_visitor marks the plan stack during 'begin_visit' and
then lazily 'applies' the function to arguments appearing
in the stack frame on 'end_visit'.  The loop pops the (recursively
constructed) iterators for the arguments into an arg vector,
then constructs the function application iterator plan:

    itstack.push(func(zorp,argv));

This expression uses the overloaded operator() method of the
function reference object 'func', which returns an iterator
handle, iterator_t.  This result is pushed on the plan_visitor
stack.   When the plan_visitor traversal completes, the stack
should contain one iterator, which is the root of the complete
iterator tree.

The begin_visit and end_visit methods are called during the
expression tree traversal initiated by calling

    expr_t->accept(plan_visitor).

Begin_visit returns a boolean value which the expression tree
'accept' method uses to decide whether the child expressions are
traversed and 'end_visit' is invoked (see: exprtree/expr.cpp):

      void fo_expr::accept(expr_visitor& v) const
      {
        if (!v.begin_visit(*this)) return;
        vector<expr_t>::const_iterator it = begin();
        for (; it!=end(); ++it) {
          (*it)->accept(v);
        }
        v.end_visit(*this);
      }






7. Normalization: Syntax tree to expression tree
------------------------------------------------

    // zorba normalization (file exprtree/normalize_visitor.h)
    class normalize_visitor : public parsenode_visitor
    {
    public:
      typedef rchandle<expr> exprref;
    
    protected:
      zorba* zorp;
      std::stack<exprref> nodestack;
      std::stack<exprref> argstack;
      std::stack<exprref> pstack;
      fxcharheap sheap;
    
    public:
      normalize_visitor(zorba*);
      ~normalize_visitor() {}
    
    public:
      exprref pop_nodestack();
      void clear_argstack();
      void clear_pstack();
    
    public:
     /*..........................................
       :  begin visit                            :
       :.........................................*/
      bool begin_visit(parsenode const&);
      bool begin_visit(AbbrevForwardStep const&);
      bool begin_visit(AnyKindTest const&);
      bool begin_visit(AposAttrContentList const&);
      bool begin_visit(AposAttrValueContent const&);
      bool begin_visit(ArgList const&);
      bool begin_visit(AtomicType const&);
      ...
    
     /*..........................................
       :  end visit                              :
       :.........................................*/
      void end_visit(parsenode const&);
      void end_visit(AbbrevForwardStep const&);
      void end_visit(AnyKindTest const&);
      void end_visit(AposAttrContentList const&);
      void end_visit(AposAttrValueContent const&);
      void end_visit(ArgList const&);
      void end_visit(AtomicType const&);
      ...
    };

The data flow through normalize_visitor for the query
'fn:doc("sample.xml")' looks like this:

     -normalize_visitor.cpp:899::begin_visit: Expr
    |  -normalize_visitor.cpp:930::begin_visit: FunctionCall
    | |  -normalize_visitor.cpp:87::begin_visit: ArgList
    | | |  -normalize_visitor.cpp:1061::begin_visit: StringLiteral
    | | |  -normalize_visitor.cpp:2268::end_visit: StringLiteral
    | |  -normalize_visitor.cpp:1440::end_visit: ArgList
    |  -normalize_visitor.cpp:2125::end_visit: FunctionCall : argstack.size() = 1
     -normalize_visitor.cpp:2087::end_visit: Expr

with output expression tree:

    fo_expr[xs_qname([http://www.w3.org/2005/xpath-functions]fn:fn_doc)
      string[ "sample.xml" ]
    ]






8. Parsing: input expression to syntax tree
-------------------------------------------

The input expression is handed to an instance of the 'xquery_driver'
class, which invokes the flex/bison scanner/parser subsystem:

    // zorba query driver (file 'parser/xquery_driver.h')
    xquery_driver driver(log_stream);
  
    driver.trace_parsing = true;
    driver.trace_scanning = true;
  
    driver.parse(expression_string);
    parsenode* n_p = driver.get_expr();


Currently the 'xquery_driver' class holds a simple log stream and 
minimal diagnostic options (shown above).  The driver expects either
an expression string or an input stream where it can read the
expression.  In the case of a parse error, the driver will emit a log
message and exit.  In the absence of error the driver concludes the 
parse holding a local parse tree consisting of heap-allocated parse 
nodes: 

    // zorba syntax tree (file 'parser/parsenodes.h')
    class parsenode : public rcobject
    {
    protected:
      yy::location loc;  // source code line number
    
    public:
      parsenode(yy::location const& _loc) : loc(_loc) { }
      ~parsenode() { }
    
    public:
      yy::location get_location() const { return loc; }
      virtual std::ostream& put(std::ostream&) const;
      virtual void accept(parsenode_visitor&) const = 0;
    };






9. Function objects
-------------------

The iterator plan contains nodes corresponding to function
objects (see: 'functions/function.h'):

      class function : public rcobject
      {
      protected:
        signature sig;    // (see: functions/signature.h)
        
      public:
        function() {}
        function(const signature& _sig) : sig(_sig) {}
        virtual ~function() {}
      
      public:
        // XQuery signature (name+arity)
        const qname* get_fname() const { return sig.get_name(); }
        void set_signature(signature& _sig) { sig = _sig; }
      
        // codegen: functor specification
        virtual iterator_t operator()(zorba*,std::vector<iterator_t>& argv) const = 0;
      
        // XQuery polymorphic type inference
        virtual sequence_type_t type_check(signature&) const = 0;
      
      };


