<?php

  // TODO:
  // ----
  // full checks of running off end of strings
  // search highlighting
  // fix multiple output modes
  // nbsp processing
  // [[] broken
  // investigate nl2br() instead of slow homebrew crap

  // Abort immediately unless the including script has defined SDA_VALIDENTRY.
  // NOTE(review): presumably this stops the include file being requested
  // directly -- confirm where SDA_VALIDENTRY is defined.
  if (!defined("SDA_VALIDENTRY"))
    die();

  //require_once("inc_globals.php");
  require_once("inc_parser_tag_defs.php");

  // Output modes (parse targets). Only HTML exists so far; add more here.
  define("SDA_PARSER_OPMODE_HTML", 1);

  // Number of input bytes copied into an error's context excerpt.
  define("SDA_PARSER_CONTEXTBLOCK_LEN", 128);

  // Error classes (used as SDACLS_ParserError->type and as keys of $parser_emsgs).
  define("SDA_PARSER_ERROR_MASTERTAG", 1);
  define("SDA_PARSER_ERROR_HYPERLINK", 2);
  define("SDA_PARSER_ERROR_UNICODE",   3);

  // Error message templates, keyed by error class. "%0", "%1", ... are
  // placeholders filled in from the error's data array.
  // (English only; other languages would need some external localisation module.)
  $parser_emsgs=array(
    SDA_PARSER_ERROR_MASTERTAG => "%0 tag may only be used inside %1 tags.",
    SDA_PARSER_ERROR_HYPERLINK => "%0 is an illegal URL protocol specifier.",
    SDA_PARSER_ERROR_UNICODE   => "%0 is an illegal Unicode escape."
  );
  // One problem detected while parsing a piece of markup.
  class SDACLS_ParserError {
    var $contextblock;   // excerpt of the raw input around the error -- beware: this isn't preformatted/escaped
    var $type;           // error class, one of the SDA_PARSER_ERROR_* constants
    var $data = array(); // positional values substituted for %0, %1, ... in the error message

    // Constructor. A method named after the class is no longer treated as a
    // constructor on PHP 8, which left $data uninitialised (NULL); the property
    // default above plus __construct() make the initialisation version-independent.
    function __construct() {
      $this->data=array();
    }

    // PHP 4 style constructor, kept so that very old interpreters (and any
    // code calling this method by name) keep working.
    function SDACLS_ParserError() {
      $this->__construct();
    }
  }
  // Render a SDACLS_ParserError as an HTML fragment: the message template for
  // the error's type with %0, %1, ... replaced by the (entity-escaped) entries
  // of $error->data, followed by the escaped context excerpt.
  function sda_parser_error_report(&$error) {
    global $parser_emsgs;

    // placeholder => replacement map for strtr()
    $placeholders=array();
    $count=count($error->data);
    $idx=0;
    while ($idx<$count) {
      $placeholders["%".$idx]=sda_htmlentities($error->data[$idx]);
      $idx++;
    }

    $message=strtr($parser_emsgs[$error->type],$placeholders);
    // the context block is raw input, so escape it and turn newlines into <br>
    $context=sda_htmlentities_2($error->contextblock,SDA_PARSER_CRMODE_BR);

    return "ERROR: ".$message."<br>"."Context:<br>... ".$context." ...";
  }

  // Global line-break string.
  // NOTE(review): nothing visible in this file reads its value -- confirm
  // whether code outside this file depends on it before removing.
  $sda_linebreak="\n";

  // Definition of one markup tag. The parser reads these objects from the
  // global $sda_parser_tags array, indexed by tag number.
  class SDACLS_ParserTag {
    var $id;            // the identifier after '[', so 'hr', 'x', 'b' and so on
    var $needs_closing; // is it a [x] [/x] style tag or a [x] style one ?
    var $extra_data;    // do we expect extra data, as in the URL in [a http://url.com] [/a] ?
    var $master_tags;   // some tags are only valid inside other ones ... i.e. [ul] [li] [/ul] ...
                        // this contains an array, a list of valid master tags, identified by their
                        // indices in the $sda_parser_tags array (otherwise NULL)
  }

  // define tags used
  require_once("inc_parser_tag_defs.php");

  // Record of one tag occurrence in the input string. Pass 1 of sda_parser()
  // fills these in; pass 2 uses the recorded positions to build the output.
  //
  // Position fields, illustrated on
  //    stuff [a http://url.com] stuff [/a] stuff
  //          ^  ^              ^      ^   ^
  //          |  |              |      |   strpos_ptag (post-tag section)
  //          |  |              |      strpos_ctag (closing tag)
  //          |  |              strpos_body (tag body)
  //          |  strpos_edata (extra data)
  //          strpos_otag (opening tag)
  class SDACLS_ParserStackItem {
    var $tagnum;       // numerical index into the $sda_parser_tags array
    var $type;         // 1=open, 2=close
    var $strpos_otag;  // string position of first character of opening tag
    var $strpos_edata; // string position of first character of extra data
    var $len_edata;    // length of extra data (sda_parser() sets -1 when there is none)
    var $strpos_body;  // string position of first character of tag body
    var $strpos_ctag;  // string position of first character of closing tag
    var $strpos_ptag;  // string position of first character of post-tag section

    // Debug dump: a header line followed by one "name=value" line per field.
    function toString() {
      $dump = "SDACLS_ParserStackItem->toString(): -------->\n";
      $fields = array("tagnum","type","strpos_otag","strpos_edata",
                      "len_edata","strpos_body","strpos_ctag","strpos_ptag");
      foreach ($fields as $field) {
        $dump .= $field . "=" . $this->$field . "\n";
      }
      return $dump;
    }

    // Manual copy (not the clone keyword) so this also runs on a PHP 4
    // installation. Fields that are unset/NULL on the source are left
    // untouched on the copy.
    function cl0ne() {
      $copy = new SDACLS_ParserStackItem;
      $fields = array("tagnum","type","strpos_otag","strpos_edata",
                      "len_edata","strpos_body","strpos_ctag","strpos_ptag");
      foreach ($fields as $field) {
        if (isset($this->$field)) {
          $copy->$field = $this->$field;
        }
      }
      return $copy;
    }
  }

/*
  function sda_parser_wrapper($a) {
die('fixme');
    $e=array();
    return sda_parser_wrapper_2($a,$e,0,"sda");
  }

  function sda_parser_wrapper_2($a,$b,$c,$d) {
die('fixme');
    return sda_parser(sda_parser_decode_utf8($a),$b,$c);
  }
*/

  // Convenience entry point: parse $s with error collection switched off
  // (FALSE is what sda_parser() treats as "no error array") and with the
  // rss flag set to 0. Returns whatever sda_parser() returns.
  function sda_parser_wrapper($s) {
    $no_errors=FALSE; // must be a variable: sda_parser() takes it by reference
    $output=sda_parser($s,$no_errors,0);
    return $output;
  }

/*
  function sda_parser_wrapper_700($s) {
    $e=FALSE;
    return sda_parser($s,$e,0,"w700j");
  }
*/

  // Convert a string of SDA markup into HTML.
  //
  //   $ip     - the input markup.
  //   $errors - passed by reference. If it is an array, a SDACLS_ParserError is
  //             appended for every problem found; pass NULL (or FALSE, which is
  //             normalised to NULL) to switch error collection off.
  //   $rss    - handed through to sda_parser_build_tag_output(); when truthy,
  //             [norss] ... [/norss] sections are left out of the output.
  //
  // Returns the generated output string.
  function sda_parser($ip, &$errors, $rss) {

    global $sda_parser_tags;

    // FIXME: email protection, just for now, need a better solution to this:
    $ip=strtr($ip,'@','|');

    if ($errors===FALSE) { $errors=NULL; } // hack. This function ought to use varargs instead.

    /*
    Parsing any piece of SDA content is attacked in two passes:

    pass 1: the input is scanned and tag integrity is checked.
    If errors are detected, they are recorded as SDACLS_ParserError objects
    in $errors (when the caller supplied an array).
    Each tag encountered causes a SDACLS_ParserStackItem object to be populated.
    If the tag is an open/close type (rather than just [hr], [x 1234] and so on),
    it is pushed onto a stack so that nested tags can be checked correctly,
    and is moved to $eventlist once its closing tag is found.
    Non-open/close tags go straight into $eventlist and never touch the stack.
    $eventlist is keyed by input string position.

    pass 2: the events, sorted by position, are used to copy the largest
    possible pieces of $ip to the output, escaping the text between tags and
    inserting the output for each tag.
    */

    $stack=array();     // open tags still waiting for their closing tag
    $eventlist=array(); // string position => SDACLS_ParserStackItem

    $len=strlen($ip);

    $mode=0;
    $startmarker=0;     // position of the '[' of the tag currently being examined

    /*
    the $mode variable is used to keep track of what we're currently
    doing as we walk through the input string.
    possible modes are as follows:

    0: looking for '[', belonging to either an opening tag or a closing tag.
    1: following a '[', looking for either '/' to close a tag (or do nothing ...), or a tag id ('b', 'u', etc).
       This mode is anomalous because it looks ahead and then sets $i to a new value if it manages
       to match the rest of the tag; the tag characters are not processed one by one.
    2: Tag matched; now looking for ']' to close the tag (whereupon the mode returns to 0),
       or alternatively a space to mark the start of extra data (URLs, unicode escapes etc)
    3: inside extra data and waiting for ']' to close the opening tag (then $mode will be set to 0 as in 2)
    4: just encountered a '[/' sequence and now expect the current character to match whatever
       is on the top of the stack; if it doesn't match the '[/' is treated as plain text. Again,
       this mode uses lookahead and then updates $i, skipping part of the input string.
       There's a slight difference between this mode and 1, namely that this mode includes
       the closing ']' in the match (can't do this with 1 because of extra data possibility),
       so a mode 5 (analogous to mode 2) isn't necessary.

    Here's a quick summary of the modes:
      stuff [a http://url.com] stuff [/a] stuff
      ^      ^^^              ^       ^^ ^
      0      123              0       14 0
    */

    $tag=NULL;      // SDACLS_ParserTag of the opening tag currently being parsed
    $stacktag=NULL;

    // profiling label; profiling itself only happens for inputs over 1000 bytes
    if ($len>1000) { $prof_d=1000; $prof_s="K"; } else { $prof_d=1; $prof_s="bytes"; }
    $prof_pp1="parser pass 1 [".number_format($len/$prof_d,floor($prof_d/999))." ".$prof_s."]";
    unset($prof_d);
    unset($prof_s);
    if ($len>1000) { sda_prof_begin($prof_pp1); } // only bother if we have >1K of data

    // PASS ONE
    for ($i=0;$i<$len;$i++) {

      switch ($mode) {
        case 0:
          if ($ip[$i]==='[') {
            $startmarker=$i;
            $mode=1;
          }
          break;
        case 1:
          if ($ip[$i]==='/') {
            // possible closing tag (only meaningful if something is open)
            if (count($stack)) {
              $mode=4;
            }
          } else {
            // possible opening tag; on a match $i is advanced to the last character of the id
            $otag=sda_parser_match_otag($ip,$i,$len);
            if ($otag!==-1) {
              $tag=$sda_parser_tags[$otag];
              // enforce master tag compliance: at least one of the tag's
              // master tags must currently be open somewhere on the stack
              if (isset($tag->master_tags) && count($tag->master_tags)) {
                $mt=$tag->master_tags;
                $match=0;
                foreach ($mt as $key=>$val) {
                  foreach ($stack as $key2=>$val2) {
                    if ($val2->tagnum===$val) {
                      $match=1;
                      break;
                    }
                  }
                  if ($match) {
                    break;
                  }
                }
                if (!$match) {
                  // master tag constraint failed, revert to mode 0
                  // report error
                  if (isset($errors)) {
                    $error=new SDACLS_ParserError;
                    $substr_start=($i>SDA_PARSER_CONTEXTBLOCK_LEN/2)?($i-SDA_PARSER_CONTEXTBLOCK_LEN/2):0;
                    $error->contextblock=substr($ip,$substr_start,SDA_PARSER_CONTEXTBLOCK_LEN); // substr() stops at the end of the string by itself
                    $error->type=SDA_PARSER_ERROR_MASTERTAG;
                    $data_array=$error->data; // work on a copy of the array, then store it back
                    $data_array[]="[".$tag->id."]";
                    // second datum: comma separated list of the permitted master tags
                    $x="";
                    $y=0;
                    foreach ($mt as $k=>$v) {
                      if ($y>0) { $x.=", "; }
                      $z=$sda_parser_tags[$v];
                      $x.="[".($z->id)."]";
                      $y++;
                    }
                    $data_array[]=$x;
                    $error->data=$data_array;
                    $errors[]=$error;
                  }
                  $mode=0;
                  break; // break out of switch statement
                }
              }
              $stacktag=new SDACLS_ParserStackItem;
              $stacktag->tagnum=$otag;
              $stacktag->strpos_otag=$startmarker;
              $stacktag->type=1; // opening tag
              $stacktag->len_edata=-1; // -1 = no extra data (checked in pass 2)
              if ($tag->needs_closing) {
                // push to stack
                array_push($stack,$stacktag);
              } else {
                // append to array
                $eventlist[$stacktag->strpos_otag]=$stacktag;
              }
              $mode=2; // look for ']', or extra data
            }
          }
          if ($mode===1) { // no match, revert to mode 0
            $mode = 0;
          }
          break;
        case 2:
          // ok, so now we're looking for either ']', or the space preceding extra data
          // (if the tag supports it)
          if ($tag->extra_data) {
            if ($ip[$i]===' ') { // space before extra data
              $mode=3; // switch to extra data parsing mode (looking for ']')
              // update tag object (pop/modify/push so this also works where
              // objects are copied by value):
              if ($tag->needs_closing) {
                $tmp=array_pop($stack);
                $tmp->strpos_edata=$i+1; // NB: this can be off the end of the string
                array_push($stack,$tmp);
              } else {
                $tmp=$eventlist[$startmarker];
                $tmp->strpos_edata=$i+1; // NB: this can be off the end of the string
                $eventlist[$startmarker]=$tmp;
              }
            } else {
              // ' ' not found, discard tag object
              if ($tag->needs_closing) {
                array_pop($stack);
              } else {
                unset($eventlist[$startmarker]);
              }
              // reset everything
              $mode=0;
              $tag=NULL;
              $startmarker=-1;
            }
          } else {
            if ($ip[$i]===']') { // otag is done, return to mode 0
              // update tag object:
              if ($tag->needs_closing) {
                $tmp=array_pop($stack);
                $tmp->strpos_body=$i+1; // NB: this can be off the end of the string
                array_push($stack,$tmp);
              } else {
                $tmp=$eventlist[$startmarker];
                $tmp->strpos_body=$i+1; // NB: this can be off the end of the string
                $eventlist[$startmarker]=$tmp;
              }
            } else {
              // discard tag object
              if ($tag->needs_closing) {
                array_pop($stack);
              } else {
                unset($eventlist[$startmarker]);
              }
            }
            $mode=0;
            $tag=NULL;
            $startmarker=-1;
          }
          break;
        case 3:
          if ($ip[$i]===']') {
            // get the tag object from wherever so we can access strpos_edata
            if ($tag->needs_closing) {
              $tmp=array_pop($stack); // remember to put this back later if necessary !
            } else {
              $tmp=$eventlist[$startmarker];
            }
            // compute len_edata
            $len_edata=$i-$tmp->strpos_edata;
            // validate edata
            $ed=trim(substr($ip,$tmp->strpos_edata,$len_edata));
            $x=sda_parser_validate_edata($tmp,$ed);
            if (!$x) {
              // generate error messages
              if (isset($errors)) {
                switch($tmp->tagnum) {
                  case SDA_TAGNUM_A:
                    $error=new SDACLS_ParserError;
                    $substr_start=($i>SDA_PARSER_CONTEXTBLOCK_LEN/2)?($i-SDA_PARSER_CONTEXTBLOCK_LEN/2):0;
                    $error->contextblock=substr($ip,$substr_start,SDA_PARSER_CONTEXTBLOCK_LEN); // substr() stops at the end of the string by itself
                    $error->type=SDA_PARSER_ERROR_HYPERLINK;
                    $data_array=$error->data; // work on a copy of the array, then store it back
                    $data_array[]=sda_parser_extract_url_schema($ed,$whocares)."://";
                    $error->data=$data_array;
                    $errors[]=$error;
                    break;
                  case SDA_TAGNUM_X:
                    $error=new SDACLS_ParserError;
                    $substr_start=($i>SDA_PARSER_CONTEXTBLOCK_LEN/2)?($i-SDA_PARSER_CONTEXTBLOCK_LEN/2):0;
                    $error->contextblock=substr($ip,$substr_start,SDA_PARSER_CONTEXTBLOCK_LEN); // substr() stops at the end of the string by itself
                    $error->type=SDA_PARSER_ERROR_UNICODE;
                    $data_array=$error->data; // work on a copy of the array, then store it back
                    $data_array[]="[x ".$ed."]";
                    $error->data=$data_array;
                    $errors[]=$error;
                    break;
                }
              }
              // nuke the tag ... don't need to bother if it's on the stack because we already popped it
              if (!$tag->needs_closing) {
                unset($eventlist[$startmarker]);
              }
            } else if ($tag->needs_closing) {
              // update tag object:
              $tmp->len_edata=$len_edata;
              $tmp->strpos_body=$i+1; // NB: this can be off the end of the string
              array_push($stack,$tmp); // put the popped item back
            } else {
              $tmp->len_edata=$len_edata;
              $tmp->strpos_body=$i+1; // NB: this can be off the end of the string
              $eventlist[$startmarker]=$tmp;
            }
            $mode=0;
            $tag=NULL;
            $startmarker=-1;
          }
          // otherwise just keep going
          break;
        case 4: // looking for closing tag id
          $tmp=array_pop($stack); // pop the top stack item
          $tmp2=$sda_parser_tags[$tmp->tagnum];
          $id=$tmp2->id; // extract the tag id
          if (sda_parser_match_ctag($ip,$i,$len,$id)) {
            // update item ($i now points at the closing ']')
            $tmp->strpos_ctag=$startmarker;
            $tmp->strpos_ptag=$i;
            // transfer item from stack to array
            $eventlist[$tmp->strpos_otag]=$tmp;
            // create second item containing the location of the closing tag
            // (tmp2 is reused)
            $tmp2 = $tmp->cl0ne();
            $tmp2->type=2;
            $eventlist[$tmp->strpos_ctag]=$tmp2;
            // return to mode 0
            $mode=0;
            $tag=NULL;
            $startmarker=-1;
          } else {
            $mode=0;
            array_push($stack,$tmp); // false alarm, push the item back onto the stack
          }
          break;
        default:
          sda_error('sda_parser(): $mode had illegal value (' . $mode . ').');
          break;
      }
    }
    if ($len>1000) { sda_prof_end($prof_pp1); }

    if ($mode===2 || $mode===3) {
      // The string ended in the middle of an opening tag: either directly
      // after the tag id (mode 2) or inside the extra data (mode 3). Such a
      // tag has no strpos_body, so it must not reach pass 2 -- just get rid
      // of it and let its characters be output as plain text.
      if ($tag->needs_closing) { // it's on the stack
        array_pop($stack);
      } else { // it's in the list
        unset($eventlist[$startmarker]);
      }
    }

    // Keys for the synthetic close events start just past the end of the
    // string and count upwards, so that they stay unique and keep their order.
    $hack=1;
    while (count($stack)!==0) {
      // naughty user didn't close all their tags, so we'll fix this now
      $tmp=array_pop($stack); // pop the top stack item
      $tmp->strpos_ctag=$len; // set closing tag position to be at end of string
      $tmp->strpos_ptag=$len; // and ptag too
      $eventlist[$tmp->strpos_otag]=$tmp; // transfer to array
      $tmp2=$tmp->cl0ne();
      $tmp2->type=2;
      $eventlist[$tmp->strpos_ctag+$hack]=$tmp2; // also create close event
      $hack++;
    }

    // done. sort the array, so the events are ordered by input string position
    ksort($eventlist);

    // PASS TWO
    // build the output ...
    $enable_op=1; // toggled (by reference) by sda_parser_build_tag_output()
    $inside_list=$inside_mono=0; // never changed in this function; kept as hooks
    $x=0; // input position from which the next plain-text chunk starts
    $op="";
    $prof_pp2="parser pass 2 [".count($eventlist)." events]";
    if ($len>1000) { sda_prof_begin($prof_pp2); }
    foreach ($eventlist as $k=>$v) {
      // Output state *before* this event's tag gets a chance to toggle it:
      // the text preceding the tag belongs to that state.
      $emit=$enable_op;
      $edata=NULL;
      if ($v->len_edata!==-1) {
        $edata=trim(substr($ip,$v->strpos_edata,$v->len_edata));
      }
      if ($emit) {
        // plain text between the previous event and this one
        $chunk=substr($ip,$x,$k-$x);
        if (!$inside_list && !$inside_mono) {
          $chunk=sda_htmlentities_2($chunk,SDA_PARSER_CRMODE_BR);
        }
        $op.=$chunk;
      }
      // (when output is disabled the chunk is dropped entirely; it used to be
      // appended raw, i.e. suppressed content leaked into the output unescaped)
      if ($v->type===1) { // open
        $x=$v->strpos_body;
      } else { // close
        $x=$v->strpos_ptag+1;
      }
      // always call this, because it is what switches $enable_op on and off
      $tag_op=sda_parser_build_tag_output($v,$enable_op,$edata,$errors,$rss); // enable_op is passed by reference
      if ($emit && $enable_op) {
        $op.=$tag_op;
      }
    }
    // final chunk
    if ($enable_op) {
      $chunk=substr($ip,$x,$len-$x);
      if ($inside_list===0) {
        $chunk=sda_htmlentities_2($chunk,SDA_PARSER_CRMODE_BR);
      }
      $op.=$chunk;
    }
    if ($len>1000) { sda_prof_end($prof_pp2); }

    return $op;

  }


  // Turn every "\n" into "<br>\n" and then strip all "\r" characters.
  function sda_parser_cr_to_br($ip) {
    $search =array("\n",     "\r");
    $replace=array("<br>\n", "");
    // str_replace() applies the pairs one after the other, in array order
    return str_replace($search,$replace,$ip);
  }
/*
  function sda_parser_cr_to_p($ip,$css_default) {
    return str_replace("\r","",str_replace("\n\n","\n<p class=\"".$css_default."\">",$ip));
  }
*/

/*
  // positions of the tags within the array
  define ("SDA_TAGNUM_NORSS",      0);
  define ("SDA_TAGNUM_TABLE",      1);
  define ("SDA_TAGNUM_TR",         2);
  define ("SDA_TAGNUM_TD",         3);
  define ("SDA_TAGNUM_NB",         4); // <nobr>
  define ("SDA_TAGNUM_HR",         5);
  define ("SDA_TAGNUM_OL",         6);
  define ("SDA_TAGNUM_UL",         7);
  define ("SDA_TAGNUM_LI",         8);
  define ("SDA_TAGNUM_B",          9);
  define ("SDA_TAGNUM_U",         10);
  define ("SDA_TAGNUM_I",         11);
  define ("SDA_TAGNUM_MDASH",     12);
  define ("SDA_TAGNUM_A",         13);
  define ("SDA_TAGNUM_SQBRACKET", 14);
  define ("SDA_TAGNUM_S",         15);
  define ("SDA_TAGNUM_M",         16); // <pre>
  define ("SDA_TAGNUM_H",         17);
*/

  // Produce the HTML for a single tag event.
  //
  //   $tag       - SDACLS_ParserStackItem; ->type 1 is an opening tag, anything else a closing tag.
  //   $enable_op - by reference; set to 0 by an opening [norss] and back to 1
  //                by its closing tag, but only when $rss is truthy.
  //   $edata     - the tag's extra data (only used for the hyperlink tag).
  //   $errors    - accepted for interface compatibility; not used here.
  //   $rss       - truthy when [norss] sections are to be switched off.
  //
  // Returns the HTML fragment, or "" for tags that produce no output.
  function sda_parser_build_tag_output($tag,&$enable_op,&$edata,&$errors,$rss) {
    if ($tag->type===1) {
      // OPENING TAGS
      switch ($tag->tagnum) {
        case SDA_TAGNUM_NORSS:
          if ($rss) { // disable news feed output
            $enable_op=0;
          }
          return "";
        case SDA_TAGNUM_TABLE:
          return '<table cellpadding="2" cellspacing="2">';
        case SDA_TAGNUM_TR:
          return '<tr>';
        case SDA_TAGNUM_TD:
          return '<td bgcolor="#ffdddd">';
        case SDA_TAGNUM_NB: // no line breaks
          return '<span style="white-space: nowrap">';
        case SDA_TAGNUM_HR:
          return '<hr>';
        case SDA_TAGNUM_OL:
          return '<ol>';
        case SDA_TAGNUM_UL:
          return '<ul>';
        case SDA_TAGNUM_LI:
          return '<li>';
        case SDA_TAGNUM_B:
          return '<span class="'.SDA_CSS_BOLD.'">';
        case SDA_TAGNUM_U:
          return '<span class="'.SDA_CSS_ULINE.'">';
        case SDA_TAGNUM_I:
          return '<span class="'.SDA_CSS_ITAL.'">';
        case SDA_TAGNUM_MDASH:
          return '&mdash;';
        case SDA_TAGNUM_A:
          // the validator returns the complete <a href="..."> tag, or NULL
          $anchor=sda_parser_validate_hyperlink($edata);
          if ($anchor===NULL) {
            return "";
          }
          return "".$anchor;
        case SDA_TAGNUM_SQBRACKET: // literal '['
          return '[';
        case SDA_TAGNUM_S: // strike
          return '<span class="'.SDA_CSS_STRIKE.'">';
        case SDA_TAGNUM_M: // monospaced / preformatted
          return '<pre>';
        case SDA_TAGNUM_H: // heading
          return '<span class="'.SDA_CSS_HEADING.'">';
      }
      return "";
    }

    // CLOSING TAGS
    switch ($tag->tagnum) {
      case SDA_TAGNUM_NORSS:
        if ($rss) { // re-enable news feed output
          $enable_op=1;
        }
        return "";
      case SDA_TAGNUM_TABLE:
        return '</table>';
      case SDA_TAGNUM_OL:
        return '</ol>';
      case SDA_TAGNUM_UL:
        return '</ul>';
      case SDA_TAGNUM_M:
        return '</pre>';
      case SDA_TAGNUM_B:
      case SDA_TAGNUM_U:
      case SDA_TAGNUM_I:
      case SDA_TAGNUM_H:
      case SDA_TAGNUM_S:
      case SDA_TAGNUM_NB:
        // all of these were opened as a <span>
        return '</span>';
      case SDA_TAGNUM_A:
        return '</a>';
    }
    // every other closing tag (tr, td, li, ...) emits nothing
    return "";
  }


  // Try to match an opening tag id at position $i of $ip.
  //
  // The tags in the global $sda_parser_tags are tried in array order and the
  // first whose id equals the input at $i wins. On a match, $i (by reference)
  // is moved to the last character of the id and the tag's index is returned;
  // otherwise $i is untouched and -1 is returned.
  //
  //   $ip    - input string (by reference only to avoid a copy; it is not modified)
  //   $i     - current position, updated on a match
  //   $iplen - strlen($ip), passed in to save recomputing it
  function sda_parser_match_otag(&$ip,&$i,$iplen) {

    global $sda_parser_tags;

    $ntags = count($sda_parser_tags);
    for ($tagnum=0;$tagnum<$ntags;$tagnum++) {
      $id=$sda_parser_tags[$tagnum]->id;
      $idlen=strlen($id);
      sda_log_debug(9,"inc_parser.php: sda_parser_match_otag(): trying tag " . $tagnum . ", [" . $id . "]");

      if ($i+$idlen <= $iplen) {
        if (substr($ip,$i,$idlen) === $id) {
          sda_log_debug(8,"inc_parser.php: sda_parser_match_otag(): matched \"" . $id . "\" at i=" . $i . ",'" . $ip[$i] . "'");
          $i+=($idlen-1);
          sda_log_debug(8,"inc_parser.php: sda_parser_match_otag(): advanced i to " . $i . ",'" . $ip[$i] . "'");
          return $tagnum;
        }
      } else {
        // this id would run off the end of the string, try the next tag
        sda_log_debug(7,"inc_parser.php: sda_parser_match_otag(): ran off end of string");
      }
    }
    return -1;
  }

  // Check whether the closing tag for $id -- the id immediately followed by
  // ']' -- starts at position $i of $ip.
  //
  // Unlike sda_parser_match_otag() the ']' is part of the match (an opening
  // tag may carry extra data before its ']', a closing tag may not). On a
  // match $i (by reference) is moved onto the ']' and 1 is returned;
  // otherwise $i is untouched and 0 is returned.
  function sda_parser_match_ctag(&$ip,&$i,$iplen,$id) {

    $idlen=strlen($id);
    $wanted=$id."]";

    if ($i+$idlen+1 > $iplen) {
      // not enough input left for id plus ']'
      sda_log_debug(7,"inc_parser.php: sda_parser_match_ctag(): ran off end of string");
      return 0;
    }

    if (substr($ip,$i,$idlen+1) !== $wanted) {
      return 0;
    }

    sda_log_debug(8,"inc_parser.php: sda_parser_match_ctag(): matched \"" . $id . "\" at i=" . $i . ",'" . $ip[$i] . "'");
    $i+=$idlen; // lands on the ']' (match_otag stops one character earlier)
    sda_log_debug(8,"inc_parser.php: sda_parser_match_ctag(): advanced i to " . $i . ",'" . $ip[$i] . "'");
    return 1;
  }

  // Validate the extra data of an opening tag.
  //
  // For the hyperlink tag (SDA_TAGNUM_A) the result is TRUE when
  // sda_parser_validate_hyperlink() accepts $edata and FALSE otherwise.
  // For every other tag the result is 0, i.e. the extra data is rejected.
  function sda_parser_validate_edata($tag,$edata) {
    sda_log_debug(7,"inc_parser.php: sda_parser_validate_edata(): edata=\"" . $edata . "\".");
    sda_log_debug(7,"inc_parser.php: sda_parser_validate_edata(): int edata=\"" . (intval($edata,16)) . "\".");

    if ($tag->tagnum == SDA_TAGNUM_A) {
      $link=sda_parser_validate_hyperlink($edata);
      return ($link !== NULL);
    }
    return 0;
  }

  // ****
  // URLS
  // ****

  // Split $s on "://" and return the scheme part.
  //
  // $e (by reference) receives the number of pieces the split produced:
  //   1 - no "://" present (no scheme),
  //   2 - exactly one "://" (the only case in which a scheme is returned),
  //   more than 2 - several "://" sequences; callers treat this as an error.
  // Returns the text before "://" when $e is 2, NULL otherwise.
  function sda_parser_extract_url_schema($s,&$e) {
    $pieces=explode("://",$s);
    $e=count($pieces);
    if ($e!==2) {
      return NULL;
    }
    return $pieces[0];
  }

  // Validate a URL and turn it into an opening anchor tag.
  //
  // Rejected (NULL is returned): URLs containing more than one "://", and
  // URLs whose scheme is not exactly "http", "ftp" or "https" (a whitelist).
  // A URL without any scheme gets "http://" prepended. The URL is passed
  // through grenola_url_purify() before being placed in the href attribute.
  function sda_parser_validate_hyperlink($s) {
    $pieces=NULL;
    $scheme=sda_parser_extract_url_schema($s,$pieces);
    if ($pieces>2) {
      return NULL;
    }

    if ($scheme===NULL) {
      // no scheme given: assume http
      $s="http://".$s;
    } else if (!in_array($scheme,array("http","ftp","https"),true)) {
      return NULL;
    }

    return "<a href=\"" . grenola_url_purify($s) . "\">";
  }

  // Rebuild $url as: the text before the first "://", then "://", then every
  // remaining "://"-separated segment passed through
  // grenola_url_segment_escape() and concatenated (the separators between
  // those later segments are not re-inserted).
  function grenola_url_purify ($url) {
    sda_log_debug(7,"inc_parser.php: grenola_url_purify(): \"" . $url . "\".");
    $segments = explode ("://", $url);
    $purified = array_shift ($segments) . "://";
    foreach ($segments as $segment) {
      $purified .= grenola_url_segment_escape ($segment);
    }
    return $purified;
  }

  // *******
  // UNICODE
  // *******

  // Encode a code point, given as a hexadecimal string (e.g. "20ac"), as UTF-8.
  //
  // Code points up to 0x7f produce one byte, up to 0x7ff two bytes, and
  // everything else three bytes. Only the 16-bit plane is supported: for
  // values above 0xffff the higher bits are simply dropped.
  //
  // The range checks are inclusive. They used to be "<0x7f" and "<0x7ff",
  // which encoded U+007F and U+07FF with one byte more than necessary
  // (overlong forms, which are not valid UTF-8).
  function sda_parser_encode_utf8($x) {
    $xi=intval($x,16); // x is a string, convert to an int
    if ($xi<=0x7f) {
      return chr($xi);
    } else if ($xi<=0x7ff) {
      // 110xxxxx 10xxxxxx
      return chr(0xc0|(($xi&0x7c0)>>6)).chr(0x80|($xi&0x3f));
    } else {
      // 1110xxxx 10xxxxxx 10xxxxxx
      return chr(0xe0|(($xi&0xf000)>>12)).chr(0x80|(($xi&0xfc0)>>6)).chr(0x80|($xi&0x3f));
    }
  }

  // State identifiers for UTF-8 decoding: a plain ASCII byte, a sequence
  // introduced by a 110xxxxx lead byte, and one introduced by a 1110xxxx
  // lead byte.
  define("SDA_UTF8DEC_MODE_ASCII", 1);
  define("SDA_UTF8DEC_MODE_110",   2);
  define("SDA_UTF8DEC_MODE_1110",  3);

  // The functions defined below this point form the core ability to take markup
  // from the database and display it in forms etc. *as markup*, and conversely
  // to take markup from forms and prepare it for database insertion.
  // NOTE(review): the original wording said "these three following functions";
  // which three were meant is not clear from this file -- confirm.

  // Line-break handling modes accepted by sda_htmlentities_2():
  // leave line breaks alone, convert them to <br>, or convert blank lines to <p>.
  define("SDA_PARSER_CRMODE_UNCHANGED", 1);
  define("SDA_PARSER_CRMODE_BR", 2);
  define("SDA_PARSER_CRMODE_P", 3);

  // Replace every "[x HHHH]" escape (exactly four hex digits) in $s by the
  // byte sequence sda_hexcode_to_unicode() returns for HHHH; everything else
  // is copied through unchanged. Use on submitted forms etc.
  function sda_xtag_to_unicode($s) {
    $result="";
    $copied=0;               // everything before this offset is already in $result
    $last=strlen($s)-8;      // last offset at which a complete 8-byte escape fits
    $pos=0;
    while ($pos<=$last) {
      // jump straight to the next candidate
      $pos=strpos($s,"[x ",$pos);
      if ($pos===false || $pos>$last) {
        break;
      }
      $hex=substr($s,$pos+3,4);
      if ($s[$pos+7]===']' && ctype_xdigit($hex)) {
        // copy the untouched text before the escape, then the decoded character
        $result.=substr($s,$copied,$pos-$copied);
        $result.=sda_hexcode_to_unicode($hex);
        // carry on after the [x 1234] string
        $copied=$pos+8;
        $pos=$copied;
      } else {
        $pos++;
      }
    }
    // almost all done, copy the final piece
    return $result.substr($s,$copied);
  }
  
  // Escape $s for HTML output, leaving line breaks untouched.
  // Shorthand for sda_htmlentities_2() in SDA_PARSER_CRMODE_UNCHANGED mode
  // (that function takes exactly two arguments).
  function sda_htmlentities($s) {
    $escaped=sda_htmlentities_2($s,SDA_PARSER_CRMODE_UNCHANGED);
    return $escaped;
  }

  // UTF-8 capable replacement for the builtin htmlentities().
  //
  // $s is run through htmlspecialchars() (ENT_COMPAT, UTF-8), line breaks are
  // rewritten according to $cr_mode, and the result is handed to
  // sda_decode_utf8() in HTML mode.
  //
  //   SDA_PARSER_CRMODE_UNCHANGED - line breaks are left alone
  //   SDA_PARSER_CRMODE_P         - "\r" removed, "\n\n" => "\n<p>", "\n" => "<br>"
  //   SDA_PARSER_CRMODE_BR        - "\r" removed, "\n" => "<br>"
  //
  // Any other $cr_mode yields NULL.
  function sda_htmlentities_2($s,$cr_mode) {
    // pick the line break substitutions first
    if ($cr_mode==SDA_PARSER_CRMODE_UNCHANGED) {
      $breaks=array();
    } else if ($cr_mode==SDA_PARSER_CRMODE_P) {
      $breaks=array("\r"=>"","\n\n"=>"\n<p>","\n"=>"<br>");
    } else if ($cr_mode==SDA_PARSER_CRMODE_BR) {
      $breaks=array("\r"=>"","\n"=>"<br>");
    } else {
      return NULL;
    }

    $escaped=htmlspecialchars($s,ENT_COMPAT,"UTF-8");
    if (count($breaks)) {
      // strtr() tries the longest key first, so "\n\n" wins over "\n"
      $escaped=strtr($escaped,$breaks);
    }
    return sda_decode_utf8($escaped,1);
  }
  
  // Convert the non-ASCII characters of the UTF-8 string $s to escapes.
  //
  // ASCII bytes are copied through unchanged. Every two- or three-byte UTF-8
  // sequence is replaced by its code point as four hex digits, formatted as
  //   "&#xHHHH;"  when $html_mode is truthy (HTML entity), or
  //   "[x HHHH]"  when it is falsy (xtag).
  //
  // Returns the converted string, or NULL on a decode error: a four-byte
  // sequence (only the 16-bit plane is supported), an invalid lead byte, a
  // missing continuation byte, or -- previously dropped silently -- a
  // multi-byte sequence cut off by the end of the string.
  function sda_decode_utf8 ($s,$html_mode) {

    $fmtstg=$html_mode?"&#x%04x;":"[x %04x]";

    $op="";
    $pending=0;   // continuation bytes still expected for the current character
    $codepoint=0; // bits of the current character collected so far

    $len=strlen($s);
    for ($j=0;$j<$len;$j++) {
      $byte=ord($s[$j]);

      if ($pending===0) {
        // start of a new character: classify the lead byte
        if (!($byte&0x80)) {
          $op.=$s[$j]; // plain ASCII
        } else if (($byte&0xe0)===0xc0) {
          // 110xxxxx: two byte sequence
          $codepoint=$byte&0x1f;
          $pending=1;
        } else if (($byte&0xf0)===0xe0) {
          // 1110xxxx: three byte sequence
          $codepoint=$byte&0x0f;
          $pending=2;
        } else if (($byte&0xf8)===0xf0) {
          // we only support the 16-bit unicode plane, so this is an error
          sda_log_warn("inc_parser.php: sda_decode_utf8(): UTF-8 decode error [1]");
          return NULL;
        } else {
          // stray continuation byte or illegal lead byte
          sda_log_warn("inc_parser.php: sda_decode_utf8(): UTF-8 decode error [2]");
          return NULL;
        }
        continue;
      }

      // inside a multi-byte character: only 10xxxxxx is acceptable
      if (($byte&0xc0)!==0x80) {
        sda_log_warn("inc_parser.php: sda_decode_utf8(): UTF-8 decode error [3]");
        return NULL;
      }
      $codepoint=($codepoint<<6)|($byte&0x3f);
      $pending--;
      if ($pending===0) {
        $op.=sprintf($fmtstg,$codepoint);
      }
    }

    if ($pending!==0) {
      // input ended in the middle of a multi-byte character
      sda_log_warn("inc_parser.php: sda_decode_utf8(): UTF-8 decode error [truncated sequence]");
      return NULL;
    }
    return $op;
  }


  // Convert a hexadecimal code point string (at most four hex digits are
  // read, e.g. "20ac") to the corresponding UTF-8 byte sequence.
  //
  // Code points up to 0x7f give one byte, up to 0x7ff two bytes, and
  // everything else three bytes. The two byte range used to include 0x800,
  // for which a two byte sequence cannot hold all bits; the result was an
  // invalid byte pair. Input that cannot be parsed is treated as code point 0.
  function sda_hexcode_to_unicode($s) {
    $parsed=sscanf($s,"%04x");
    $char=(is_array($parsed) && isset($parsed[0]))?$parsed[0]:0;

    if ($char<=0x7f) {
      return chr($char);
    }
    if ($char<0x800) {
      // 110xxxxx 10xxxxxx
      return chr(0xc0|(($char>>6)&0x1f)).chr(0x80|($char&0x3f));
    }
    // 1110xxxx 10xxxxxx 10xxxxxx
    return chr(0xe0|(($char>>12)&0x0f)).chr(0x80|(($char>>6)&0x3f)).chr(0x80|($char&0x3f));
  }

  // Logging hooks used throughout this file. Both are deliberately empty
  // here, so debug and warning messages are discarded.
  // sda_log_debug() is called with a numeric first argument and the message
  // as second argument; sda_log_warn() with just the message.
  function sda_log_debug ($a, $b) { }
  function sda_log_warn ($a) { }
  // Fatal error hook: hands the message to sda_die(), which is not defined in
  // this file.
  function sda_error ($e) { sda_die($e); }


?>
