//@copyright_begin
// ================================================================
// Copyright Notice
// Copyright (C) 1998-2004 by Joe Linoff
// 
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
// 
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL JOE LINOFF BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
// 
// Comments and suggestions are always welcome.
// Please report bugs to http://ccdoc.sourceforge.net/ccdoc
// ================================================================
//@copyright_end
#include "log.h"
#include "phase1_scanner.h"
#include <cstdio>

// ================================================================
// RCS identification string for this source file
// (phase1_scanner.cc). Keeping it in a variable embeds the
// revision in the compiled object so that the version of this
// file can be identified from the binary.
// The anonymous namespace keeps the symbol private to this
// translation unit.
// ================================================================
namespace {
  char ccdoc_rcsid[] = "$Id: phase1_scanner.cc,v 1.11 2004/09/30 04:16:07 jlinoff Exp $";
}

// ================================================================
// Constructor.
// Remembers the switches, zeroes the line counter and turns
// debug tracing off. No input file is attached here; that is
// the job of open().
//   sw - the switches object stored in m_sw.
// ================================================================
ccdoc::phase1::scanner::scanner(switches& sw) :
  m_lineno(0), m_sw(sw), m_debug(false)
{
  // Intentionally empty: all state is set by the initializers.
}
// ================================================================
// Destructor.
// Performs no explicit work.
// ================================================================
ccdoc::phase1::scanner::~scanner()
{
  // Intentionally empty.
}
// ================================================================
// Open
// Attach the scanner to the named file and reset the scan state:
// the line counter restarts at 1 and all pending put-back tokens
// and characters are discarded. A stream that is still open is
// closed first.
//   name - path of the file to scan.
// The result of the open is not checked here; get_char() simply
// returns 0 when the stream is not readable.
// ================================================================
void ccdoc::phase1::scanner::open(const string& name)
{
  m_name = name;
  m_lineno = 1;
  m_put_tokens.clear();
  m_put_chars.clear();
  if( m_is.is_open() )
    m_is.close();
  // Drop any eofbit/failbit left over from a previously scanned
  // file. Libraries that pre-date C++11 do not reset the state
  // flags in close()/open(), and get_char() treats a set flag as
  // EOF, so a re-used scanner would see the new file as empty.
  // NOTE(review): assumes m_is is a standard file stream (it is
  // used with is_open/close/open/eof/get) - confirm in the header.
  m_is.clear();
  m_is.open( m_name.c_str() );
}
// ================================================================
// Close
// Close the input stream. Calling this when no stream is open
// is a no-op.
// ================================================================
void ccdoc::phase1::scanner::close()
{
  if( !m_is.is_open() )
    return;
  m_is.close();
}
// ================================================================
// Get character.
// Returns the next input character, or 0 when no more input is
// available. Characters that were put back are returned first
// (most recently put back first), then the stream is read.
// Carriage returns are dropped and every new line that is
// returned bumps the line counter.
// ================================================================
char ccdoc::phase1::scanner::get_char()
{
  for(;;) {
    char next;
    if( m_put_chars.size() ) {
      // Serve the put-back store before touching the stream.
      next = m_put_chars.back();
      m_put_chars.pop_back();
    }
    else if( m_is.eof() || m_is.bad() || m_is.fail() ) {
      return 0;
    }
    else if( !m_is.get(next) ) {
      return 0;
    }

    // Ignore carriage returns under Windows: fetch another one.
    if( '\r' == next )
      continue;

    if( '\n' == next )
      ++m_lineno;
    return next;
  }
}
// ================================================================
// Put character.
// Push a character back so that the next get_char() returns it.
// The characters are kept in a separate store so that they can
// be put back even when the end of the stream has been reached.
// A 0 character (the EOF marker) is silently ignored.
//   ch - the character to put back.
// ================================================================
void ccdoc::phase1::scanner::put_char(char ch)
{
  if( !ch )
    return;

  m_put_chars.push_back(ch);

  // get_char() will count this new line again when it is
  // re-read, so take it off the line counter now.
  // Issue 0056: only while the counter is above zero.
  if( m_lineno > 0 && '\n' == ch )
    --m_lineno;
}
// ================================================================
// Put token.
// Push a whole token back; scan_token() hands out put-back
// tokens (most recent first) before it reads any new input.
// In debug mode the operation is traced to the log.
//   token - the token text, "\n" for a new line token and ""
//           for the EOF token.
// ================================================================
void ccdoc::phase1::scanner::put_token(const string& token)
{
  m_put_tokens.push_back(token);

  if( !m_debug )
    return;

  if( token == "\n" ) {
    s_log << "CCDOC_PHASE1_DEBUG: put_line: " << m_lineno << "\n";
  }
  else if( token == "" ) {
    s_log << "CCDOC_PHASE1_DEBUG: put_eof: " << m_lineno << "\n";
  }
  else {
    s_log << "CCDOC_PHASE1_DEBUG: put_token: '" << token << "'\n";
  }
}
// ================================================================
// Get token.
// Thin wrapper around scan_token() that additionally traces the
// token to the log when debug mode is on.
// Returns the pointer produced by scan_token(): "\n" for a new
// line and an empty string for EOF.
// ================================================================
const char* ccdoc::phase1::scanner::get_token()
{
  const char* result = scan_token();
  if( !m_debug )
    return result;

  switch( *result ) {
  case '\n':
    s_log << "CCDOC_PHASE1_DEBUG: get_line: " << m_lineno << "\n";
    break;
  case 0:
    s_log << "CCDOC_PHASE1_DEBUG: get_eof: " << m_lineno << "\n";
    break;
  default:
    s_log << "CCDOC_PHASE1_DEBUG: get_token: '" << result << "'\n";
    break;
  }
  return result;
}
// ================================================================
// Scan token.
// ================================================================
const char* ccdoc::phase1::scanner::scan_token()
{
  static char tokenbuf[0x100000]; // 2^20
  *tokenbuf = 0;

  // ================================================
  // A token was put back, return that one.
  // ================================================
  if( m_put_tokens.size() ) {
    ::strcpy(tokenbuf,m_put_tokens.back().c_str());
    m_put_tokens.pop_back();
    return tokenbuf;
  }

  char ch = skip_ws();

  // ================================================
  // If this the end of the file, return
  // an empty token.
  // ================================================
  if(!ch) {
    return tokenbuf;
  }

  // ================================================
  // Translate trigraph sequences.
  // This must be done early because they can
  // be part of another token.
  // ================================================
  if( '?' == ch ) {
    ch = scan_trigraph();
    if( '?' == ch ) {
      tokenbuf[0] = '?';
      tokenbuf[1] = 0;
      return tokenbuf;
    }
  }

  // ================================================
  // Eliminate the "\\\n" white space. This cannot
  // be done in skip_ws() because of the backslash
  // trigraph sequence "??/".
  // ================================================
  if( '\\' == ch ) {
    char ch1 = get_char();
    if( '\n' == ch1 ) { // issue 0124
      return get_token();
    }
    put_char(ch1);
  }

  // ================================================
  // Check for character and string literals with
  // the 'L' prefix.
  // ================================================
  if( ch == 'L' ) {
    char ch1 = get_char();
    if( '"' == ch1 ) {
      tokenbuf[0] = ch;
      tokenbuf[1] = ch1;
      tokenbuf[2] = 0;
      get_string_literal(&tokenbuf[2],sizeof(tokenbuf)-3);
      return tokenbuf;
    }
    if( '\'' == ch1 ) {
      tokenbuf[0] = ch;
      tokenbuf[1] = ch1;
      tokenbuf[2] = 0;
      get_char_literal(&tokenbuf[2],sizeof(tokenbuf)-3);
      return tokenbuf;
    }
    put_char(ch1);
  }

  // ================================================
  // This is a quoted string. Look for the next
  // un-escaped quote.
  // ================================================
  if( ch == '"' ) {
    tokenbuf[0] = ch;
    tokenbuf[1] = 0;
    get_string_literal(&tokenbuf[1],sizeof(tokenbuf)-2);
    return tokenbuf;
  }

  // ================================================
  // This is a single quoted character. Look for the
  // next un-escaped quote.
  // ================================================
  if( '\'' == ch ) {
    tokenbuf[0] = ch;
    tokenbuf[1] = 0;
    get_char_literal(&tokenbuf[1],sizeof(tokenbuf)-2);
    return tokenbuf;
  }

  // ================================================
  // This is some sort of number.
  // The special case of ".333" is handled in the '.'
  // processing.
  // ================================================
  if( '0' <= ch && ch <= '9' ) {
    put_char(ch);
    get_number_literal(tokenbuf,sizeof(tokenbuf)-1);
    return tokenbuf;
  }

  // ================================================
  // Convert alternative token forms to their
  // primary form. This is handled for each
  // separate character analysis.
  //
  //  alt     pri     alt     pri     alt     pri
  //  ======  ===     ======  ===     ======  ===
  //  <%      {       and     &&      and_eq  &=
  //  %>      }       bitor   |       or_eq   |=
  //  <:      [       or      ||      xor_eq  ^=
  //  :>      ]       xor     ^       not     !
  //  %:      #       compl   ~       not_eq  !=
  //  %:%:    ##      bitand  &
  //
  // We don't care about the identifier based
  // alternative forms.
  // ================================================

  // ================================================
  // This is an identifier.
  // Ccdoc must support the non-standard $ character
  // because some compilers support it.
  // ================================================
  if( ( 'a' <= ch && ch <= 'z' ) ||
      ( 'A' <= ch && ch <= 'Z' ) ||
      ( '_' == ch || '$' == ch ) ) {
    put_char(ch);
    get_identifier(tokenbuf,sizeof(tokenbuf)-2);
    return tokenbuf;
  }

  // ================================================
  // This is a operator/punctuator.
  // Do a longest match.
  // ================================================
  if( '{' == ch || '}' == ch ||
      '[' == ch || ']' == ch ||
      '(' == ch || ')' == ch ||
      '~' == ch ||
      ',' == ch ||
      ';' == ch ) {
    char* pstr = tokenbuf;
    *pstr++ = ch;
    *pstr = 0;
    return tokenbuf;
  }

  // ================================================
  // OP: !, !=
  // ================================================
  if( '!' == ch ) {
    char* pstr = tokenbuf;
    *pstr++ = ch;
    ch = get_char();
    if( '=' == ch ) {
      *pstr++ = ch;
    }
    else {
      put_char(ch);
    }
    *pstr = 0;
    return tokenbuf;
  }

  // ================================================
  // OP: #, ##
  // ================================================
  if( '#' == ch ) {
    char* pstr = tokenbuf;
    *pstr++ = ch;
    ch = get_char();
    if( '#' == ch ) {
      *pstr++ = ch;
    }
    else {
      put_char(ch);
    }
    *pstr = 0;
    return tokenbuf;
  }

  // ================================================
  // OP: %, %:, %:%:, %=, %>
  // ================================================
  if( '%' == ch ) {
    char* pstr = tokenbuf;
    *pstr++ = ch;
    ch = get_char();
    if( '=' == ch ) {
      *pstr++ = ch;
    }
    else if( '>' == ch ) {
      // %> --> }
      pstr = tokenbuf;
      *pstr++ = '}';
      *pstr++ = 0;
      return tokenbuf;
    }
    else if( ':' == ch ) {
      ch = get_char();
      if( '%' == ch ) {
	ch = get_char();
	if( ':' == ch ) {
	  // %:%: --> ##
	  pstr = tokenbuf;
	  *pstr++ = '#';
	  *pstr++ = '#';
	  *pstr++ = 0;
	  return tokenbuf;
	}
	else {
	  put_char('%');
	  put_char(ch);
	}
      }
      else {
	// %: --> #
	pstr = tokenbuf;
	*pstr++ = '#';
	*pstr++ = 0;
	put_char(ch);
	return tokenbuf;
      }
    }
    else {
      put_char(ch);
    }
    *pstr = 0;
    return tokenbuf;
  }

  // ================================================
  // OP: &, &&, &=
  // ================================================
  if( '&' == ch ) {
    char* pstr = tokenbuf;
    *pstr++ = ch;
    ch = get_char();
    if( '&' == ch || '=' == ch ) {
      *pstr++ = ch;
    }
    else {
      put_char(ch);
    }
    *pstr = 0;
    return tokenbuf;
  }

  // ================================================
  // OP: *, *=
  // ================================================
  if( '*' == ch ) {
    char* pstr = tokenbuf;
    *pstr++ = ch;
    ch = get_char();
    if( '=' == ch ) {
      *pstr++ = ch;
    }
    else {
      put_char(ch);
    }
    *pstr = 0;
    return tokenbuf;
  }

  // ================================================
  // OP: +, ++, +=
  // ================================================
  if( '+' == ch ) {
    char* pstr = tokenbuf;
    *pstr++ = ch;
    ch = get_char();
    if( '+' == ch || '=' == ch ) {
      *pstr++ = ch;
    }
    else {
      put_char(ch);
    }
    *pstr = 0;
    return tokenbuf;
  }

  // ================================================
  // OP: -, --, -=, ->, ->*
  // ================================================
  if( '-' == ch ) {
    char* pstr = tokenbuf;
    *pstr++ = ch;
    ch = get_char();
    if( '-' == ch || '=' == ch ) {
      *pstr++ = ch;
    }
    else if( '>' == ch ) {
      *pstr++ = ch;
      ch = get_char();
      if( '*' == ch ) {
	*pstr++ = ch;
      }
      else {
	put_char(ch);
      }
    }
    else {
      put_char(ch);
    }
    *pstr = 0;
    return tokenbuf;
  }

  // ================================================
  // OP: ., .*, ..., .[0-9]+
  // ================================================
  if( '.' == ch ) {
    char* pstr = tokenbuf;
    *pstr++ = ch;
    ch = get_char();
    if( '*' == ch ) {
      *pstr++ = ch;
    }
    else if( '0' <= ch && ch <= '9' ) {
      // Handle floating number of the form: .333
      put_char(ch);
      put_char('.');
      get_number_literal(tokenbuf,sizeof(tokenbuf)-1);
      return tokenbuf;
    }
    else if( '.' == ch ) {
      ch = get_char();
      if( '.' == ch ) {
	*pstr++ = '.';
	*pstr++ = '.';
      }
      else {
	put_char(ch);
	put_char('.');
      }
    }
    else {
      put_char(ch);
    }
    *pstr = 0;
    return tokenbuf;
  }

  // ================================================
  // OP: /, /=, //, /*
  // ================================================
  if( '/' == ch ) {
    char* pstr = tokenbuf;
    *pstr++ = ch;
    ch = get_char();
    if( '=' == ch ) {
      *pstr++ = ch;
      *pstr = 0;
      return tokenbuf;
    }
    else if( '/' == ch ) {
      // Found '//'
      // Check for '//@{' or a '// @{' comment.
      return scan_ccdoc_style2(tokenbuf,sizeof(tokenbuf)-1);
    }
    else if( '*' == ch ) {
      // Found '/*'
      // Check for a '/**' comment.
      return scan_ccdoc_style1(tokenbuf,sizeof(tokenbuf)-1);
    }
    else {
      put_char(ch); // issue 0069
      *pstr = 0;
      return tokenbuf;
    }
  }

  // ================================================
  // OP: :, ::, :>
  // ================================================
  if( ':' == ch ) {
    char* pstr = tokenbuf;
    *pstr++ = ch;
    ch = get_char();
    if( ':' == ch ) {
      *pstr++ = ch;
    }
    else if( '>' == ch ) {
      // :> --> ]
      pstr = tokenbuf;
      *pstr++ = ']';
      *pstr++ = 0;
      return tokenbuf;
    }
    else {
      put_char(ch);
    }
    *pstr = 0;
    return tokenbuf;
  }

  // ================================================
  // OP: <, <%, <:, <<, <<=, <=
  // ================================================
  if( '<' == ch ) {
    char* pstr = tokenbuf;
    *pstr++ = ch;
    ch = get_char();
    if( '=' == ch ) {
      *pstr++ = ch;
    }
    else if( '%' == ch ) {
      // <% --> {
      pstr = tokenbuf;
      *pstr++ = '{';
      *pstr++ = 0;
      return tokenbuf;
    }
    else if( ':' == ch ) {
      // <: --> ]
      pstr = tokenbuf;
      *pstr++ = '[';
      *pstr++ = 0;
      return tokenbuf;
    }
    else if( '<' == ch ) {
      *pstr++ = ch;
      ch = get_char();
      if( '=' == ch ) {
	*pstr++ = ch;
      }
      else {
	put_char(ch);
      }
    }
    else {
      put_char(ch);
    }
    *pstr = 0;
    return tokenbuf;
  }

  // ================================================
  // OP: =, ==
  // ================================================
  if( '=' == ch ) {
    char* pstr = tokenbuf;
    *pstr++ = ch;
    ch = get_char();
    if( '=' == ch ) {
      *pstr++ = ch;
    }
    else {
      put_char(ch);
    }
    *pstr = 0;
    return tokenbuf;
  }

  // ================================================
  // OP: >, >=, >>, >>=
  // ================================================
  if( '>' == ch ) {
    char* pstr = tokenbuf;
    *pstr++ = ch;
    ch = get_char();
    if( '=' == ch ) {
      *pstr++ = ch;
    }
    else if( '>' == ch ) {
      *pstr++ = ch;
      ch = get_char();
      if( '=' == ch ) {
	*pstr++ = ch;
      }
      else {
	put_char(ch);
      }
    }
    else {
      put_char(ch);
    }
    *pstr = 0;
    return tokenbuf;
  }

  // ================================================
  // OP: ^, ^=
  // ================================================
  if( '^' == ch ) {
    char* pstr = tokenbuf;
    *pstr++ = ch;
    ch = get_char();
    if( '=' == ch ) {
      *pstr++ = ch;
    }
    else {
      put_char(ch);
    }
    *pstr = 0;
    return tokenbuf;
  }

  // ================================================
  // OP: |, |=, ||
  // ================================================
  if( '|' == ch ) {
    char* pstr = tokenbuf;
    *pstr++ = ch;
    ch = get_char();
    if( '=' == ch || '|' == ch ) {
      *pstr++ = ch;
    }
    else {
      put_char(ch);
    }
    *pstr = 0;
    return tokenbuf;
  }

  // Catch all at the end.
  tokenbuf[0] = ch;
  tokenbuf[1] = 0;
  return tokenbuf;
}
// ================================================================
// Skip w/s.
// Consumes characters in the range 1..' ' except the new line
// and returns the first character that is not skipped (0 at
// EOF). New lines are not w/s because they are significant for
// pre-processing directives.
// ================================================================
char ccdoc::phase1::scanner::skip_ws()
{
  for(;;) {
    const char c = get_char();
    // Stop on EOF (or any non-positive value), on a new line
    // and on the first character above the blank.
    if( c <= 0 || c > ' ' || '\n' == c )
      return c;
  }
}
// ================================================================
// scan trigraph sequences.
// Called after a '?' has been read. Returns the character that
// the trigraph stands for, or '?' when the input is not a
// trigraph. In the latter case every character that was read
// ahead is put back, so the caller owns exactly one '?'.
//
// The standard states that these sequences are converted first.
// I interpret this to mean all sequences except strings and
// comments.
// OP: ?, ??=, ??/, ??', ??(, ??), ??!, ??<, ??>, ??-
// ================================================================
char ccdoc::phase1::scanner::scan_trigraph()
{
  const char second = get_char();
  if( '?' != second ) {
    put_char(second);
    return '?';
  }

  const char third = get_char();
  switch(third) {
  case '=':  return '#';
  case '/':  return '\\';
  case '\'': return '^';
  case '(':  return '[';
  case ')':  return ']';
  case '!':  return '|';
  case '<':  return '{';
  case '>':  return '}';
  case '-':  return '~';
  default:   break;
  }

  // "??" followed by something else is not a trigraph. Restore
  // the look ahead (last read character first, the put-back
  // store is last-in first-out) and report a plain '?'.
  // Returning the third character here would make the caller
  // process it in addition to the copy that was put back, and
  // at EOF it would report end of input while a '?' is still
  // pending.
  put_char(third);
  put_char(second);
  return '?';
}
// ================================================================
// get_string_literal
// Copies the rest of a string literal (the opening quote has
// already been consumed by the caller) up to and including the
// closing un-escaped double quote.
//   token - destination, always NUL terminated on return.
//   max   - maximum number of characters to copy.
// A warning is logged when the input or the space runs out
// before the closing quote is found.
// ================================================================
void ccdoc::phase1::scanner::get_string_literal(char* token,int max)
{
  int prev = '"';
  for( ; max > 0 ; --max ) {
    int cur = get_char();
    if( !cur )
      break;
    *token++ = cur;

    // A quote ends the literal unless a backslash escapes it.
    if( '"' == cur && '\\' != prev ) {
      *token = 0;
      return;
    }

    // Issue 0116
    //   Contributed by Chris Martin 2001/11/25
    // An escaped backslash must not escape the character that
    // follows it, so it is remembered as 0. This finds the
    // terminating double quote for the case of "\\".
    prev = ( '\\' == cur && '\\' == prev ) ? 0 : cur;
  }
  *token = 0;

  // The end of the string was never reached.
  // We should never reach this point because all input should
  // be from a legal C++ file.
  s_log.warning()
    << "Unterminated string literal found in "
    << m_name.c_str()
    << " at line "
    << m_lineno
    << ".\n"
    << s_log.enable();
}
// ================================================================
// get_char_literal
// Copies the rest of a character literal (the opening quote has
// already been consumed by the caller) up to and including the
// closing single quote.
//   token - destination, always NUL terminated on return.
//   max   - number of characters available in token.
// A warning is logged when the closing quote is not found
// before the end of the input or before the space runs out.
// ================================================================
void ccdoc::phase1::scanner::get_char_literal(char* token,int max)
{
  // Issue 0052: Handle the special case of '\\'.
  // The first character, and the one after a leading backslash,
  // can never be the terminator.
  int ch = get_char(); --max; *token++ = ch;
  if( '\\' == ch ) {
    ch = get_char(); --max; *token++ = ch;
  }
  ch = get_char(); --max; *token++ = ch;
  // Also handle the special cases of '\xad'.
  while( ch != '\'' ) {
    ch = get_char(); --max; *token++ = ch;
    // get_char() keeps returning 0 once the input is exhausted,
    // so stop right away at EOF instead of filling the rest of
    // the buffer with NULs first.
    if( !ch || max <= 2 ) {
      *token = 0;
      // The end of the character was never reached.
      // We should never reach this point because all input should
      // be from a legal C++ file.
      s_log.warning()
        << "Unterminated character literal found in "
        << m_name.c_str()
        << " at line "
        << m_lineno
        << ".\n"
        << s_log.enable();
      return;
    }
  }
  *token = 0;
}
// ================================================================
// get_number_literal
// Reads a numeric literal from the input into token. The caller
// has put the first character(s) of the number back, so the
// literal is read from its start: a digit, or '.' for numbers
// of the form ".333".
//   token - destination, always NUL terminated on return.
//   max   - number of characters available in token.
// Recognized parts, in order: hex or decimal/octal digits, a
// fraction, an exponent and a run of U/L/F suffix letters. The
// first character that does not belong to the number is put
// back.
// ================================================================
void ccdoc::phase1::scanner::get_number_literal(char* token,int max)
{
  // Some of the numeric processing is confusing because
  // the types overlap. Consider the following
  // number:
  //
  //   07777.35
  //
  // This is a floating pointer number not an ill-formed
  // octal integer literal.
  //
  // This analyzer takes advantage of the fact that the
  // input is guaranteed to be legal C++.
  char ch = get_char();

  if( '0' == ch ) {
    *token++ = ch;
    max--;
    char ch1 = get_char();
    if( 'x' == ch1 || 'X' == ch1 ) {
      // This is a hex number.
      *token++ = ch1;
      max--;
      while( max>0 && (ch = get_char()) ) {
        // Hex digits are 0-9, a-f and A-F only. Letters past
        // 'F' (for example the U and L suffixes) are left for
        // the suffix scan at the end of this function.
        if( ( 'a' <= ch && ch <= 'f' ) ||
            ( 'A' <= ch && ch <= 'F' ) ||
            ( '0' <= ch && ch <= '9' ) ) {
          *token++ = ch;
          max--;
        }
        else {
          break;
        }
      }
    }
    else if( '0' <= ch1 && ch1 <= '9' ) {
      // This is octal or floating point.
      *token++ = ch1;
      max--;
      while( max>0 && (ch = get_char()) ) {
        if( '0' <= ch && ch <= '9' ) {
          *token++ = ch;
          max--;
        }
        else {
          break;
        }
      }
    }
    else {
      // A lone '0': continue with the character after it.
      ch = ch1;
    }
  }
  else if( '1' <= ch && ch <= '9' ) {
    *token++ = ch;
    max--;
    // This is decimal or floating point.
    while( max>0 && (ch = get_char()) ) {
      if( '0' <= ch && ch <= '9' ) {
        *token++ = ch;
        max--;
      }
      else {
        break;
      }
    }
  }

  // At this point, ch is not a digit. Check
  // for a floating point decimal point, an
  // exponent or the UL suffix.

  // Scan for a floating point literal decimal point.
  if( '.' == ch ) {
    *token++ = ch;
    max--;
    while( max>0 && (ch = get_char()) ) {
      if( '0' <= ch && ch <= '9' ) {
        *token++ = ch;
        max--;
      }
      else {
        break;
      }
    }
  }

  // Scan for a floating point literal exponent.
  if( 'E' == ch || 'e' == ch ) {
    *token++ = ch;
    max--;
    if( max>0 && (ch = get_char()) ) {
      // Get the sign of the exponent or a digit if
      // the sign was not specified.
      if( ( '0' <= ch && ch <= '9' ) ||
          ( '-' == ch || '+' == ch ) ) {
        *token++ = ch;
        max--;

        // Get the rest of the exponent digits.
        while( max>0 && (ch = get_char()) ) {
          if( '0' <= ch && ch <= '9' ) {
            *token++ = ch;
            max--;
          }
          else {
            break;
          }
        }
      }
    }
  }

  // Scan for the ULF suffix.
  if( 'U' == ch || 'u' == ch ||
      'L' == ch || 'l' == ch ||
      'F' == ch || 'f' == ch) {
    *token++ = ch;
    max--;
    while( max>0 && (ch = get_char()) ) {
      if( 'U' == ch || 'u' == ch ||
          'L' == ch || 'l' == ch ||
          'F' == ch || 'f' == ch ) {
        *token++ = ch;
        max--;
      }
      else {
        break;
      }
    }
  }

  // Hand back the character that ended the number. At EOF ch
  // is 0, which put_char() ignores.
  put_char(ch);
  *token = 0;
}
// ================================================================
// get_identifier
// Reads an identifier made of letters, digits, '_' and '$' from
// the input. The caller has put the first character back, so
// the whole identifier is read here.
//   token - destination, always NUL terminated on return.
//   max   - maximum number of characters to read.
// The first character that is not part of the identifier is put
// back.
// ================================================================
void ccdoc::phase1::scanner::get_identifier(char* token,int max)
{
  for( ; max > 0 ; --max ) {
    const char c = get_char();
    if( !c )
      break;

    const bool is_letter = ( 'a' <= c && c <= 'z' ) || ( 'A' <= c && c <= 'Z' );
    const bool is_digit  = ( '0' <= c && c <= '9' );
    const bool is_extra  = ( '_' == c || '$' == c );
    if( !( is_letter || is_digit || is_extra ) ) {
      // Non-identifier token found, we are done.
      *token = 0;
      put_char(c);
      return;
    }
    *token++ = c;
  }

  // We reached the end of the file (or ran out of space).
  // The end of file is legal for things like:
  // #endif
  *token = 0;
}
// ================================================================
// scan ccdoc style1
// Called by scan_token() after "/*" has been read.
//   token - buffer that receives the formatted comment.
//   max   - size limit passed on to the formatter.
// Returns:
//   - the next ordinary token (via scan_token()) when the
//     comment is a plain C comment, a special "/**@#" directive,
//     an empty ccdoc comment, a comment that is ignored because
//     of "@file" in doxygen mode, or when the input ends inside
//     the comment (a warning is logged in that case),
//   - otherwise the result of formatting the collected ccdoc
//     comment into token.
// The comment is processed one line at a time. Each line has
// its leading w/s and leading asterisk removed before it is
// handed to the comment parser.
// ================================================================
const char* ccdoc::phase1::scanner::scan_ccdoc_style1(char* token,
                                                      int max)
{
  bool suffix_flag = false;

  char ch = get_char();
  if( '*' != ch ) {
    if( ch == '!' ) {
      // Issue 0162:
      // This may be a doxygen style comment of the form:
      //   int a; /*!< ... */
      ch = get_char();
      if( ch == '<' ) {
        suffix_flag = true;
      }
      else {
        // This could be '*'.
        put_char(ch);
      }
    }
    if( !suffix_flag ) {
      // This is not a ccdoc comment,
      // skip to the end of the comment.
      while( ch != 0 ) {
        char pch = ch;
        ch = get_char();
        if( '*' == pch && '/' == ch )
          break;
      }
      return scan_token();
    }
  }

  // Issue 0162:
  //   Check for '/**<'. This is a doxygen style suffix comment.
  ch = get_char();
  if( ch == '<' ) {
    suffix_flag = true;
  }
  else {
    put_char(ch);
  }

  // Found: '/**'
  // ================================================
  // Check for the special pre-processing directives:
  // ================================================
  if( scan_ccdoc_style1_special() )
    return scan_token();

  // Re-format the comment for consumption by the parser.
  // Here are the comment fields.
  scanner_doc doc(*this,m_sw);

  // Define the processing mode.
  static char line[65536]; // maximum line length
  // Last position that may hold a character; the slot after it
  // is reserved for the terminating NUL.
  char* const line_end = line + sizeof(line) - 1;
  bool first = true;
  bool done = false;
  bool ignore_flag = false;
  while(!done) {
    // ================================================
    // Skip leading w/s
    // ================================================
    char ch = get_char();
    while(ch && ch != '\n' && (ch > 0 && ch <= ' ') ) {
      ch = get_char();
    }

    // ================================================
    // Skip the leading asterisk if it exists.
    // ================================================
    if( !first && '*' == ch ) {
      ch = get_char();
      if( '/' == ch ) {
        done = true;
        break;
      }
    }

    // ================================================
    // Now load the line for directive processing.
    // Don't trim w/s, it may be needed for <pre></pre>.
    // ================================================
    char* pline = line;
    while( ch && ch != '\n' ) {
      if( pline >= line_end ) {
        // The line buffer is full. Hand the pending character
        // back and process what has been collected as one
        // line; the remainder is picked up by the next pass
        // instead of being written past the end of the
        // buffer.
        put_char(ch);
        break;
      }
      *pline++ = ch;
      char pch = ch;
      ch = get_char();
      if( '*' == pch && ch == '/' ) {
        // This is the end of the comment.
        //
        // Make sure that any preceding directives
        // are processed, such as:
        //   '/**'
        //   ' * @return Foo bar spam */'
        //
        // Also make sure that all trailing asterisks are eaten:
        //  '/**'
        //  ' **/'
        //
        pline--; // *pline == '*'
        ccdoc_assert( '*' == *pline );
        while( pline>line && '*' == *pline )
          --pline;
        if( pline == line ) {
          // We are done.
          done = true;
          break;
        }
        // There may be some stuff on this line that
        // we need to parse. Put back "*/" preceded by a new
        // line so that the next pass sees the comment end on
        // a line of its own.
        pline++;
        put_char('\n');
        put_char('/');
        put_char('*');
        break;
      }
    }
    *pline = 0;

    // ================================================
    // EOF
    // ================================================
    if( !ch ) {
      // The end of the file was reached before the
      // comment was terminated.
      s_log.warning()
        << "Unexpected EOF found, unterminated ccdoc comment "
        << "specified at line "
        << m_lineno << " in " << m_name.c_str()
        << ".\n"
        << s_log.enable();
      return scan_token();
    }

    // ================================================
    // Eliminate special tokens from the comment line.
    // In doxygen mode a comment that contains "@file"
    // is ignored as a whole.
    // ================================================
    if( m_sw.doxygen() ) {
      if( !ignore_flag ) {
        if( contains_token(line,"@file") ) {
          ignore_flag = true;
        }
      }
    }

    // ================================================
    // At this point we have the line.
    // Terminate it and write it out in debug mode.
    // ================================================
    if( m_debug ) {
      s_log << "CCDOC_PHASE1_DEBUG: ccdoc_line: '" << line << "'\n";
    }

    // ================================================
    // Skip the first line, if it is empty to
    // avoid conflicts when trying to determine
    // the long description for the following case:
    //   /**            | <-- line 1 (blank - ignore)
    //    * short       | <-- line 2 (short description)
    //    *             | <-- line 3 (blank - separator)
    //    * long        | <-- line 4 (long description)
    //    */            | <-- line 5 (end of comment)
    // ================================================
    if(first) {
      first = false;
      if(*line == 0)
        continue;
    }

    // ================================================
    // Issue 0082: only do this if -nojdsds is specified.
    // Set the short description flag to false
    // if a blank line (other than the first one)
    // is encountered.
    // ================================================
    if(!m_sw.jdsds() && *line == 0 && doc.m_mode == scanner_doc::SHORT) {
      doc.m_mode = scanner_doc::LONG;
      continue;
    }

    doc.parse_line(line);
  }

  // Issue 0162:
  if( suffix_flag )
    doc.m_comment.add_suffix(true);

  // ================================================
  // At this point we have a valid ccdoc comment.
  // Format it for the parser to make things easy.
  // If it is empty, ignore it.
  // ================================================
  if( doc.empty() || ignore_flag )
    return scan_token();
  return doc.format(token,max);
}
// ================================================================
// scan ccdoc style1 special
// Called after "/**" has been read. Recognizes:
//   /**@#-*\/       - Start ignoring characters
//   /**@#+*\/       - Stop ignoring characters.
//   /**@#=<char>*\/ - Insert the specified token
//                     into the input stream.
// Returns true when one of the directives was consumed. Returns
// false otherwise, with every character that was read ahead put
// back so that the input is unchanged.
// ================================================================
bool ccdoc::phase1::scanner::scan_ccdoc_style1_special()
{
  // Expect '@'.
  char c = get_char();
  if( c != '@' ) {
    put_char(c);
    return false;
  }

  // Expect '#'. From here on a mismatch restores the look
  // ahead in reverse reading order because the put-back store
  // hands out the last character first.
  c = get_char();
  if( c != '#' ) {
    put_char(c);
    put_char('@');
    return false;
  }

  // Expect the operation selector: '-', '+' or '='.
  const char op = get_char();
  const bool is_insert = ( '=' == op );
  if( !is_insert && '-' != op && '+' != op ) {
    put_char(op);
    put_char('#');
    put_char('@');
    return false;
  }

  // Only the '=' form carries a character to insert. For the
  // other forms the payload stays 0, which put_char() ignores.
  char payload = 0;
  if( is_insert )
    payload = get_char();

  // Expect the closing "*/".
  c = get_char();
  if( c != '*' ) {
    put_char(c);
    put_char(payload);
    put_char(op);
    put_char('#');
    put_char('@');
    return false;
  }
  c = get_char();
  if( c != '/' ) {
    put_char(c);
    put_char('*');
    put_char(payload);
    put_char(op);
    put_char('#');
    put_char('@');
    return false;
  }

  if( is_insert ) {
    // We found: "/**@#=<char>*\/".
    // Insert the character into the input stream.
    put_char(payload);
    return true;
  }

  if( '-' == op ) {
    // We found: "/**@#-*\/", Ignore all characters
    // until we find "/**@#+*\/".
    // Nested "/**@#-*\/" and an EOF before the end marker are
    // both legal, so neither produces a warning.
    static const char resume_tail[] = "**@#+*/";
    for( c = get_char() ; c ; c = get_char() ) {
      if( '/' != c )
        continue;

      // A '/' was seen, try to match the rest of the marker.
      const char* want = resume_tail;
      for( ; *want ; ++want ) {
        c = get_char();
        if( !c || c != *want )
          break;
      }
      if( !c )      // EOF in the middle of the marker.
        break;
      if( !*want )  // The whole marker matched.
        break;

      // Mismatch: re-scan the offending character, it may
      // start the marker itself. This handles cases like:
      //   "/**@/**@#+*\/"
      put_char(c);
    }
  }

  // "/**@#+*\/" on its own is ignored. It is only meaningful
  // as the end marker during "/**@#-*\/" processing.
  return true;
}
// ================================================================
// scan ccdoc style2
//
// Scans a '//' comment once the leading two slashes have been
// read and decides whether it is a ccdoc comment:
//
//   STYLE2A:  '//[ \t]*@{' ... '//@}'      (multi-line)
//   STYLE2B:  '//[ \t]*/**' ... '//\*/'    (multi-line)
//   STYLE2C:  '//[ \t]*@-', '//!<', '///<' (single line suffix)
//
// Any other '//' comment is skipped to the end of the line and
// the result of scan_token() is returned instead.
//
// For a ccdoc comment each line is collected and handed to
// scanner_doc::parse_line(). The comment is dropped (and
// scan_token() is returned) if it is empty or contains '@file'.
//
// @param token Output buffer, passed through to doc.format().
// @param max   Size limit, passed through to doc.format().
// @returns The result of doc.format(token,max) for a valid ccdoc
//          comment, otherwise the result of scan_token().
// ================================================================
const char* ccdoc::phase1::scanner::scan_ccdoc_style2(char* token,
                                                      int max)
{
  // ================================================
  // Check for ccdoc comment designators:
  //   '//[ \t]*@{' 'vi}'  -- brace balancing for vi, added by bzoe
  //     or
  //   '//[ \t]*/**'
  // ================================================
  enum {
    NOT_A_COMMENT,
    STYLE2A,
    STYLE2B,
    STYLE2C} // Issue 0086: STYLE2C, added by bzoe 2001/11/26
  ccdoc_flag = NOT_A_COMMENT;

  // Get the third character:
  //  //x
  //    ^
  char ch = get_char();

  // Issue 0162:
  if( '!' == ch || '/' == ch ) {
    // Look for doxygen single comment indicators:
    //   '//!<' or '///<'
    char first_ch = ch;
    ch = get_char();
    if( '<' == ch ) {
      // This is a doxygen style comment of the form:
      //   int x; //!< ...
      // or
      //   int x; ///< ...
      //
      // This is the same as ccdoc:
      //   int x; //@- ...
      // Rewrite the input so that the '@' branch below sees '@-'.
      put_char('-');
      ch = '@';
    }
    else {
      // This is not a doxygen style comment.
      put_char(ch);       // NOT '<'
      ch = first_ch;
    }
  }

  // ccdoc doesn't care about the leading w/s.
  while( ' ' == ch || '\t' == ch ) {
    ch = get_char();
  }

  // process the other tokens.
  if( '@' == ch ) {
    // Look for: '//[ \t]*@{' 'vi}'  -- brace balancing for vi, added by bzoe
    ch = get_char();
    if( '{' == ch ) {
      ccdoc_flag = STYLE2A;
    }
    else if( '-' == ch ) {
      // Issue 0086
      // Added by bzoe to support the new
      // single line, suffix syntax:
      //  int foo; //@- this is a comment
      ccdoc_flag = STYLE2C;
    }
    else {
      put_char(ch); // in case it is a '\n'
    }
  }
  else if( '/' == ch ) {
    // Look for: '//[ \t]*/**'
    // Discard: '///***'
    ch = get_char();
    if( '*' == ch ) {
      ch = get_char();
      if( '*' == ch ) {
        ch = get_char();
        if( '*' != ch ) {
          ccdoc_flag = STYLE2B;
          put_char(ch);
        }
        else {
          put_char(ch);
          put_char('*');
          put_char('*');
          put_char('/');
        }
      }
      else {
        put_char(ch);
        put_char('*');
        put_char('/');
      }
    }
    else {
      put_char(ch);
      put_char('/');
    }
  }
  else {
    put_char(ch);
  }

  // ================================================
  // This is not a ccdoc comment,
  // skip to the end of the line.
  //
  // NOTE(review): every NOT_A_COMMENT path above has
  // already pushed ch back, so when ch is '\n' it is
  // pushed a second time here. Confirm that put_char()
  // and the callers tolerate the extra character.
  // ================================================
  if(ccdoc_flag == NOT_A_COMMENT) {
    while( ch != 0 && ch != '\n' ) {
      ch = get_char();
    }
    if( '\n' == ch ) {
      put_char(ch);
    }
    return scan_token();
  }

  // ================================================
  // Get the rest of the tokens on the line.
  // Here are the comment fields.
  // ================================================
  scanner_doc doc(*this,m_sw);

  // Define the processing mode.
  static char line[65536]; // maximum line length
  // Last position that may hold a character, the final
  // slot is reserved for the terminating NUL.
  char* const pline_max = &line[sizeof(line)-1];
  bool first = true;        // true until the first line has been handled
  bool done = false;        // set when the end of the comment is found
  bool ignore_flag = false; // set when '@file' is seen in any line
  while(!done) {
    // ================================================
    // Skip leading w/s
    // ================================================
    char ch = get_char();
    while(ch && ch !='\n' && (ch > 0 && ch <= ' ') ) {
      ch = get_char();
    }

    // ================================================
    // This must be either a comment or a blank line.
    // Both are valid.
    // ================================================
    if( !first ) {
      if( '/' == ch ) {
        ch = get_char();
        if( '/' != ch ) {
          s_log.warning()
            << "Invalid ccdoc comment specified at line "
            << m_lineno << " in " << m_name.c_str()
            << ".\n"
            << s_log.enable();
          put_char(ch);
          put_char('/');
          return scan_token();
        }
        // Check for end of comment.
        // Skip the lead '//'
        ch = get_char();

        // 'vi{' -- brace balancing for vi, added by bzoe
        // Is this '//@}'?
        if( ccdoc_flag == STYLE2A ) {
          if( '@' == ch ) {
            char ch1 = get_char();
            // 'vi{' -- brace balancing for vi, added by bzoe
            if( '}' == ch1 ) {
              // Discard the rest of the terminator line.
              while( ch1 && ch1 != '\n' )
                ch1 = get_char();
              line[0] = 0;
              done = true;
              break;
            }
            put_char(ch1);
          }
        }
        else if( ccdoc_flag == STYLE2B ) {
          // Is this '//\*/'?
          if( '*' == ch ) {
            char ch1 = get_char();
            if( '/' == ch1 ) {
              // Discard the rest of the terminator line.
              while( ch1 && ch1 != '\n' )
                ch1 = get_char();
              line[0] = 0;
              done = true;
              break;
            }
            // Skip the asterisk in this form.
            ch = ch1;
          }
          // Is this '// \*/'?
          else if( ' ' == ch || '\t' == ch ) {
            char ch1 = get_char();
            if( '*' == ch1 ) {
              char ch2 = get_char();
              if( '/' == ch2 ) {
                // Discard the rest of the terminator line.
                while( ch2 && ch2 != '\n' )
                  ch2 = get_char();
                line[0] = 0;
                done = true;
                break;
              }
              // Skip the asterisk and preceding space in this form.
              ch = ch2;
            }
            else {
              put_char(ch1);
            }
          }
        }
      }
    }

    // ================================================
    // Now load the line for directive processing.
    // Don't trim w/s, it may be needed for <pre></pre>.
    // ================================================
    char* pline = line;
    bool truncated = false;
    while( ch && ch != '\n' ) {
      if( pline >= pline_max ) {
        // The line buffer is full. Drop the rest of the
        // line instead of writing past the end of it.
        truncated = true;
        ch = get_char();
        continue;
      }
      *pline++ = ch;
      char pch = ch;
      ch = get_char();
      // The end token checks below look back three characters
      // from pline, so at least four must have been stored.
      if( pline>&line[3] ) {
        if( ccdoc_flag == STYLE2A ) {
          // 'vi{' -- brace balancing for vi, added by bzoe
          if( '@' == pch && ch == '}' ) {
            // This may be the end of the comment.
            //
            // Make sure that any preceding directives
            // are processed, such as:
            //   '//@{ @return Foo bar spam //@}'
            //
            char* mark = pline;
            pline--; // *pline == '@'
            ccdoc_assert( '@' == *pline );
            char ch1 = *--pline;
            char ch2 = *--pline;
            if( '/' == ch1 && '/' == ch2 ) {
              if( pline == line ) {
                // We are done.
                done = true;
                break;
              }

              // 'vi{' -- brace balancing for vi, added by bzoe
              // This was an end token '//@}',
              // parse the other stuff on the line.
              pline = mark;
              pline--; // *pline == '@'
              pline--; // *pline == '/'
              pline--; // *pline == '/'
              *pline = 0;
              done = true;

              // Ignore everything to the end of the line.
              while( ch && ch != '\n' ) {
                ch = get_char();
              }
              break;
            }
            else {
              // 'vi{' -- brace balancing for vi, added by bzoe
              // This was not an end token '//@}',
              // continue parsing.
              pline = mark;
            }
          }
        }
        else if( ccdoc_flag == STYLE2B ) {
          if( '*' == pch && ch == '/' ) {
            // This may be the end of the comment.
            //
            // Make sure that any preceding directives
            // are processed, such as:
            //   '///** @return Foo bar spam //\*/'
            //
            char* mark = pline;
            pline--; // *pline == '*'
            ccdoc_assert( '*' == *pline );
            char ch1 = *--pline;
            char ch2 = *--pline;
            if( '/' == ch1 && '/' == ch2 ) {
              if( pline == line ) {
                // We are done.
                done = true;
                break;
              }

              // This was an end token '//\*/',
              // parse the other stuff on the line.
              pline = mark;
              pline--; // *pline == '*'
              pline--; // *pline == '/'
              pline--; // *pline == '/'
              *pline = 0;
              done = true;

              // Ignore everything to the end of the line.
              while( ch && ch != '\n' ) {
                ch = get_char();
              }
              break;
            }
            else {
              // This was not an end token '//\*/',
              // continue parsing.
              pline = mark;
            }
          }
        }
      }
    }
    *pline = 0;

    // ================================================
    // Issue 0086: a single line suffix comment always
    // ends at the end of the line. This is checked
    // here, outside of the load loop, so that empty
    // comments and comments with fewer than four
    // characters are terminated as well. Otherwise the
    // next source line would be read as comment text.
    // ================================================
    if( ccdoc_flag == STYLE2C && '\n' == ch ) {
      done = true;
    }

    if( truncated ) {
      s_log.warning()
        << "ccdoc comment line is too long, it was truncated at line "
        << m_lineno << " in " << m_name.c_str()
        << ".\n"
        << s_log.enable();
    }

    // ================================================
    // EOF
    // ================================================
    if( !ch ) {
      // The end of the file was reached before the
      // comment was terminated.
      s_log.warning()
        << "Unexpected EOF found, unterminated ccdoc comment "
        << "specified at line "
        << m_lineno << " in " << m_name.c_str()
        << ".\n"
        << s_log.enable();
      return scan_token();
    }

    // ================================================
    // Eliminate special tokens from the comment line.
    // A comment that contains '@file' is discarded
    // as a whole at the end of this function.
    // ================================================
    if( !ignore_flag ) {
      if( contains_token(line,"@file") ) {
        ignore_flag = true;
      }
    }

    // ================================================
    // At this point we have the line.
    // Terminate it and write it out in debug mode.
    // ================================================
    if( m_debug ) {
      s_log << "CCDOC_PHASE1_DEBUG: ccdoc_line: '" << line << "'\n";
    }

    // ================================================
    // Skip the first line, if it is empty to
    // avoid conflicts when trying to determine
    // the long description for the following case:
    //   //@{           | <-- line 1 (blank - ignore)
    //   // short       | <-- line 2 (short description)
    //   //             | <-- line 3 (blank - separator)
    //   // long        | <-- line 4 (long description)
    //   //             | <-- line 5 (end of comment)
    // ================================================
    if( ccdoc_flag == STYLE2C ) { // bzoe
      // Issue 0086
      first = false;
      if (*line == 0) {
        s_log.warning()
          << "Empty single suffix comment\n"
          << s_log.enable();
      }
      if (m_debug) {
        s_log << "STYLE2C: " << line << "\n";
      }
    }
    else {
      if(first) {
        first = false;
        if(*line == 0)
          continue;
      }

      // ================================================
      // Issue 0082: only do this if -nojdsds is specified.
      // Set the short description flag to false
      // if a blank line (other than the first one)
      // is encountered.
      // ================================================
      if(!m_sw.jdsds() && *line == 0 && doc.m_mode == scanner_doc::SHORT) {
        doc.m_mode = scanner_doc::LONG;
        continue;
      }
    }

    doc.parse_line(line);

    // Issue 0086
    if (ccdoc_flag == STYLE2C)
      doc.m_comment.add_suffix(true);  // bzoe
  }

  // ================================================
  // At this point we have a valid ccdoc comment.
  // Format it for the parser to make things easy.
  // If it is empty, ignore it.
  // ================================================
  if( doc.empty() || ignore_flag )
    return scan_token();
  return doc.format(token,max);
}
// ================================================================
// Strip out token.
//
// Removes, in place, every occurrence of token in buf that is
// followed by w/s (any character <= ' ') or by the end of the
// string. The trailing w/s itself is kept.
//
// No check is made on the character that precedes the token, so
// a token that forms the tail of a longer word is removed too.
//
// Nothing is done if buf or token is null or if token is empty.
//
// @param buf   The NUL terminated buffer to modify.
// @param token The NUL terminated token to remove.
// ================================================================
void ccdoc::phase1::scanner::strip_token(char* buf,
                                         const char* token) const
{
  if( !buf || !token || !token[0] )
    return;

  char* cur = buf;
  while( *cur ) {
    // Try to match the whole token at the current position.
    // The comparison stops at the end of buf because a NUL
    // never equals a (non-NUL) token character.
    const char* tail = cur;
    const char* ptoken = token;
    for(;*ptoken && *tail == *ptoken;++ptoken,++tail);

    // The token must be followed by w/s or the end of the
    // string. Compare as unsigned so that characters >= 0x80
    // are not mistaken for w/s where plain char is signed.
    if( *ptoken || static_cast<unsigned char>(*tail) > ' ' ) {
      ++cur;
      continue;
    }

    // They matched. Strip out the token.
    // Ex. token = CDE
    //   AB CDE FG
    //      ^   ^
    //      |   +--- tail
    //      +------- cur
    //
    char* dst = cur;
    for(;*tail;++dst,++tail) {
      *dst = *tail;
    }
    *dst = 0;

    // Do not advance cur. The text that followed the token
    // now starts at cur and must be scanned as well, otherwise
    // a second occurrence close behind the first is missed.
  }
}
// ================================================================
// Contains token.
//
// Reports whether token occurs in buf as a separate word. It
// must start at the beginning of buf or directly after a w/s
// character and it must be followed by a w/s character or by
// the end of the string. Any character <= ' ' counts as w/s.
//
// @param buf   The NUL terminated buffer to search.
// @param token The NUL terminated token to look for.
// @returns true if the token was found, false otherwise. false
//          is also returned if buf or token is null or if token
//          is empty.
// ================================================================
bool ccdoc::phase1::scanner::contains_token(const char* buf,
                                            const char* token) const
{
  if( !buf || !token || !token[0] )
    return false;

  for(const char* pbuf = buf;*pbuf;++pbuf) {
    // Make sure that there is preceding w/s or
    // we are at the start of the line. The preceding
    // character is tested, not the current one, so that
    // matching starts on the first character of the word.
    // Compare as unsigned so that characters >= 0x80 are
    // not mistaken for w/s where plain char is signed.
    if( pbuf != buf && static_cast<unsigned char>(pbuf[-1]) > ' ' )
      continue;

    // The comparison stops at the end of buf because a NUL
    // never equals a (non-NUL) token character.
    const char* p2 = pbuf;
    const char* ptoken = token;
    for(;*ptoken && *p2 == *ptoken;++ptoken,++p2);

    // Make sure that there is trailing w/s.
    if( !*ptoken && static_cast<unsigned char>(*p2) <= ' ' )
      return true;
  }
  return false;
}
