# $Id: Converter.pm 7942 2017-08-28 20:42:04Z gavin $
# Converter.pm: Common code for Converters.
#
# Copyright 2011, 2012, 2013, 2014, 2015, 2016 Free Software Foundation, Inc.
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License,
# or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
# 
# Original author: Patrice Dumas <pertusus@free.fr>

package Texinfo::Convert::Converter;

use 5.00405;
use strict;

# for fileparse
use File::Basename;
# for file names portability
use File::Spec;
use Encode;

use Texinfo::Report;
use Texinfo::Common;
use Texinfo::Convert::Text;
use Texinfo::Convert::Texinfo;
use Texinfo::Structuring;

use Carp qw(cluck);

require Exporter;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
@ISA = qw(Exporter Texinfo::Report);

# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.

# This allows declaration       use Texinfo::Convert::Converter ':all';
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
# will save memory.
%EXPORT_TAGS = ( 'all' => [ qw(
xml_protect_text
xml_comment
xml_accent
xml_accents
) ] );

@EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );

@EXPORT = qw(
);

$VERSION = '6.5';

my %defaults = (
  'ENABLE_ENCODING'      => 1,
  'OUTFILE'              => undef,
  'SUBDIR'               => undef,
  'documentlanguage'     => undef,
);

# defaults for all converters.  Maybe more could be added, especially what
# can be set with --set and should be the same for all the formats.
our %all_converters_defaults = (
  'htmlxref_files'       => undef,
  'output_format'        => undef,
  'SPLIT_SIZE'           => 300000,
  'paragraphindent'      => 3,
  'fillcolumn'           => 72,
  'expanded_formats'     => undef,
  'include_directories'  => undef,
  'IMAGE_LINK_PREFIX'    => undef,
  'NUMBER_SECTIONS'      => 1,
  'NUMBER_FOOTNOTES'     => 1,
  'frenchspacing'        => 'off',
  'paragraphindent'      => 3,
  'firstparagraphindent' => 'none',
  'allowcodebreaks'      => 'true',
  'footnotestyle'        => 'end',
  'deftypefnnewline'     => 'off',
  'BASEFILENAME_LENGTH'  => 255 - 10,
# This is not used much as converters do their best to give a sane output
  'FIX_TEXINFO'          => 1,
# This is the default, mainly for tests; the caller should set them.  These
# values are what is used in tests of the Converters.
  'PACKAGE_AND_VERSION'  => 'texinfo',
  'PACKAGE_VERSION'      => '',
  'PACKAGE_URL'          => 'http://www.gnu.org/software/texinfo/',
  'PROGRAM'              => '',

  'DEBUG'                => 0,
  'TEST'                 => 0,
  'translated_commands'  => {'error' => 'error@arrow{}',},
);

# For translation of in document string.
if (0) {
  my $self;
  $self->gdt('error@arrow{}');
}

sub converter_defaults($$)
{
  return %defaults;
}

sub converter_initialize($)
{
}

sub converter_global_commands($)
{
  return ('documentlanguage', 'documentencoding');
}

sub output_internal_links($)
{
  my $self = shift;
  return undef;
}

sub _informative_command_value($$)
{
  my $self = shift;
  my $root = shift;

  my $cmdname = $root->{'cmdname'};

  if ($Texinfo::Common::misc_commands{$cmdname} eq 'skipline') {
    return 1;
  } elsif (exists($root->{'extra'}->{'text_arg'})) {
    return $root->{'extra'}->{'text_arg'};
  } elsif ($root->{'extra'} and $root->{'extra'}->{'misc_args'}
           and exists($root->{'extra'}->{'misc_args'}->[0])) {
    return $root->{'extra'}->{'misc_args'}->[0];
  }
  return undef;
}

# FIXME documentencoding handling is not reverted by resetting
# a value with set_conf, so _unset_global_multiple_commands won't
# reverse what _set_global_multiple_commands did through 
# _informative_command.
sub _informative_command($$)
{
  my $self = shift;
  my $root = shift;

  my $cmdname = $root->{'cmdname'};
  $cmdname = 'shortcontents' if ($cmdname eq 'summarycontents');

  return if ($self->{'set'}->{$cmdname});

  my $value = $self->_informative_command_value($root);
  if (defined($value)) {
    $self->set_conf($cmdname, $value);
  }
}

sub register_close_file($$)
{
  my $self = shift;
  my $filename = shift;
  if ($self->{'unclosed_files'}->{$filename}) {
    delete $self->{'unclosed_files'}->{$filename};
  } else {
    cluck "$filename not opened\n";
  }
}

sub converter(;$)
{
  my $class = shift;
  my $converter = { 'set' => {} };

  my $conf;
  my $name = 'converter';

  if (ref($class) eq 'HASH') {
    $conf = $class;
    bless $converter;
  } elsif (defined($class)) {
    bless $converter, $class;
    $name = ref($converter);
    $conf = shift;
  } else {
    bless $converter;
    $conf = shift;
    $name = ref($converter);
  }
  my %defaults = $converter->converter_defaults($conf);
  foreach my $key (keys(%all_converters_defaults)) {
    $defaults{$key} = $all_converters_defaults{$key} 
      if (!exists($defaults{$key}));
  }
  foreach my $key (keys(%defaults)) {
    if (Texinfo::Common::valid_option($key)) {
      $converter->{'conf'}->{$key} = $defaults{$key};
    } else {
      $converter->{$key} = $defaults{$key};
    }
  }
  $converter->{'conf_default'} = \%defaults;
  if (defined($conf)) {
    if ($conf->{'parser'}) {
      $converter->{'parser'} = $conf->{'parser'};
      $converter->{'extra'} 
         = $converter->{'parser'}->global_commands_information();
      $converter->{'info'} = $converter->{'parser'}->global_informations();
      if ($converter->{'info'} 
          and $converter->{'info'}->{'input_perl_encoding'}
          and !defined($conf->{'INPUT_PERL_ENCODING'})) {
        $conf->{'INPUT_PERL_ENCODING'}
              = $converter->{'info'}->{'input_perl_encoding'};
      }
      if ($converter->{'info'} 
          and $converter->{'info'}->{'input_encoding_name'}
          and !defined($conf->{'INPUT_ENCODING_NAME'})) {
        $conf->{'INPUT_ENCODING_NAME'} 
             = $converter->{'info'}->{'input_encoding_name'};
      }
      my $floats = $converter->{'parser'}->floats_information();
      my $labels = $converter->{'parser'}->labels_information();
      $converter->{'structuring'} = $converter->{'parser'}->{'structuring'};

      $converter->{'floats'} = $floats if ($floats);
      $converter->{'labels'} = $labels if ($labels);
      foreach my $global_command ($converter->converter_global_commands()) {
        if (defined($converter->{'extra'}->{$global_command})) {
          my $root = $converter->{'extra'}->{$global_command};
          # always set unique commands
          if (ref($root) ne 'ARRAY') {
            $converter->_informative_command($root);
          }
        }
      }
      $converter->{'gettext'} = $converter->{'parser'}->{'gettext'};
      $converter->{'pgettext'} = $converter->{'parser'}->{'pgettext'};
      delete $conf->{'parser'};
    }
    foreach my $key (keys(%$conf)) {
      if (Texinfo::Common::valid_option($key)) {
        $converter->{'conf'}->{$key} = $conf->{$key};
      } elsif (!exists($defaults{$key})) {
        warn "$key not a possible configuration in $name\n";
      } else {
        $converter->{$key} = $conf->{$key};
      }
      $converter->{'set'}->{$key} = 1;
    }
  }
  $converter->set_conf('OUTPUT_ENCODING_NAME', 
                       $converter->get_conf('INPUT_ENCODING_NAME'))
     if ($converter->get_conf('INPUT_ENCODING_NAME'));
  if (!$converter->get_conf('OUTPUT_PERL_ENCODING')
       and $converter->get_conf('OUTPUT_ENCODING_NAME')) {
    my $perl_encoding 
      = Encode::resolve_alias($converter->get_conf('OUTPUT_ENCODING_NAME'));
    if ($perl_encoding) {
      $converter->set_conf('OUTPUT_PERL_ENCODING', $perl_encoding);
    }
  }
  if (!defined($converter->{'expanded_formats'})) {
    if ($converter->{'parser'}) {
      $converter->{'expanded_formats'} = $converter->{'parser'}->{'expanded_formats'};
    } else {
      $converter->{'expanded_formats'} = [];
    }
  }
  if (!defined($converter->{'include_directories'})) {
    if ($converter->{'parser'}) {
      $converter->{'include_directories'} = $converter->{'parser'}->{'include_directories'};
    } else {
      $converter->{'include_directories'} = [ '.' ];
    }
  }

  # turn the array to a hash for speed.  Not sure it really matters for such
  # a small array.
  foreach my $expanded_format(@{$converter->{'expanded_formats'}}) {
    $converter->{'expanded_formats_hash'}->{$expanded_format} = 1;
  }

  $converter->Texinfo::Report::new();

  $converter->converter_initialize();

  return $converter;
}

sub converter_unclosed_files($)
{
  my $self = shift;
  return $self->{'unclosed_files'};
}

sub converter_opened_files($)
{
  my $self = shift;
  if (defined($self->{'opened_files'})) {
    return @{$self->{'opened_files'}};
  } else {
    return ();
  }
}

sub _set_global_multiple_commands($;$)
{
  my $self = shift;
  my $multiple_commands_index = shift;
  $multiple_commands_index = 0 if (!defined($multiple_commands_index));

  foreach my $global_command ($self->converter_global_commands()) {
    if (defined($self->{'extra'}->{$global_command})
        and ref($self->{'extra'}->{$global_command}) eq 'ARRAY') {
      my $root = $self->{'extra'}->{$global_command}->[$multiple_commands_index];
      if ($self->get_conf('DEBUG')) {
        print STDERR "SET_global_multiple_commands($multiple_commands_index) $global_command\n";
      }
      $self->_informative_command($root);
    }
  }
}

# Notice that set_conf is used, which means that it is not possible to
# customize what is done for those commands.  For documentencoding, for
# example the values are not reset correctly, see the FIXME above.
sub _unset_global_multiple_commands($)
{
  my $self = shift;

  foreach my $global_command ($self->converter_global_commands()) {
    if (defined($self->{'extra'}->{$global_command})
        and ref($self->{'extra'}->{$global_command}) eq 'ARRAY') {
      if (exists($self->{'conf_default'}->{$global_command})) {
        if ($self->get_conf('DEBUG')) {
          my $default = 'UNDEF';
          $default = $self->{'conf_default'}->{$global_command} 
            if (defined($self->{'conf_default'}->{$global_command}));
          my $set = 0;
          $set = 1 if ($self->{'set'}->{$global_command});
          print STDERR "UNSET_global_multiple_commands $global_command ($set): $default\n";
        }
        if (Texinfo::Common::valid_option($global_command)) {
          $self->set_conf($global_command, $self->{'conf_default'}->{$global_command});
        } else {
          $self->{$global_command} = $self->{'conf_default'}->{$global_command};
        }
      }
    }
  }
}

use Carp;
sub get_conf($$)
{
  my $self = shift;
  my $conf = shift;
  if (!Texinfo::Common::valid_option($conf)) {
    cluck "CBUG: unknown option $conf\n";
    die;
    return undef;
  }
  return $self->{'conf'}->{$conf};
}

sub set_conf($$$)
{
  my $self = shift;
  my $conf = shift;
  my $value = shift;
  if (!Texinfo::Common::valid_option($conf)) {
    die "BBUG: unknown option $conf\n";
    return undef;
  } elsif (Texinfo::Common::obsolete_option($conf)) {
    warn(sprintf(main::__("Obsolete variable %s\n"), $conf));
  }
  if ($self->{'set'}->{$conf}) {
    return 0;
  } else {
    $self->{'conf'}->{$conf} = $value;
    return 1;
  }
}

sub force_conf($$$)
{
  my $self = shift;
  my $conf = shift; 
  my $value = shift;
  if (!Texinfo::Common::valid_option($conf)) {
    die "ABUG: unknown option $conf\n";
    return undef;
  } elsif (Texinfo::Common::obsolete_option($conf)) {
    warn(sprintf(main::__("Obsolete variable %s\n"), $conf));
  }
  $self->{'conf'}->{$conf} = $value;
  return 1;
}

my $STDIN_DOCU_NAME = 'stdin';

sub _set_outfile($$$)
{
  my $self = shift;

  # determine input file base name
  my $input_basename;
  if (defined($self->{'info'}->{'input_file_name'})) {
    my ($directories, $suffix);
    ($input_basename, $directories, $suffix) 
       = fileparse($self->{'info'}->{'input_file_name'});
  } else {
    # This could happen if called on a piece of texinfo
    $input_basename = '';
  }
  $self->{'input_basename'} = $input_basename;
  if ($input_basename eq '-') {
    $input_basename = $STDIN_DOCU_NAME 
  } else {
    $input_basename =~ s/\.te?x(i|info)?$//;
  }
  $self->{'input_basename_name'} = $input_basename;

  my $setfilename;
  if (defined($self->get_conf('setfilename'))) {
    $setfilename = $self->get_conf('setfilename');
  } elsif ($self->{'extra'} and $self->{'extra'}->{'setfilename'}
          and $self->{'extra'}->{'setfilename'}->{'extra'}
          and defined($self->{'extra'}->{'setfilename'}->{'extra'}->{'text_arg'})) {
     $setfilename = $self->{'extra'}->{'setfilename'}->{'extra'}->{'text_arg'}
  }

  # PREFIX overrides both setfilename and the input file base name
  if (defined($self->get_conf('PREFIX'))) {
    $setfilename = undef;
    $input_basename = $self->get_conf('PREFIX');
  }

  # the name of the document, which is more or less the basename, without 
  # extension
  my $document_name;
  # determine output file and output file name
  my $outfile;
  if (!defined($self->get_conf('OUTFILE'))) {
    if (defined($setfilename) and !$self->get_conf('NO_USE_SETFILENAME')) {
      $outfile = $setfilename;
      $document_name = $setfilename;
      $document_name =~ s/\.[^\.]*$//;
      if (!$self->get_conf('USE_SETFILENAME_EXTENSION')) {
        $outfile =~ s/\.[^\.]*$//;
        $outfile .= '.'.$self->get_conf('EXTENSION') 
          if (defined($self->get_conf('EXTENSION')) 
              and $self->get_conf('EXTENSION') ne '');
      }
    } elsif ($input_basename ne '') {
      $outfile = $input_basename;
      $document_name = $input_basename;
      $outfile .= '.'.$self->get_conf('EXTENSION') 
        if (defined($self->get_conf('EXTENSION')) 
            and $self->get_conf('EXTENSION') ne '');
    } else {
      $outfile = '';
      $document_name = $outfile;
    }
    if (defined($self->get_conf('SUBDIR')) and $outfile ne '') {
      my $dir = File::Spec->canonpath($self->get_conf('SUBDIR'));
      $outfile = File::Spec->catfile($dir, $outfile);
    }
    #$self->set_conf('OUTFILE', $outfile);
  } else {
    $document_name = $self->get_conf('OUTFILE');
    $document_name =~ s/\.[^\.]*$//;
    $outfile = $self->get_conf('OUTFILE');
  }

  # the output file without directories part.
  my $output_filename = $outfile;
  # this is a case that should happen rarely: one wants to get 
  # the result in a string and there is a setfilename.
  if ($outfile eq '' and defined($setfilename)
      and !$self->get_conf('NO_USE_SETFILENAME')) {
    $output_filename = $setfilename;
    $document_name = $setfilename;
    $document_name =~ s/\.[^\.]*$//;
  }
  my ($directories, $suffix);
  # We may be handling setfilename there, so it is not obvious that we
  # want to use fileparse and not consider unixish separators.  However, 
  # if this is setfilename, it should be a simple file name, so it 
  # should hopefully be harmless to use fileparse
  ($document_name, $directories, $suffix) = fileparse($document_name);
  $self->{'document_name'} = $document_name;
  ($output_filename, $directories, $suffix) = fileparse($output_filename);
  $self->{'output_filename'} = $output_filename;
  if ($self->get_conf('SPLIT')) {
    if (defined($self->get_conf('OUTFILE'))) {
      $self->{'destination_directory'} = $self->get_conf('OUTFILE');
    } elsif (defined($self->get_conf('SUBDIR'))) {
      $self->{'destination_directory'} = $self->get_conf('SUBDIR');
    } else {
      $self->{'destination_directory'} = $document_name;
    }
  } else {
    my ($out_filename, $output_dir, $suffix) = fileparse($outfile);
    if ($output_dir ne '') {
      $self->{'destination_directory'} = $output_dir;
    }
  }
  if (defined($self->{'destination_directory'}) 
      and $self->{'destination_directory'} ne '') {
    $self->{'destination_directory'} 
      = File::Spec->canonpath($self->{'destination_directory'});
  }
  $self->{'output_file'} = $outfile;
}

sub _id_to_filename($$)
{
  my $self = shift;
  my $id = shift;
  return substr($id, 0, $self->get_conf('BASEFILENAME_LENGTH'));
}

sub _sectioning_command_normalized_filename($$)
{
  my $self = shift;
  my $command = shift;
  my $no_unidecode;

  $no_unidecode = 1 if (defined($self->get_conf('USE_UNIDECODE')) 
                        and !$self->get_conf('USE_UNIDECODE'));

  my $normalized_name = Texinfo::Convert::NodeNameNormalization::transliterate_texinfo(
       {'contents' => $command->{'extra'}->{'misc_content'}},
                $no_unidecode);

  my $filename = $self->_id_to_filename($normalized_name);
  $filename .= '.'.$self->get_conf('EXTENSION') 
    if (defined($self->get_conf('EXTENSION')) 
      and $self->get_conf('EXTENSION') ne '');
  return ($normalized_name, $filename);
}

sub _node_filename($$)
{
  my $self = shift;
  my $node_info = shift;

  my $no_unidecode;
  $no_unidecode = 1 if (defined($self->get_conf('USE_UNIDECODE'))
                        and !$self->get_conf('USE_UNIDECODE'));

  my $filename;
  if (defined($node_info->{'normalized'})) { 
    if ($self->get_conf('TRANSLITERATE_FILE_NAMES')) {
      $filename = Texinfo::Convert::NodeNameNormalization::transliterate_texinfo(
       {'contents' => $node_info->{'node_content'}},
            $no_unidecode);
    } else {
      $filename = $node_info->{'normalized'};
    }
  } elsif (defined($node_info->{'node_content'})) { 
    $filename = Texinfo::Convert::NodeNameNormalization::normalize_node (
             { 'contents' => $node_info->{'node_content'} });
  } else {
    $filename = '';
  }
  $filename = $self->_id_to_filename($filename);
  return $filename;
}

sub _set_element_file($$$)
{
  my $self = shift;
  my $element = shift;
  my $filename = shift;

  if (!defined($filename)) {
    cluck("_set_element_file: filename not defined\n");
  }
  if ($self->get_conf('CASE_INSENSITIVE_FILENAMES')) {
    if (exists($self->{'filenames'}->{lc($filename)})) {
      if ($self->get_conf('DEBUG')) {
        print STDERR "Reusing ".$self->{'filenames'}->{lc($filename)}
                     ." for $filename\n";
      }
      $filename = $self->{'filenames'}->{lc($filename)};
    } else {
      $self->{'filenames'}->{lc($filename)} = $filename;
    }
  }
  $element->{'filename'} = $filename;
  if (defined($self->{'destination_directory'}) 
      and $self->{'destination_directory'} ne '') {
    $element->{'out_filename'} = 
      File::Spec->catfile($self->{'destination_directory'}, $filename);
  } else {
    $element->{'out_filename'} = $filename;
  }
}

sub _top_node_filename($)
{
  my $self = shift;

  my $top_node_filename;
  if (defined($self->get_conf('TOP_FILE')) 
      and $self->get_conf('TOP_FILE') ne '') {
    $top_node_filename = $self->get_conf('TOP_FILE');
  } else {
    if (defined($self->get_conf('TOP_NODE_FILE'))) {
      $top_node_filename = $self->get_conf('TOP_NODE_FILE');
    } else {
      # TOP_NODE_FILE is set in the default case.
      # If not the manual name is used.
      $top_node_filename = $self->{'document_name'};
    }
    if (defined($top_node_filename)) {
      my $top_node_extension;
      if ($self->get_conf('NODE_FILENAMES')) {
        $top_node_extension = $self->get_conf('NODE_FILE_EXTENSION');
      } else {
        $top_node_extension = $self->get_conf('EXTENSION');
      }
      $top_node_filename .= '.'.$top_node_extension 
        if (defined($top_node_extension) and $top_node_extension ne '');
    }
  }
  return $top_node_filename;
}

sub _get_element($$)
{
  my $self = shift;
  my $command = shift;
  my $find_container = shift;

  my $current = $command;

  while (1) {
    if ($current->{'type'}) {
      if ($current->{'type'} eq 'element') {
        return $current;
      }
    }
    if ($current->{'parent'}) {
      $current = $current->{'parent'};
    } else {
      return undef;
    }
  }
}

sub _set_pages_files($$)
{
  my $self = shift;
  my $elements = shift;

  # Ensure that the document has pages
  return undef if (!defined($elements) or !@$elements);

  my $extension = '';
  $extension = '.'.$self->get_conf('EXTENSION') 
            if (defined($self->get_conf('EXTENSION')) 
                and $self->get_conf('EXTENSION') ne '');

  if (!$self->get_conf('SPLIT')) {
    foreach my $element (@$elements) {
      if (!defined($element->{'filename'})) {
        $element->{'filename'} = $self->{'output_filename'};
        $element->{'out_filename'} = $self->{'output_file'};
      }
    }
  } else {
    my $node_top;
    #my $section_top;
    $node_top = $self->{'labels'}->{'Top'} if ($self->{'labels'});
    #$section_top = $self->{'extra'}->{'top'} if ($self->{'extra'});
  
    my $top_node_filename = $self->_top_node_filename();
    # first determine the top node file name.
    if ($self->get_conf('NODE_FILENAMES') and $node_top 
        and defined($top_node_filename)) {
      my ($node_top_element) = $self->_get_element($node_top);
      die "BUG: No element for top node" if (!defined($node_top));
      $self->_set_element_file($node_top_element, $top_node_filename);
    }
    my $file_nr = 0;
    my $previous_page;
    foreach my $element(@$elements) {
      # For Top node.
      next if (defined($element->{'filename'}));
      if (!$element->{'extra'}->{'first_in_page'}) {
        cluck ("No first_in_page for $element\n");
      }
      if (!defined($element->{'extra'}->{'first_in_page'}->{'filename'})) {
        my $file_element = $element->{'extra'}->{'first_in_page'};
        if ($self->get_conf('NODE_FILENAMES')) {
          foreach my $root_command (@{$file_element->{'contents'}}) {
            if ($root_command->{'cmdname'} 
                and $root_command->{'cmdname'} eq 'node') {
              my $node_filename;
              # double node are not normalized, they are handled here
              if (!defined($root_command->{'extra'}->{'normalized'})
                  or !defined($self->{'labels'}->{$root_command->{'extra'}->{'normalized'}})) {
                $node_filename = 'unknown_node';
              } else {
                $node_filename = $self->_node_filename($root_command->{'extra'});
              }
              $node_filename .= '.'.$self->get_conf('NODE_FILE_EXTENSION') 
                if (defined($self->get_conf('NODE_FILE_EXTENSION')) 
                 and $self->get_conf('NODE_FILE_EXTENSION') ne '');
              $self->_set_element_file($file_element, $node_filename);
              last;
            }
          }
          if (!defined($file_element->{'filename'})) {
            # use section to do the file name if there is no node
            my $command = $self->element_command($file_element);
            if ($command) {
              if ($command->{'cmdname'} eq 'top' and !$node_top
                  and defined($top_node_filename)) {
                $self->_set_element_file($file_element, $top_node_filename);
              } else {
                my ($normalized_name, $filename) 
                   = $self->_sectioning_command_normalized_filename($command);
                $self->_set_element_file($file_element, $filename)
              }
            } else {
              # when everything else has failed
              if ($file_nr == 0 and !$node_top 
                  and defined($top_node_filename)) {
                $self->_set_element_file($file_element, $top_node_filename);
              } else {
                my $filename = $self->{'document_name'} . "_$file_nr";
                $filename .= $extension;
                $self->_set_element_file($element, $filename);
              }
              $file_nr++;
            }
          }
        } else {
          my $filename = $self->{'document_name'} . "_$file_nr";
          $filename .= '.'.$self->get_conf('EXTENSION') 
            if (defined($self->get_conf('EXTENSION')) 
                and $self->get_conf('EXTENSION') ne '');
          $self->_set_element_file($file_element, $filename);
          $file_nr++;
        }
      }
      $element->{'filename'} 
         = $element->{'extra'}->{'first_in_page'}->{'filename'};
      $element->{'out_filename'}
         = $element->{'extra'}->{'first_in_page'}->{'out_filename'};
    }
  }

  foreach my $element (@$elements) {
    $self->{'file_counters'}->{$element->{'filename'}}++;
    print STDERR "Page $element ".Texinfo::Structuring::_print_element_command_texi($element).": $element->{'filename'}($self->{'file_counters'}->{$element->{'filename'}})\n"
      if ($self->get_conf('DEBUG'));
  }
}

sub output($$)
{
  my $self = shift;
  my $root = shift;

  my $elements;

  if (defined($self->get_conf('OUTFILE'))
      and ($Texinfo::Common::null_device_file{$self->get_conf('OUTFILE')}
           or $self->get_conf('OUTFILE') eq '-'
           or $self->get_conf('OUTFILE') eq '')) {
    if ($self->get_conf('SPLIT')) {
      $self->document_warn(sprintf($self->__("%s: output incompatible with split"),
                                   $self->get_conf('OUTFILE')));
      $self->force_conf('SPLIT', 0);
    }
  }
  if ($self->get_conf('SPLIT')) {
    $self->set_conf('NODE_FILES', 1);
  }
  if ($self->get_conf('NODE_FILES') 
      or ($self->get_conf('SPLIT') and $self->get_conf('SPLIT') eq 'node')) {
    $self->set_conf('NODE_FILENAMES', 1);
  }
  if ($self->get_conf('NODE_FILENAMES') and defined($self->get_conf('EXTENSION'))) {
     $self->set_conf('NODE_FILE_EXTENSION', $self->get_conf('EXTENSION'));
  }

  $self->_set_outfile();
  return undef unless $self->_create_destination_directory();

  # do that now to have it available for formatting
  # NOTE this calls Convert::Converter::_informative_command on all the 
  # @informative_global commands.
  # Thus sets among others language and encodings.
  $self->_set_global_multiple_commands(-1);

  if ($self->get_conf('USE_NODES')) {
    $elements = Texinfo::Structuring::split_by_node($root);
  } else {
    $elements = Texinfo::Structuring::split_by_section($root);
  }

  Texinfo::Structuring::split_pages($elements, $self->get_conf('SPLIT'));

  # determine file names associated with the different pages
  if ($self->{'output_file'} ne '') {
    $self->_set_pages_files($elements);
  }

  #print STDERR "$elements $elements->[0]->{'filename'}\n";

  # Now do the output
  my $fh;
  my $output = '';
  if (!$elements or !defined($elements->[0]->{'filename'})) {
    # no page
    my $outfile;
    if ($self->{'output_file'} ne '') {
      if ($self->get_conf('SPLIT')) {
        $outfile = $self->_top_node_filename();
        if (defined($self->{'destination_directory'}) 
            and $self->{'destination_directory'} ne '') {
          $outfile = File::Spec->catfile($self->{'destination_directory'}, 
                                         $outfile);
        }
      } else {
        $outfile = $self->{'output_file'};
      }
      print STDERR "DO No pages, output in $outfile\n"
        if ($self->get_conf('DEBUG'));
      $fh = $self->Texinfo::Common::open_out($outfile);
      if (!$fh) {
        $self->document_error(sprintf($self->__("could not open %s for writing: %s"),
                                      $outfile, $!));
        return undef;
      }
    } else {
      print STDERR "DO No pages, string output\n"
        if ($self->get_conf('DEBUG'));
    }

    if ($elements and @$elements) {
      foreach my $element (@$elements) {
        my $element_text = $self->convert_tree($element);
        $output .= $self->_output_text($element_text, $fh);
      }
    } else {
      $output .= $self->_output_text($self->convert($root), $fh);
    }
    # NOTE do not close STDOUT now to avoid a perl warning.
    if ($fh and $outfile ne '-') {
      $self->register_close_file($outfile);
      if (!close($fh)) {
        $self->document_error(sprintf($self->__("error on closing %s: %s"),
                                      $outfile, $!));
      }
    }
    return $output if ($self->{'output_file'} eq '');
  } else {
    # output with pages
    print STDERR "DO Elements with filenames\n"
      if ($self->get_conf('DEBUG'));
    my %files;
    
    foreach my $element (@$elements) {
      my $file_fh;
      # open the file and output the elements
      if (!$files{$element->{'filename'}}->{'fh'}) {
        $file_fh = $self->Texinfo::Common::open_out($element->{'out_filename'});
        if (!$file_fh) {
          $self->document_error(sprintf($self->__("could not open %s for writing: %s"),
                                    $element->{'out_filename'}, $!));
          return undef;
        }
        $files{$element->{'filename'}}->{'fh'} = $file_fh;
      } else {
        $file_fh = $files{$element->{'filename'}}->{'fh'};
      }
      my $element_text = $self->convert_tree($element);
      print $file_fh $element_text;
      $self->{'file_counters'}->{$element->{'filename'}}--;
      if ($self->{'file_counters'}->{$element->{'filename'}} == 0) {
        # NOTE do not close STDOUT here to avoid a perl warning
        if ($element->{'out_filename'} ne '-') {
          $self->register_close_file($element->{'out_filename'});
          if (!close($file_fh)) {
            $self->document_error(sprintf($self->__("error on closing %s: %s"),
                                  $element->{'out_filename'}, $!));
            return undef;
          }
        }
      }
    }
  }
}

sub _bug_message($$;$)
{
  my $self = shift;
  my $message = shift;
  my $current = shift;

  my $line_message = '';
  my $current_element_message = '';
  if ($current) {
    if ($current->{'line_nr'}) {
      my $line_number = $current->{'line_nr'};
      my $file = $line_number->{'file_name'};
      $line_message
        = "in: $line_number->{'file_name'}:$line_number->{'line_nr'}";
      if ($line_number->{'macro'} ne '') {
        $line_message .= " (possibly involving $line_number->{'macro'})";
      }
      $line_message .= "\n";
    }
    if ($current) {
      $current_element_message = "current: ". 
        Texinfo::Common::_print_current($current);
    }
  }
  my $additional_information = '';
  if ($line_message.$current_element_message ne '') {
    $additional_information = "Additional informations:\n".
       $line_message.$current_element_message;
  }
  warn "You found a bug: $message\n\n".$additional_information;
}

# This is not used as code, but used to mark months as strings to be
# translated
if (0) {
  my $self;
  my @mark_month_for_translation = (
   $self->gdt('January'),
   $self->gdt('February'),
   $self->gdt('March'),
   $self->gdt('April'),
   $self->gdt('May'),
   $self->gdt('June'),
   $self->gdt('July'),
   $self->gdt('August'),
   $self->gdt('September'),
   $self->gdt('October'),
   $self->gdt('November'),
   $self->gdt('December')
  );
}

sub _create_destination_directory($)
{
  my $self = shift;
  if (defined($self->{'destination_directory'})
      and ! -d $self->{'destination_directory'}) {
    if (!mkdir($self->{'destination_directory'}, oct(755))) {
      if ($self->get_conf('SPLIT') 
          and $self->get_conf('EXTENSION') 
          and $self->get_conf('EXTENSION') ne '') {
        my ($volume, $directories, $file) 
           = File::Spec->splitpath($self->{'destination_directory'}, 1);
        my $new_directory = File::Spec->catpath($volume, 
                 $directories . '.' . $self->get_conf('EXTENSION'), $file);
        if (! -d $new_directory) {
          if (!mkdir($new_directory, oct(755))) {
            $self->document_error(sprintf($self->__(
              "could not create directories `%s' or `%s': %s"), 
              $self->{'destination_directory'}, $new_directory, $!));
            return undef;
          }
        }
        $self->{'destination_directory'} = $new_directory;
      } else {
        $self->document_error(sprintf($self->__(
             "could not create directory `%s': %s"), 
             $self->{'destination_directory'}, $!));
        return undef;
      }
    }
  }
  return 1;
}

sub _float_type_number($$)
{
  my $self = shift;
  my $float = shift;

  my $type;
  if ($float->{'extra'}->{'type'}
      and $float->{'extra'}->{'type'}->{'normalized'} ne '') { 
    $type = {'contents' => $float->{'extra'}->{'type'}->{'content'}};
  }

  my $tree;
  if ($type) {            
    if (defined($float->{'number'})) {
      $tree = $self->gdt("{float_type} {float_number}",
          {'float_type' => $type,
            'float_number' => $float->{'number'}});
    } else {
      $tree = $self->gdt("{float_type}",
          {'float_type' => $type});
    }
  } elsif (defined($float->{'number'})) {
    $tree = $self->gdt("{float_number}",
       {'float_number' => $float->{'number'}});
  }
  return $tree;
}

# This is used when the formatted text has no comment nor new line, but
# one want to add the comment or new line from the original arg
sub _end_line_or_comment($$)
{
  my $self = shift;
  my $contents_possible_comment = shift;
  my $end_line;
  if ($contents_possible_comment
      and $contents_possible_comment->[-1]->{'cmdname'}
      and ($contents_possible_comment->[-1]->{'cmdname'} eq 'c'
          or $contents_possible_comment->[-1]->{'cmdname'} eq 'comment')) {
    $end_line = $self->convert_tree($contents_possible_comment->[-1]);
  } elsif ($contents_possible_comment      
           and $contents_possible_comment->[-1]->{'text'}) {
    my $text = $contents_possible_comment->[-1]->{'text'};
    if (chomp($text)) {
      $end_line = "\n";
    } else {
      $end_line = '';
    }
  } else {
    $end_line = '';
  }
  return $end_line;
}

sub _tree_without_comment($)
{
  my $contents_possible_comment = shift;
  my $comment;
  my $tree;

  if ($contents_possible_comment->{'contents'}
      and $contents_possible_comment->{'contents'}->[-1]->{'cmdname'}
      and ($contents_possible_comment->{'contents'}->[-1]->{'cmdname'} eq 'c'
           or $contents_possible_comment->{'contents'}->[-1]->{'cmdname'} eq 'comment')) {
    my @contents = @{$contents_possible_comment->{'contents'}};
    $comment = pop @contents;
    $tree = {'contents' => \@contents};
    # FIXME why this selection, and not everything?
    foreach my $key ('extra', 'type', 'cmdname', 'parent', 'line_nr') {
      $tree->{$key} = $contents_possible_comment->{$key}
        if (exists($contents_possible_comment->{$key}));
    }
  } else {
   $tree = $contents_possible_comment;
  }
  return ($comment, $tree);
}

sub _convert_argument_and_end_line($$)
{
  my $self = shift;
  my $root = shift;
  my ($comment, $tree) 
    = _tree_without_comment($root);
  my $converted = $self->convert_tree($tree);
  my $end_line;
  if ($comment) {
    $end_line = $self->convert_tree($comment);
  } else {
    if (chomp($converted)) {
      $end_line = "\n";
    } else {
      $end_line = "";
    }
  }
  return ($converted, $end_line);
}

sub _collect_leading_trailing_spaces_arg($$)
{
  my $self = shift;
  my $arg = shift;
  #print STDERR "$arg->{'type'}: @{$arg->{'contents'}}\n";
  my @result;
  if ($arg->{'contents'} and $arg->{'contents'}->[0] 
      and defined($arg->{'contents'}->[0]->{'text'})
      and $arg->{'contents'}->[0]->{'text'} !~ /\S/
      and defined($arg->{'contents'}->[0]->{'type'})) {
    #print STDERR "$arg->{'contents'}->[0]->{'type'}\n";
    warn "Unknown leading space type $arg->{'contents'}->[0]->{'type'}\n"
      if ($arg->{'contents'}->[0]->{'type'} ne 'empty_spaces_after_command'
          and $arg->{'contents'}->[0]->{'type'} ne 'empty_spaces_before_argument'
          # FIXME should we really catch this too?
          and $arg->{'contents'}->[0]->{'type'} ne 'empty_line_after_command'
         );
    $result[0] = $arg->{'contents'}->[0]->{'text'};
    return @result if (scalar(@{$arg->{'contents'}}) == 1);
  }
  if ($arg->{'contents'}) {
    my $index = -1;
    $index-- if ($arg->{'contents'}->[-1] 
                 and $arg->{'contents'}->[-1]->{'cmdname'}
                 and ($arg->{'contents'}->[-1]->{'cmdname'} eq 'c'
                      or $arg->{'contents'}->[-1]->{'cmdname'} eq 'comment'));
    if (scalar(@{$arg->{'contents'}}) + $index > 0) {
      if ($arg->{'contents'}->[$index] 
          and defined($arg->{'contents'}->[$index]->{'text'})
          and $arg->{'contents'}->[$index]->{'text'} !~ /\S/
          and defined($arg->{'contents'}->[$index]->{'type'})) {
      #print STDERR "$arg->{'contents'}->[$index]->{'type'}\n";
        warn "Unknown trailing space type $arg->{'contents'}->[$index]->{'type'}\n"
          if ($arg->{'contents'}->[$index]->{'type'} ne 'spaces_at_end'
              and $arg->{'contents'}->[$index]->{'type'} ne 'space_at_end_block_command'
             );
        $result[1] = $arg->{'contents'}->[$index]->{'text'};
      }
    }
  }
  return @result;
}

sub _register_command_arg($$$)
{
  my ($self, $current, $type) = @_;

  my @contents = @{$current->{'contents'}};
  Texinfo::Common::trim_spaces_comment_from_content(\@contents);
  if (scalar(@contents)) {
    push @{$current->{'parent'}->{'extra'}->{$type}}, \@contents;
  } else {
    push @{$current->{'parent'}->{'extra'}->{$type}}, undef;
  }
}

sub _table_item_content_tree($$$)
{
  my $self = shift;
  my $root = shift;
  my $contents = shift;

  my $converted_tree = {'parent' => $root};
  my $table_command = $root->{'parent'}->{'parent'}->{'parent'};
  if ($table_command->{'extra'}
     and $table_command->{'extra'}->{'command_as_argument'}) {
    my $command_as_argument
      = $table_command->{'extra'}->{'command_as_argument'};
    my $command = {'cmdname' => $command_as_argument->{'cmdname'},
               'line_nr' => $root->{'line_nr'},
               'parent' => $converted_tree };
    if ($command_as_argument->{'type'} eq 'definfoenclose_command') {
      $command->{'type'} = $command_as_argument->{'type'};
      $command->{'extra'}->{'begin'} = $command_as_argument->{'extra'}->{'begin'};
      $command->{'extra'}->{'end'} = $command_as_argument->{'extra'}->{'end'};
    }
    my $arg = {'type' => 'brace_command_arg',
               'contents' => $contents,
               'parent' => $command,};
    $command->{'args'} = [$arg];
    _register_command_arg($self, $arg, 'brace_command_contents');
    $contents = [$command];
  }
  $converted_tree->{'contents'} = $contents;
  return $converted_tree;
}

sub _level_corrected_section($$)
{
  my $self = shift;
  my $root = shift;
  my $heading_level = $root->{'level'};
  my $command;
  if ($heading_level ne $Texinfo::Common::command_structuring_level{$root->{'cmdname'}}) {
    $command
      = $Texinfo::Common::level_to_structuring_command{$root->{'cmdname'}}->[$heading_level];
  } else {
    $command = $root->{'cmdname'};
  }
  return $command;
}

# generic output method
sub output_no_split($$)
{
  my $self = shift;
  my $root = shift;

  $self->_set_outfile();
  return undef unless $self->_create_destination_directory();
  
  my $fh;
  if (! $self->{'output_file'} eq '') {
    $fh = $self->Texinfo::Common::open_out($self->{'output_file'});
    if (!$fh) {
      $self->document_error(sprintf($self->__("could not open %s for writing: %s"),
                                    $self->{'output_file'}, $!));
      return undef;
    }
  }

  $self->_set_global_multiple_commands(-1);

  if ($self->get_conf('USE_NODES')) {
    return $self->convert_document_nodes($root, $fh);
  } else {
    return $self->convert_document_sections($root, $fh);
  }
}

# output fo $fh if defined, otherwise return the text.
sub _output_text($$$)
{
  my $self = shift;
  my $text = shift;
  my $fh = shift;
  if ($fh) { 
    print $fh $text;
    return '';
  } else {
    return $text;
  } 
}

sub _convert_document_elements($$;$$)
{
  my $self = shift;
  my $root = shift;
  my $elements = shift;
  my $fh = shift;

  if ($elements) {
    my $result = '';
    foreach my $element (@$elements) {
      $result .= $self->_output_text($self->convert_tree($element), $fh);
    }
    return $result;
  } else {
    return $self->_output_text($self->convert_tree($root), $fh);
  }
}

sub convert_document_sections($$;$)
{
  my $self = shift;
  my $root = shift;
  my $fh = shift;

  my $elements = Texinfo::Structuring::split_by_section($root);
  return $self->_convert_document_elements($root, $elements, $fh);
}

sub convert_document_nodes($$;$)
{
  my $self = shift;
  my $root = shift;
  my $fh = shift;

  my $elements = Texinfo::Structuring::split_by_node($root);
  return $self->_convert_document_elements($root, $elements, $fh);
}

# if in this container, we are 'inline', within a running text
my @inline_types = ('def_line', 'paragraph', 'preformatted',
  'misc_command_arg', 'misc_line_arg', 'block_line_arg',
  'menu_entry_name', 'menu_entry_node');

my %inline_types;
foreach my $type (@inline_types) {
  $inline_types{$type} = 1;
}

my %not_inline_commands = (%Texinfo::Common::root_commands, 
  %Texinfo::Common::block_commands, %Texinfo::Common::context_brace_command);

# Return 1 if inline in a running text, 0 if right in top-level or block
# environment, and undef otherwise.
sub _inline_or_block($$)
{
  my $self = shift;
  my $current = shift;
  if ($current->{'type'} and $inline_types{$current->{'type'}}) {
    return 1;
  } elsif ($current->{'cmdname'} 
           and exists($not_inline_commands{$current->{'cmdname'}})) {
    return 0;
  } else {
    return undef;
  }
}

# return true if in running text context
sub _is_inline($$)
{
  my $self = shift;
  my $current = shift;
  while ($current->{'parent'}) {
    $current = $current->{'parent'};
    my $inline_or_block = $self->_inline_or_block($current);
    return ($inline_or_block) if (defined($inline_or_block));
  }
  return 0;
}

# return true if container or parent may hold running text
sub _in_inline($$)
{
  my $self = shift;
  my $current = shift;
  my $inline_or_block = $self->_inline_or_block($current);
  return ($inline_or_block) if (defined($inline_or_block));
  return $self->_is_inline($current);
}

our %default_args_code_style = (
  'email' => [1],
  'anchor' => [1],
  'uref' => [1],
  'url' => [1],
  'math' => [1],
  'inforef' => [1,undef,1],
  'image' => [1, 1, 1, undef, 1],
# and type?
  'float' => [1],
);

foreach my $code_style_command (keys(%Texinfo::Common::code_style_commands)) {
  $default_args_code_style{$code_style_command} = [1];
}
foreach my $ref_cmd ('pxref', 'xref', 'ref') {
  $default_args_code_style{$ref_cmd} = [1, undef, undef, 1];
}

sub convert_accents($$$;$)
{
  my $self = shift;
  my $accent = shift;
  my $format_accents = shift;
  my $in_upper_case = shift;

  my ($contents, $stack)
      = Texinfo::Common::find_innermost_accent_contents($accent);
  my $result = $self->convert_tree({'contents' => $contents});  

  my $encoded;
  if ($self->get_conf('ENABLE_ENCODING')) {
    $encoded = Texinfo::Convert::Unicode::encoded_accents($self, $result, $stack,
                                       $self->get_conf('OUTPUT_ENCODING_NAME'),
                                       $format_accents,
                                       $in_upper_case);
  }
  if (!defined($encoded)) {
    foreach my $accent_command (reverse(@$stack)) {
      $result = &$format_accents ($self, $result, $accent_command, 
                                  $in_upper_case);
    }
    return $result;
  } else {
    return $encoded;
  }
}

# This method allows to count words in elements and returns an array
# and a text already formatted.
sub sort_element_counts($$;$$)
{
  my $converter =  shift;
  my $tree = shift;
  my $use_sections = shift;
  my $count_words = shift;

  my $elements;
  if ($use_sections) {
    $elements = Texinfo::Structuring::split_by_section($tree);
  } else {
    $elements = Texinfo::Structuring::split_by_node($tree);
  }

  if (!$elements) {
    @$elements = ($tree);
  } elsif (scalar(@$elements) >= 1 
           and (!$elements->[0]->{'extra'}->{'node'}
                and !$elements->[0]->{'extra'}->{'section'})) {
    shift @$elements;
  }

  my $max_count = 0;
  my @name_counts_array;
  foreach my $element (@$elements) {
    my $name = 'UNNAMED element';
    if ($element->{'extra'} 
        and ($element->{'extra'}->{'node'} or $element->{'extra'}->{'section'})) {
      my $command = $element->{'extra'}->{'element_command'};
      if ($command->{'cmdname'} eq 'node') {
        $name = Texinfo::Convert::Texinfo::convert({'contents' 
          => $command->{'extra'}->{'nodes_manuals'}->[0]->{'node_content'}});
      } else {
        $name = "\@$command->{'cmdname'}"
          .Texinfo::Convert::Texinfo::convert($command->{'args'}->[0]);
      }
    }
    chomp($name);
    my $count;
    my $element_content = $converter->convert_tree($element);
    if ($count_words) {
      my @res = split /\W+/, $element_content;
      $count = scalar(@res);
    } else {
      my @res = split /^/, $element_content;
      $count = scalar(@res);
    }
    push @name_counts_array, [$count, $name];
    if ($count > $max_count) {
      $max_count = $count;
    }
  }

  my @sorted_name_counts_array = sort {$a->[0] <=> $b->[0]} @name_counts_array;
  @sorted_name_counts_array = reverse(@sorted_name_counts_array);

  my $max_length = length($max_count);
  my $result = '';
  foreach my $sorted_count (@sorted_name_counts_array) {
    $result .=  sprintf("%${max_length}d  $sorted_count->[1]\n", $sorted_count->[0]);
  }
  return (\@sorted_name_counts_array, $result);
}

# XML related methods and variables that may be used in different
# XML Converters.
sub xml_protect_text($$)
{
  my $self = shift;
  my $text = shift;
  if (!defined($text)) {
    cluck;
  }
  $text =~ s/&/&amp;/g;
  $text =~ s/</&lt;/g;
  $text =~ s/>/&gt;/g;
  $text =~ s/\"/&quot;/g;
  return $text;
}

# 'today' is not set here.
our %default_xml_commands_formatting; 
$default_xml_commands_formatting{'normal'} = {
               'TeX'          => 'TeX',
               'LaTeX'          => 'LaTeX',
               'bullet'       => '&bull;',
               'copyright'    => '&copy;',
               'registeredsymbol'   => '&reg;',
               'dots'         => '&hellip;',
               'enddots'      => '...',
               'equiv'        => '&equiv;',
               # in general the following is not used since error
               # appears in 'translated_commands'
               'error'        => 'error--&gt;',
               'expansion'    => '&rarr;',
               'arrow'        => '&rarr;',
               'click'        => '&rarr;',
               'minus'        => '-',
               'point'        => '&lowast;',
               'print'        => '-|',
               'result'       => '&rArr;',
               'aa'           => '&aring;',
               'AA'           => '&Aring;',
               'ae'           => '&aelig;',
               'oe'           => '&oelig;', #pertusus: also &#156;. &oelig; not in html 3.2
               'AE'           => '&AElig;',
               'OE'           => '&OElig;', #pertusus: also &#140;. &OElig; not in html 3.2
               'o'            =>  '&oslash;',
               'O'            =>  '&Oslash;',
               'ss'           => '&szlig;',
               'DH'           => '&ETH;',
               'dh'           => '&eth;',
               'TH'           => '&THORN;',
               'th'           => '&thorn;',
               'l'            => '&#322;',
               'L'            => '&#321;',
               'exclamdown'   => '&iexcl;',
               'questiondown' => '&iquest;',
               'pounds'       => '&pound;',
               'ordf'         => '&ordf;',
               'ordm'         => '&ordm;',
               'comma'        => ',',
               'atchar'       => '@',
               'lbracechar'   => '{',
               'rbracechar'   => '}',
               'backslashchar' => '\\',
               'hashchar' => '#',
               'euro'         => '&euro;',
               'geq'          => '&ge;',
               'leq'          => '&le;',
               'tie'          => '&nbsp;',
               'textdegree'          => '&deg;',
               'quotedblleft'          => '&ldquo;',
               'quotedblright'          => '&rdquo;',
               'quoteleft'          => '&lsquo;',
               'quoteright'          => '&rsquo;',
               'quotedblbase'          => '&bdquo;',
               'quotesinglbase'          => '&sbquo;',
               'guillemetleft'          => '&laquo;',
               'guillemetright'          => '&raquo;',
               'guillemotleft'          => '&laquo;',
               'guillemotright'          => '&raquo;',
               'guilsinglleft'          => '&lsaquo;',
               'guilsinglright'          => '&rsaquo;',
};

foreach my $no_brace_command (keys(%Texinfo::Common::no_brace_commands)) {
  $default_xml_commands_formatting{'normal'}->{$no_brace_command}
    = $Texinfo::Common::no_brace_commands{$no_brace_command};
}

sub xml_comment($$)
{
  my $self = shift;
  my $text = shift;
  chomp $text;
  $text =~ s/--+/-/go;
  return '<!--' . $text . ' -->' . "\n";
}

our %xml_accent_entities = (
          '"',  'uml',
          '~',  'tilde',
          '^',  'circ',
          '`',  'grave',
          "'", 'acute',
          ",", 'cedil',
          'ringaccent', 'ring',
          'ogonek', 'ogon',
         );

my %xml_accent_text_with_entities = (
      'ringaccent' => 'aA',
      "'"          => 'aeiouyAEIOUY',
      ','          => 'cC',
      '^'          => 'aeiouAEIOU',
      '`'          => 'aeiouAEIOU',
      '~'          => 'nNaoAO',
      '"'          => 'aeiouyAEIOU',
# according to http://www2.lib.virginia.edu/small/vhp/download/ISO.txt
# however this doesn't seems to work in firefox
#      'ogonek'     => 'aeiuAEIU',
);


sub xml_accent($$$;$$)
{
  my $self = shift;
  my $text = shift;
  my $command = shift;
  my $in_upper_case = shift;
  my $use_numeric_entities = shift;
  my $accent = $command->{'cmdname'};
  
  if ($in_upper_case and $text =~ /^\w$/) {
    $text = uc ($text);
  }
 
  return "&${text}$xml_accent_entities{$accent};" 
    if (defined($xml_accent_entities{$accent}) 
        and defined($xml_accent_text_with_entities{$accent}) 
        and ($text =~ /^[$xml_accent_text_with_entities{$accent}]$/));
  if ($use_numeric_entities
      and exists($Texinfo::Convert::Unicode::unicode_accented_letters{$accent}) 
      and exists($Texinfo::Convert::Unicode::unicode_accented_letters{$accent}->{$text})) {
    return '&#' . 
      hex($Texinfo::Convert::Unicode::unicode_accented_letters{$accent}->{$text}). ';';
  }
  return $text . '&lt;' if ($accent eq 'v');
  # FIXME it is not possible to call xml_protect_text since what is in $text
  # may already be xml.  But this means that each time ascii_accent changes
  # it should be changed here too.
  return Texinfo::Convert::Text::ascii_accent($text, $command);
}

sub _xml_accent_numeric_entities($$;$)
{
  my $self = shift;
  my $text = shift;
  my $command = shift;
  my $in_upper_case = shift;
  return $self->xml_accent($text, $command, $in_upper_case, 1);
}

sub xml_accents($$;$)
{
  my $self = shift;
  my $accent = shift;
  my $in_upper_case = shift;

  my $format_accents;
  if ($self->get_conf('USE_NUMERIC_ENTITY')) {
    $format_accents = \&_xml_accent_numeric_entities;
  } else {
    $format_accents = \&xml_accent;
  }
  
  return $self->convert_accents($accent, $format_accents, $in_upper_case);
}

1;

__END__

=head1 NAME

Texinfo::Convert::Converter - Parent class for Texinfo tree converters

=head1 SYNOPSIS

  package Texinfo::Convert::MyConverter;

  use Texinfo::Convert::Converter;
  @ISA = qw(Texinfo::Convert::Converter);

  sub converter_defaults ($$) {
    return %myconverter_defaults;
  }
  sub converter_initialize($) {
    my $self = shift;
    $self->{'document_context'} = [{}];
  }
  sub converter_global_commands($) {
    return ('documentlanguage', documentencoding', 'paragraphindent');
  }

  sub convert($$) {
    ...
  }
  sub convert_tree($$) {
    ...
  }
  sub output($$) {
    ...
  }

  # end of Texinfo::Convert::MyConverter

  my $converter = Texinfo::Convert::MyConverter->converter(
                                               {'parser' => $parser});
  $converter->output($texinfo_tree);

=head1 DESCRIPTION

Texinfo::Convert::Converter is a super class that can be used to
simplify converters initialization.  The class also provide some 
useful methods.

In turn, the converter should define some methods.  Three are 
optional, C<converter_defaults>, C<converter_initialize> and 
C<converter_global_commands> and used for initialization, to 
give C<Texinfo::Convert::Converter> some informations.

The C<convert_tree> method is more or less mandatory and should 
convert portions of Texinfo tree.  The C<output> and C<convert> 
are not required, but customarily used by converters as entry 
points for conversion to a file with headers and so on, or 
conversion of a whole Texinfo tree.

Existing backends may be used as examples that implement those
methods.  C<Texinfo::Convert::Texinfo> together with 
C<Texinfo::Convert::PlainTexinfo>, as well as 
C<Texinfo::Convert::TextContent> are trivial examples.  
C<Texinfo::Convert::Text> is less trivial, although still simplistic, 
while C<Texinfo::Convert::DocBook> is a real converter
that is also not too complex.  

L<Texinfo::Common>, L<Texinfo::Convert::Unicode> 
and L<Texinfo::Report> document modules or additional function 
that may be useful for backends, while the parsed Texinfo tree is 
described in L<Texinfo::Parser>.


=head1 METHODS

=head2 Initialization

A module subclassing C<Texinfo::Convert::Converter> is created by calling
the C<converter> method that should be inherited from 
C<Texinfo::Convert::Converter>.

=over

=item $converter = MyConverter->converter($options)

The I<$options> hash reference holds options for the converter.  In
this option hash reference a L<parser object|Texinfo::Parser> 
may be associated with the I<parser> key.  The other options 
should be configuration options described in the Texinfo manual.
Those options, when appropriate, override the document content.

The C<converter> function returns a converter object (a blessed hash 
reference) after checking the options and performing some initializations,
especially when a parser is given among the options.  The converter is
also initialized as a L<Texinfo::Report>.

=back

To help with these initializations, the modules can define three methods:

=over

=item %defaults = $converter->converter_defaults($options)

The converter can provide a defaults hash for configuration options.
The I<$options> hash reference holds options for the converter.

=item @global_commands = $converter->converter_global_commands()

The list returned is the list of Texinfo global commands (like 
C<@paragraphindent>, C<@documentlanguage>...) that are relevant for the
converter.

=item converter_initialize

This method is called at the end of the converter initialization.

=back

=head2 Helper methods

C<Texinfo::Convert::Converter> provides methods
that may be useful for every converter:

=over

=item $converter->get_conf($option_string)

Returns the value of the Texinfo configuration option I<$option_string>.

=item $converter->set_conf($option_string, $value)

Set the Texinfo configuration option I<$option_string> to I<$value> if
not set as a converter option.

=item $converter->force_conf($option_string, $value)

Set the Texinfo configuration option I<$option_string> to I<$value>.
This should rarely be used, but the purpose of this method is to be able
to revert a configuration that is always wrong for a given output
format, like the splitting for example.

=item $result = $converter->convert_document_sections($root, $file_handler)

This method splits the I<$root> Texinfo tree at sections and 
calls C<convert_tree> on the elements.  If the optional I<$file_handler>
is given in argument, the result are output in I<$file_handler>, otherwise
the resulting string is returned.

=item $result = $converter->convert_accents($accent_command, \&format_accents, $in_upper_case)

I<$accent_command> is an accent command, which may have other accent
commands nested.  The function returns the accents formatted either
as encoded letters, or formatted using I<\&format_accents>.
If I<$in_upper_case> is set, the result should be uppercased.

=back

Other C<Texinfo::Convert::Converter> methods target conversion to XML:

=over

=item $protected_text = $converter->xml_protect_text($text)

Protect special XML characters (&, E<lt>, E<gt>, ") of I<$text>.

=item $comment = $converter->xml_comment($text)

Returns an XML comment for I<$text>.

=item $result = xml_accent($text, $accent_command, $in_upper_case, $use_numeric_entities)

I<$text> is the text appearing within an accent command.  I<$accent_command>
should be a Texinfo tree element corresponding to an accent command taking
an argument.  I<$in_upper_case> is optional, and, if set, the text is put
in upper case.  The function returns the accented letter as XML entity 
if possible.  I<$use_numeric_entities> is also optional, and, if set, and
there is no XML entity, the numerical entity corresponding to Unicode 
points is preferred to an ASCII transliteration.

=item $result = $converter->xml_accents($accent_command, $in_upper_case)

I<$accent_command> is an accent command, which may have other accent
commands nested.  If I<$in_upper_case> is set, the result should be 
upper cased.  The function returns the accents formatted as XML.

=back

Finally, there is:

=item $result = $converter->output_internal_links()

At this level, the method just returns undef.  It is used in the HTML
output, following the C<--internal-links> option of texi2any/makeinfo
specification.

=back

=head1 SEE ALSO

L<Texinfo::Common>, L<Texinfo::Convert::Unicode>, L<Texinfo::Report> 
and L<Texinfo::Parser>.  

=head1 AUTHOR

Patrice Dumas, E<lt>pertusus@free.frE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright 2011, 2012, 2013, 2014, 2015 Free Software Foundation, Inc.

This library is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at 
your option) any later version.

=cut