Viewing File: /usr/bin/txixml2texi

#! /usr/bin/perl
#
# texixml2texi -- convert Texinfo XML to Texinfo code
#
# Copyright 2012 Free Software Foundation, Inc.
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License,
# or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
# 
# Original author: Patrice Dumas <pertusus@free.fr>

use strict;
use Getopt::Long qw(GetOptions);
# for dirname.
use File::Basename;
use File::Spec;

Getopt::Long::Configure("gnu_getopt");

BEGIN
{
  # emulate -w
  $^W = 1;
  my ($real_command_name, $command_directory, $command_suffix) 
     = fileparse($0, '.pl');

  my $datadir = '/usr/share';
  my $package = 'texinfo';
  my $updir = File::Spec->updir();

  my $texinfolibdir;
  my $lib_dir;

  # in-source run
  if (($command_suffix eq '.pl' and !(defined($ENV{'TEXINFO_DEV_SOURCE'})
       and $ENV{'TEXINFO_DEV_SOURCE'} eq 0)) or $ENV{'TEXINFO_DEV_SOURCE'}) {
    my $srcdir = defined $ENV{'srcdir'} ? $ENV{'srcdir'} : $command_directory;
    $texinfolibdir = File::Spec->catdir($srcdir, $updir, 'tp');
    $lib_dir = File::Spec->catdir($texinfolibdir, 'maintain');
    unshift @INC, $texinfolibdir;
  } elsif ($datadir ne '@' .'datadir@' and $package ne '@' . 'PACKAGE@'
           and $datadir ne '') {
    $texinfolibdir = File::Spec->catdir($datadir, $package);
    # try to make package relocatable, will only work if standard relative paths
    # are used
    if (! -f File::Spec->catfile($texinfolibdir, 'Texinfo', 'Parser.pm')
        and -f File::Spec->catfile($command_directory, $updir, 'share', 
                                   'texinfo', 'Texinfo', 'Parser.pm')) {
      $texinfolibdir = File::Spec->catdir($command_directory, $updir, 
                                          'share', 'texinfo');
    }
    $lib_dir = $texinfolibdir;
    unshift @INC, $texinfolibdir;
  }

  # '@USE_EXTERNAL_LIBINTL @ and similar are substituted in the
  # makefile using values from configure
  if (defined($texinfolibdir)) {
    if ('no' ne 'yes') {
      unshift @INC, (File::Spec->catdir($lib_dir, 'lib', 'libintl-perl', 'lib'));
    }
    if ('no' ne 'yes') {
      unshift @INC, (File::Spec->catdir($lib_dir, 'lib', 'Unicode-EastAsianWidth', 'lib'));
    }
    if ('yes' ne 'yes') {
      unshift @INC, (File::Spec->catdir($lib_dir, 'lib', 'Text-Unidecode', 'lib'));
    }
  }
}

use XML::LibXML::Reader;

# gather information on Texinfo XML elements
use Texinfo::Common;
use Texinfo::Convert::TexinfoXML;

my $debug = 0;
my $result_options = Getopt::Long::GetOptions (
 'debug|d' => \$debug,
);

sub command_with_braces($)
{
  my $command = shift;
  if ($command =~ /^[a-z]/i) {
    return "\@".$command.'{}';
  } else {
    return "\@".$command;
  }
}

my %ignored_elements = (
  'prepend' => 1,
  'formalarg' => 1,
  # not ignored everytime
  'indexterm' => 1,
);

my %elements_end_attributes = (
  'accent' => 1,
  'menunode' => 1,
  'menutitle' => 1,
);

my %element_at_commands;
my %entity_texts = (
  'textldquo' => '``',
  'textrdquo' => "''",
  'textmdash' => '---',
  'textndash' => '--',
  'textrsquo' => "'",
  'textlsquo' => '`',
  'formfeed' => "\f",
  # this is not used in pratice, as attrformfeed appears in an 
  # attribute and thus is already expanded to text.
  'attrformfeed' => "\f",
);

foreach my $command (keys(%Texinfo::Convert::TexinfoXML::commands_formatting)) {
  if (!ref($Texinfo::Convert::TexinfoXML::commands_formatting{$command})) {
    $entity_texts{$Texinfo::Convert::TexinfoXML::commands_formatting{$command}}
      = command_with_braces($command);
  } else {
    my $spec = $Texinfo::Convert::TexinfoXML::commands_formatting{$command};
    my $element = $spec->[0];
    if ($element eq 'spacecmd') {
      if ($spec->[1] eq 'type') {
        $element_at_commands{$element}->{"type"}->{$spec->[2]}
          = command_with_braces($command);
      } else {
        die "BUG, bad spacecmd specification";
      }
    } else {
      $element_at_commands{$element} = command_with_braces($command);
    }
  }
}

$element_at_commands{'accent'} = 0;

my %arg_elements;
foreach my $command (keys(%Texinfo::Convert::TexinfoXML::commands_args_elements)) {
  my $arg_index = 0;
  foreach my $element_argument (@{$Texinfo::Convert::TexinfoXML::commands_args_elements{$command}}) {
    $arg_elements{$element_argument} = [$arg_index, $command];
    $arg_index++;
  }
}

my %accent_type_command;
foreach my $accent_command (keys(%Texinfo::Convert::TexinfoXML::accent_types)) {
  $accent_type_command{$Texinfo::Convert::TexinfoXML::accent_types{$accent_command}} 
    = $accent_command;
}

my %eat_space_elements;
foreach my $element ('texinfo', 'filename') {
  $eat_space_elements{$element} = 1;
}

my $infile = shift @ARGV;

if (!defined($infile) or $infile !~ /\S/) {
  die "Missing file\n";
}

my $reader = XML::LibXML::Reader->new('location' => $infile,
                                       'expand_entities' => 0,
                                    )
       or die "cannot read $infile\n";

#(my $mydir = $0) =~ s,/[^/]*$,,;  # dir we are in
#my $txi_dtd_libdir = "$mydir";  # find tp relative to $0

sub skip_until_end($$)
{
  my $reader = shift;
  my $name = shift;
  while ($reader->read) {
    if ($reader->nodeType() eq XML_READER_TYPE_END_ELEMENT
        and $reader->name eq $name) {
      return;
    }
  }
}

my $eat_space = 0;
my @commands_with_args_stack;

while ($reader->read) {

  # ============================================================ begin debug
  if ($debug) {
    printf STDERR "(args: @commands_with_args_stack) (eat_space $eat_space) %d %d %s %d", ($reader->depth,
                           $reader->nodeType,
                           $reader->name,
                           $reader->isEmptyElement);
    my $value = '';
    if ($reader->hasValue()) {
      $value = $reader->value();
      $value =~ s/\n/\\n/g;
      print STDERR " |$value|";
    }
    if ($reader->nodeType() eq XML_READER_TYPE_ELEMENT
        and $reader->hasAttributes() 
        and defined($reader->getAttribute('spaces'))) {
      my $spaces = $reader->getAttribute('spaces');
      print STDERR " spaces:$spaces|";
    }
    print STDERR "\n";
  }
  # ============================================================ end debug

  if ($reader->nodeType() eq XML_READER_TYPE_SIGNIFICANT_WHITESPACE
      and $eat_space) {
    $eat_space = 0;
    next;
  } elsif ($reader->nodeType() eq XML_READER_TYPE_TEXT
      or $reader->nodeType() eq XML_READER_TYPE_WHITESPACE
      or $reader->nodeType() eq XML_READER_TYPE_SIGNIFICANT_WHITESPACE 
     ) {
    if ($reader->hasValue()) {
      print $reader->value();
    }
  }
  my $name = $reader->name;
  if ($reader->nodeType() eq XML_READER_TYPE_ELEMENT) {
    if (($name eq 'entry' or $name eq 'indexcommand')
        and $reader->hasAttributes()
        and defined($reader->getAttribute('command'))) {
      $name = $reader->getAttribute('command');
    } elsif ($name eq 'listitem') {
      $name = 'item';
    }
    if ($Texinfo::Convert::TexinfoXML::commands_args_elements{$name}) {
      push @commands_with_args_stack, 0;
    }
    if (exists $element_at_commands{$name}) {
      if ($name eq 'accent') {
        if ($reader->hasAttributes()) {
          if (defined($reader->getAttribute('type'))) {
            my $command = $accent_type_command{$reader->getAttribute('type')};
            print "\@$command"
              if (defined($command));
          }
          if (!defined($reader->getAttribute('spaces'))
              and !(defined($reader->getAttribute('bracketed'))
                    and $reader->getAttribute('bracketed') eq 'off')) {
            print '{';
          }
        } else {
          print '{';
        }
      } elsif (!ref($element_at_commands{$name})) {
        print $element_at_commands{$name};
      } else {
        my ($attribute) = keys(%{$element_at_commands{$name}});
        if ($reader->hasAttributes() 
            and defined($reader->getAttribute($attribute))) {
          print
            $element_at_commands{$name}->{$attribute}->{$reader->getAttribute($attribute)};
        }
      }
    } elsif (exists($Texinfo::Common::brace_commands{$name})) {
      print "\@${name}{";
      if ($name eq 'verb' and $reader->hasAttributes() 
          and defined($reader->getAttribute('delimiter'))) {
        print $reader->getAttribute('delimiter');
      }
    } elsif (exists($Texinfo::Common::block_commands{$name})) {
      print "\@$name";
      if ($name eq 'macro') {
        if ($reader->hasAttributes() and defined($reader->getAttribute('line'))) {
          print $reader->getAttribute('line');
        }
        print "\n";
      }
    } elsif (defined($Texinfo::Common::misc_commands{$name})) {
      if ($reader->hasAttributes()
          and defined($reader->getAttribute('originalcommand'))) {
        $name = $reader->getAttribute('originalcommand');
      }
      if ($name eq 'documentencoding' and $reader->hasAttributes() 
          and defined($reader->getAttribute('encoding'))) {
        my ($texinfo_encoding, $perl_encoding, $output_encoding)
         = Texinfo::Encoding::encoding_alias($reader->getAttribute('encoding'));

        if (defined($perl_encoding)) {
          if ($debug) {
            print STDERR "Using encoding $perl_encoding\n";
          }
          binmode(STDOUT, ":encoding($perl_encoding)");
        }
      }
      print "\@$name";
      if ($reader->hasAttributes() and defined($reader->getAttribute('line'))) {
        my $line = $reader->getAttribute('line');
        $line =~ s/\\\\/\x{1F}/g;
        $line =~ s/\\f/\f/g;
        $line =~ s/\x{1F}/\\/g;
        print $line;
      }
      if ($name eq 'set' or $name eq 'clickstyle') {
        skip_until_end($reader, $name);
      }
    } elsif ($arg_elements{$name}) {
      if ($reader->hasAttributes() 
          and defined($reader->getAttribute('automatic'))
          and $reader->getAttribute('automatic') eq 'on') {
        skip_until_end($reader, $name);
        next;
      }
      while ($arg_elements{$name}->[0] 
             and $commands_with_args_stack[-1] < $arg_elements{$name}->[0]) {
        $commands_with_args_stack[-1]++;
        print ',';
      }
    } elsif ($ignored_elements{$name}) {
      my $keep_indexterm = 0;
      if ($name eq 'indexterm') {
        my $node_path = $reader->nodePath();
        if ($node_path =~ m:([a-z]+)/indexterm$:) {
          my $parent = $1;
          if ($parent =~ /^[a-z]?[a-z]index$/ or $parent eq 'indexcommand') {
            $keep_indexterm = 1;
          }
        }
      }
      if (!$keep_indexterm) {
        skip_until_end($reader, $name);
        next;
      }
    } elsif ($name eq 'formattingcommand') {
      if ($reader->hasAttributes()
          and defined($reader->getAttribute('command'))) {
        print '@'.$reader->getAttribute('command');
      }
    # def* automatic 
    } elsif ($reader->hasAttributes() 
          and defined($reader->getAttribute('automatic'))
          and $reader->getAttribute('automatic') eq 'on') {
      skip_until_end($reader, $name);
      # eat the following space
      $reader->read();
    } elsif ($eat_space_elements{$name}) {
      $eat_space = 1;
    } else {
      print STDERR "UNKNOWN $name\n" if ($debug);
    }
    if ($reader->hasAttributes()) {
      if (defined($reader->getAttribute('bracketed'))
          and $reader->getAttribute('bracketed') eq 'on') {
        print '{';
      }
      if (defined($reader->getAttribute('spaces'))) {
        my $spaces = $reader->getAttribute('spaces');
        $spaces =~ s/\\n/\n/g;
        $spaces =~ s/\\f/\f/g;
        print $spaces;
      }
      if (defined($reader->getAttribute('leadingtext'))) {
        print $reader->getAttribute('leadingtext');
      }
    }
    if ($Texinfo::Common::item_line_commands{$name}
        and $reader->hasAttributes() 
        and defined($reader->getAttribute('commandarg'))) {
      print '@'.$reader->getAttribute('commandarg');
    }
  } elsif ($reader->nodeType() eq XML_READER_TYPE_END_ELEMENT) {
    if ($Texinfo::Convert::TexinfoXML::commands_args_elements{$name}) {
      pop @commands_with_args_stack;
    }
    if ($reader->hasAttributes()) {
      if (defined($reader->getAttribute('bracketed'))
          and $reader->getAttribute('bracketed') eq 'on') {
        print '}';
      }
    }
    if (exists ($Texinfo::Common::brace_commands{$name})) {
      if ($name eq 'verb' and $reader->hasAttributes() 
          and defined($reader->getAttribute('delimiter'))) {
        print $reader->getAttribute('delimiter');
      }
      print '}';
    } elsif (exists($Texinfo::Common::block_commands{$name})) {
      my $end_spaces;
      if ($reader->hasAttributes() 
          and defined($reader->getAttribute('endspaces'))) {
        $end_spaces = $reader->getAttribute('endspaces');
      }
      $end_spaces = ' ' if (!defined($end_spaces) or $end_spaces eq '');
      print "\@end".$end_spaces."$name";
    } elsif (defined($Texinfo::Common::misc_commands{$name})) {
      if ($Texinfo::Common::root_commands{$name} and $name ne 'node') {
        $eat_space = 1;
      }
    } elsif ($elements_end_attributes{$name}) {
      if ($name eq 'accent') {
        if ($reader->hasAttributes()) {
          if (!defined($reader->getAttribute('spaces'))
              and !(defined($reader->getAttribute('bracketed'))
                    and $reader->getAttribute('bracketed') eq 'off')) {
            print '}';
          }
        } else {
          print '}';
        }
      } elsif ($reader->hasAttributes() 
               and defined($reader->getAttribute('separator'))) {
        print $reader->getAttribute('separator');
      }
    } elsif ($eat_space_elements{$name}) {
      $eat_space = 1;
    } else {
      print STDERR "END UNKNOWN $name\n" if ($debug);
    }
    if ($reader->hasAttributes() 
        and defined($reader->getAttribute('trailingspaces'))) {
      my $trailingspaces = $reader->getAttribute('trailingspaces');
      $trailingspaces =~ s/\\f/\f/g;
      print $trailingspaces;
    }
  } elsif ($reader->nodeType() eq XML_READER_TYPE_ENTITY_REFERENCE) {
    if (defined($entity_texts{$name})) {
      print $entity_texts{$name};
    }
  } elsif ($reader->nodeType() eq XML_READER_TYPE_COMMENT) {
    my $comment;
    if ($reader->hasValue()) {
      $comment = $reader->value();
      $comment =~ s/^ (comment|c)//;
      my $command = $1;
      $comment =~ s/ $//;
      print "\@${command}$comment";
    }
  } elsif ($reader->nodeType() eq XML_READER_TYPE_DOCUMENT_TYPE) {
    $eat_space = 1;
  }
}

1;
Back to Directory File Manager