aboutsummaryrefslogtreecommitdiffstats
path: root/intltool-merge.in
diff options
context:
space:
mode:
authorAlessio Treglia <alessio@debian.org>2014-07-21 13:58:29 +0100
committerAlessio Treglia <alessio@debian.org>2014-07-21 13:58:29 +0100
commite25104d1480e03993980d944d21d788a95ecd09d (patch)
tree96b84cc4baac713ad7ab3e887395f9febb91a462 /intltool-merge.in
parent6ef14034edff72de856c29dd2e4f2b180444d890 (diff)
Imported Upstream version 0.18upstream/0.18
Diffstat (limited to 'intltool-merge.in')
-rw-r--r--intltool-merge.in705
1 files changed, 570 insertions, 135 deletions
diff --git a/intltool-merge.in b/intltool-merge.in
index 2a70fc9..d0535ab 100644
--- a/intltool-merge.in
+++ b/intltool-merge.in
@@ -1,4 +1,5 @@
#!@INTLTOOL_PERL@ -w
+# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4 -*-
#
# The Intltool Message Merger
@@ -34,12 +35,21 @@
## Release information
my $PROGRAM = "intltool-merge";
my $PACKAGE = "intltool";
-my $VERSION = "0.26";
+my $VERSION = "0.35.0";
## Loaded modules
use strict;
use Getopt::Long;
use Text::Wrap;
+use File::Basename;
+
+my $must_end_tag = -1;
+my $last_depth = -1;
+my $translation_depth = -1;
+my @tag_stack = ();
+my @entered_tag = ();
+my @translation_strings = ();
+my $leading_space = "";
## Scalars used by the option stuff
my $HELP_ARG = 0;
@@ -53,6 +63,7 @@ my $RFC822DEB_STYLE_ARG = 0;
my $QUIET_ARG = 0;
my $PASS_THROUGH_ARG = 0;
my $UTF8_ARG = 0;
+my $MULTIPLE_OUTPUT = 0;
my $cache_file;
## Handle options
@@ -70,6 +81,7 @@ GetOptions
"rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
"pass-through|p" => \$PASS_THROUGH_ARG,
"utf8|u" => \$UTF8_ARG,
+ "multiple-output|m" => \$MULTIPLE_OUTPUT,
"cache|c=s" => \$cache_file
) or &error;
@@ -79,7 +91,8 @@ my $OUTFILE;
my %po_files_by_lang = ();
my %translations = ();
-my $iconv = $ENV{"INTLTOOL_ICONV"} || "iconv";
+my $iconv = $ENV{"ICONV"} || $ENV{"INTLTOOL_ICONV"} || "@INTLTOOL_ICONV@";
+my $devnull = ($^O eq 'MSWin32' ? 'NUL:' : '/dev/null');
# Use this instead of \w for XML files to handle more possible characters.
my $w = "[-A-Za-z0-9._:]";
@@ -99,6 +112,7 @@ elsif ($HELP_ARG)
}
elsif ($BA_STYLE_ARG && @ARGV > 2)
{
+ &utf8_sanity_check;
&preparation;
&print_message;
&ba_merge_translations;
@@ -106,39 +120,41 @@ elsif ($BA_STYLE_ARG && @ARGV > 2)
}
elsif ($XML_STYLE_ARG && @ARGV > 2)
{
- &utf8_sanity_check;
+ &utf8_sanity_check;
&preparation;
&print_message;
- &xml_merge_translations;
+ &xml_merge_output;
&finalize;
}
elsif ($KEYS_STYLE_ARG && @ARGV > 2)
{
- &utf8_sanity_check;
- &preparation;
- &print_message;
- &keys_merge_translations;
+ &utf8_sanity_check;
+ &preparation;
+ &print_message;
+ &keys_merge_translations;
&finalize;
}
elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
{
- &preparation;
- &print_message;
- &desktop_merge_translations;
+ &utf8_sanity_check;
+ &preparation;
+ &print_message;
+ &desktop_merge_translations;
&finalize;
}
elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
{
- &preparation;
- &print_message;
- &schemas_merge_translations;
+ &utf8_sanity_check;
+ &preparation;
+ &print_message;
+ &schemas_merge_translations;
&finalize;
}
elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
{
- &preparation;
- &print_message;
- &rfc822deb_merge_translations;
+ &preparation;
+ &print_message;
+ &rfc822deb_merge_translations;
&finalize;
}
else
@@ -151,35 +167,49 @@ exit;
## Sub for printing release information
sub print_version
{
- print "${PROGRAM} (${PACKAGE}) ${VERSION}\n";
- print "Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.\n\n";
- print "Copyright (C) 2000-2002 Free Software Foundation, Inc.\n";
- print "Copyright (C) 2000-2001 Eazel, Inc.\n";
- print "This is free software; see the source for copying conditions. There is NO\n";
- print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
+ print <<_EOF_;
+${PROGRAM} (${PACKAGE}) ${VERSION}
+Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.
+
+Copyright (C) 2000-2003 Free Software Foundation, Inc.
+Copyright (C) 2000-2001 Eazel, Inc.
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+_EOF_
exit;
}
## Sub for printing usage information
sub print_help
{
- print "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n";
- print "Generates an output file that includes translated versions of some attributes,\n";
- print "from an untranslated source and a po directory that includes translations.\n\n";
- print " -b, --ba-style includes translations in the bonobo-activation style\n";
- print " -d, --desktop-style includes translations in the desktop style\n";
- print " -k, --keys-style includes translations in the keys style\n";
- print " -s, --schemas-style includes translations in the schemas style\n";
- print " -r, --rfc822deb-style includes translations in the RFC822 style\n";
- print " -x, --xml-style includes translations in the standard xml style\n";
- print " -u, --utf8 convert all strings to UTF-8 before merging\n";
- print " -p, --pass-through use strings as found in .po files, without\n";
- print " conversion (STRONGLY unrecommended with -x)\n";
- print " -q, --quiet suppress most messages\n";
- print " --help display this help and exit\n";
- print " --version output version information and exit\n";
- print "\nReport bugs to bugzilla.gnome.org, module intltool, or contact us through \n";
- print "<xml-i18n-tools-list\@gnome.org>.\n";
+ print <<_EOF_;
+Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
+Generates an output file that includes some localized attributes from an
+untranslated source file.
+
+Mandatory options: (exactly one must be specified)
+ -b, --ba-style includes translations in the bonobo-activation style
+ -d, --desktop-style includes translations in the desktop style
+ -k, --keys-style includes translations in the keys style
+ -s, --schemas-style includes translations in the schemas style
+ -r, --rfc822deb-style includes translations in the RFC822 style
+ -x, --xml-style includes translations in the standard xml style
+
+Other options:
+ -u, --utf8 convert all strings to UTF-8 before merging
+ (default for everything except RFC822 style)
+ -p, --pass-through deprecated, does nothing and issues a warning
+ -m, --multiple-output output one localized file per locale, instead of
+ a single file containing all localized elements
+ -c, --cache=FILE specify cache file name
+ (usually \$top_builddir/po/.intltool-merge-cache)
+ -q, --quiet suppress most messages
+ --help display this help and exit
+ --version output version information and exit
+
+Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
+or send email to <xml-i18n-tools\@gnome.org>.
+_EOF_
exit;
}
@@ -227,7 +257,7 @@ sub gather_po_files
sub get_local_charset
{
my ($encoding) = @_;
- my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/usr/lib/charset.alias";
+ my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "@INTLTOOL_LIBDIR@/charset.alias";
# seek character encoding aliases in charset.alias (glib)
@@ -270,7 +300,7 @@ sub get_po_encoding
$encoding = "ISO-8859-1";
}
- system ("$iconv -f $encoding -t UTF-8 </dev/null 2>/dev/null");
+ system ("$iconv -f $encoding -t UTF-8 <$devnull 2>$devnull");
if ($?) {
$encoding = get_local_charset($encoding);
}
@@ -280,13 +310,8 @@ sub get_po_encoding
sub utf8_sanity_check
{
- if (!$UTF8_ARG)
- {
- if (!$PASS_THROUGH_ARG)
- {
- $PASS_THROUGH_ARG="1";
- }
- }
+ print STDERR "Warning: option --pass-through has been removed.\n" if $PASS_THROUGH_ARG;
+ $UTF8_ARG = 1;
}
sub get_translation_database
@@ -370,7 +395,7 @@ sub create_translation_database
}
else
{
- print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
+ print "NOTICE: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
}
@@ -434,9 +459,17 @@ sub unescape_one_sequence
return "\\" if $sequence eq "\\\\";
return "\"" if $sequence eq "\\\"";
return "\n" if $sequence eq "\\n";
+ return "\r" if $sequence eq "\\r";
+ return "\t" if $sequence eq "\\t";
+ return "\b" if $sequence eq "\\b";
+ return "\f" if $sequence eq "\\f";
+ return "\a" if $sequence eq "\\a";
+ return chr(11) if $sequence eq "\\v"; # vertical tab, see ascii(7)
- # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
- # \xXX (hex) and has a comment saying they want to handle \u and \U.
+ return chr(hex($1)) if ($sequence =~ /\\x([0-9a-fA-F]{2})/);
+ return chr(oct($1)) if ($sequence =~ /\\([0-7]{3})/);
+
+ # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489
return $sequence;
}
@@ -445,7 +478,7 @@ sub unescape_po_string
{
my ($string) = @_;
- $string =~ s/(\\.)/unescape_one_sequence($1)/eg;
+ $string =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg;
return $string;
}
@@ -466,8 +499,7 @@ sub entity_decode
# entity_encode: (string)
#
-# Encode the given string to XML format (encode '<' etc). It also
-# encodes high bit if not in UTF-8 mode.
+# Encode the given string to XML format (encode '<' etc).
sub entity_encode
{
@@ -475,15 +507,8 @@ sub entity_encode
my @list_of_chars = unpack ('C*', $pre_encoded);
- if ($PASS_THROUGH_ARG)
- {
- return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
- }
- else
- {
- # with UTF-8 we only encode minimalistic
- return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
- }
+ # with UTF-8 we only encode minimalistic
+ return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
}
sub entity_encode_int_minimalist
@@ -495,19 +520,6 @@ sub entity_encode_int_minimalist
return chr $_;
}
-sub entity_encode_int_even_high_bit
-{
- if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39 || $_ == 60)
- {
- # the ($_ > 127) should probably be removed
- return "&#" . $_ . ";";
- }
- else
- {
- return chr $_;
- }
-}
-
sub entity_encoded_translation
{
my ($lang, $string) = @_;
@@ -531,6 +543,8 @@ sub ba_merge_translations
}
open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
+ # Binmode so that selftest works ok if using a native Win32 Perl...
+ binmode (OUTPUT) if $^O eq 'MSWin32';
while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
{
@@ -571,59 +585,453 @@ sub ba_merge_translations
## XML (non-bonobo-activation) merge code
-sub xml_merge_translations
+
+# Process tag attributes
+# Only parameter is a HASH containing attributes -> values mapping
+sub getAttributeString
{
- my $source;
+ my $sub = shift;
+ my $do_translate = shift || 0;
+ my $language = shift || "";
+ my $result = "";
+ my $translate = shift;
+ foreach my $e (reverse(sort(keys %{ $sub }))) {
+ my $key = $e;
+ my $string = $sub->{$e};
+ my $quote = '"';
+
+ $string =~ s/^[\s]+//;
+ $string =~ s/[\s]+$//;
+
+ if ($string =~ /^'.*'$/)
+ {
+ $quote = "'";
+ }
+ $string =~ s/^['"]//g;
+ $string =~ s/['"]$//g;
+
+ if ($do_translate && $key =~ /^_/) {
+ $key =~ s|^_||g;
+ if ($language) {
+ # Handle translation
+ my $decode_string = entity_decode($string);
+ my $translation = $translations{$language, $decode_string};
+ if ($translation) {
+ $translation = entity_encode($translation);
+ $string = $translation;
+ }
+ $$translate = 2;
+ } else {
+ $$translate = 2 if ($translate && (!$$translate)); # watch not to "overwrite" $translate
+ }
+ }
+
+ $result .= " $key=$quote$string$quote";
+ }
+ return $result;
+}
- {
- local $/; # slurp mode
- open INPUT, "<$FILE" or die "can't open $FILE: $!";
- $source = <INPUT>;
- close INPUT;
+# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
+sub getXMLstring
+{
+ my $ref = shift;
+ my $spacepreserve = shift || 0;
+ my @list = @{ $ref };
+ my $result = "";
+
+ my $count = scalar(@list);
+ my $attrs = $list[0];
+ my $index = 1;
+
+ $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
+ $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
+
+ while ($index < $count) {
+ my $type = $list[$index];
+ my $content = $list[$index+1];
+ if (! $type ) {
+ # We've got CDATA
+ if ($content) {
+ # lets strip the whitespace here, and *ONLY* here
+ $content =~ s/\s+/ /gs if (!$spacepreserve);
+ $result .= $content;
+ }
+ } elsif ( "$type" ne "1" ) {
+ # We've got another element
+ $result .= "<$type";
+ $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements
+ if ($content) {
+ my $subresult = getXMLstring($content, $spacepreserve);
+ if ($subresult) {
+ $result .= ">".$subresult . "</$type>";
+ } else {
+ $result .= "/>";
+ }
+ } else {
+ $result .= "/>";
+ }
+ }
+ $index += 2;
}
+ return $result;
+}
- open OUTPUT, ">$OUTFILE" or die;
+# Translate list of nodes if necessary
+sub translate_subnodes
+{
+ my $fh = shift;
+ my $content = shift;
+ my $language = shift || "";
+ my $singlelang = shift || 0;
+ my $spacepreserve = shift || 0;
+
+ my @nodes = @{ $content };
+
+ my $count = scalar(@nodes);
+ my $index = 0;
+ while ($index < $count) {
+ my $type = $nodes[$index];
+ my $rest = $nodes[$index+1];
+ if ($singlelang) {
+ my $oldMO = $MULTIPLE_OUTPUT;
+ $MULTIPLE_OUTPUT = 1;
+ traverse($fh, $type, $rest, $language, $spacepreserve);
+ $MULTIPLE_OUTPUT = $oldMO;
+ } else {
+ traverse($fh, $type, $rest, $language, $spacepreserve);
+ }
+ $index += 2;
+ }
+}
- # FIXME: support attribute translations
+sub isWellFormedXmlFragment
+{
+ my $ret = eval 'require XML::Parser';
+ if(!$ret) {
+ die "You must have XML::Parser installed to run $0\n\n";
+ }
- # Empty nodes never need translation, so unmark all of them.
- # For example, <_foo/> is just replaced by <foo/>.
- $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
+ my $fragment = shift;
+ return 0 if (!$fragment);
+
+ $fragment = "<root>$fragment</root>";
+ my $xp = new XML::Parser(Style => 'Tree');
+ my $tree = 0;
+ eval { $tree = $xp->parse($fragment); };
+ return $tree;
+}
+
+sub traverse
+{
+ my $fh = shift;
+ my $nodename = shift;
+ my $content = shift;
+ my $language = shift || "";
+ my $spacepreserve = shift || 0;
+
+ if (!$nodename) {
+ if ($content =~ /^[\s]*$/) {
+ $leading_space .= $content;
+ }
+ print $fh $content;
+ } else {
+ # element
+ my @all = @{ $content };
+ my $attrs = shift @all;
+ my $translate = 0;
+ my $outattr = getAttributeString($attrs, 1, $language, \$translate);
+
+ if ($nodename =~ /^_/) {
+ $translate = 1;
+ $nodename =~ s/^_//;
+ }
+ my $lookup = '';
- # Support for <_foo>blah</_foo> style translations.
- while ($source =~ s|^(.*?)([ \t]*)<\s*_($w+)\s*>(.*?)<\s*/_\3\s*>([ \t]*\n)?||s)
+ $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
+ $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
+
+ print $fh "<$nodename", $outattr;
+ if ($translate) {
+ $lookup = getXMLstring($content, $spacepreserve);
+ if (!$spacepreserve) {
+ $lookup =~ s/^\s+//s;
+ $lookup =~ s/\s+$//s;
+ }
+
+ if ($lookup || $translate == 2) {
+ my $translation = $translations{$language, $lookup} if isWellFormedXmlFragment($translations{$language, $lookup});
+ if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) {
+ $translation = $lookup if (!$translation);
+ print $fh " xml:lang=\"", $language, "\"" if $language;
+ print $fh ">";
+ if ($translate == 2) {
+ translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
+ } else {
+ print $fh $translation;
+ }
+ print $fh "</$nodename>";
+
+ return; # this means there will be no same translation with xml:lang="$language"...
+ # if we want them both, just remove this "return"
+ } else {
+ print $fh ">";
+ if ($translate == 2) {
+ translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
+ } else {
+ print $fh $lookup;
+ }
+ print $fh "</$nodename>";
+ }
+ } else {
+ print $fh "/>";
+ }
+
+ for my $lang (sort keys %po_files_by_lang) {
+ if ($MULTIPLE_OUTPUT && $lang ne "$language") {
+ next;
+ }
+ if ($lang) {
+ # Handle translation
+ #
+ my $translate = 0;
+ my $localattrs = getAttributeString($attrs, 1, $lang, \$translate);
+ my $translation = $translations{$lang, $lookup} if isWellFormedXmlFragment($translations{$lang, $lookup});
+ if ($translate && !$translation) {
+ $translation = $lookup;
+ }
+
+ if ($translation || $translate) {
+ print $fh "\n";
+ $leading_space =~ s/.*\n//g;
+ print $fh $leading_space;
+ print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">";
+ if ($translate == 2) {
+ translate_subnodes($fh, \@all, $lang, 1, $spacepreserve);
+ } else {
+ print $fh $translation;
+ }
+ print $fh "</$nodename>";
+ }
+ }
+ }
+
+ } else {
+ my $count = scalar(@all);
+ if ($count > 0) {
+ print $fh ">";
+ my $index = 0;
+ while ($index < $count) {
+ my $type = $all[$index];
+ my $rest = $all[$index+1];
+ traverse($fh, $type, $rest, $language, $spacepreserve);
+ $index += 2;
+ }
+ print $fh "</$nodename>";
+ } else {
+ print $fh "/>";
+ }
+ }
+ }
+}
+
+sub intltool_tree_comment
+{
+ my $expat = shift;
+ my $data = shift;
+ my $clist = $expat->{Curlist};
+ my $pos = $#$clist;
+
+ push @$clist, 1 => $data;
+}
+
+sub intltool_tree_cdatastart
+{
+ my $expat = shift;
+ my $clist = $expat->{Curlist};
+ my $pos = $#$clist;
+
+ push @$clist, 0 => $expat->original_string();
+}
+
+sub intltool_tree_cdataend
+{
+ my $expat = shift;
+ my $clist = $expat->{Curlist};
+ my $pos = $#$clist;
+
+ $clist->[$pos] .= $expat->original_string();
+}
+
+sub intltool_tree_char
+{
+ my $expat = shift;
+ my $text = shift;
+ my $clist = $expat->{Curlist};
+ my $pos = $#$clist;
+
+ # Use original_string so that we retain escaped entities
+ # in CDATA sections.
+ #
+ if ($pos > 0 and $clist->[$pos - 1] eq '0') {
+ $clist->[$pos] .= $expat->original_string();
+ } else {
+ push @$clist, 0 => $expat->original_string();
+ }
+}
+
+sub intltool_tree_start
+{
+ my $expat = shift;
+ my $tag = shift;
+ my @origlist = ();
+
+ # Use original_string so that we retain escaped entities
+ # in attribute values. We must convert the string to an
+ # @origlist array to conform to the structure of the Tree
+ # Style.
+ #
+ my @original_array = split /\x/, $expat->original_string();
+ my $source = $expat->original_string();
+
+ # Remove leading tag.
+ #
+ $source =~ s|^\s*<\s*(\S+)||s;
+
+ # Grab attribute key/value pairs and push onto @origlist array.
+ #
+ while ($source)
{
- print OUTPUT $1;
+ if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
+ {
+ $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
+ push @origlist, $1;
+ push @origlist, '"' . $2 . '"';
+ }
+ elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
+ {
+ $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
+ push @origlist, $1;
+ push @origlist, "'" . $2 . "'";
+ }
+ else
+ {
+ last;
+ }
+ }
- my $spaces = $2;
- my $tag = $3;
- my $string = $4;
+ my $ol = [ { @origlist } ];
- print OUTPUT "$spaces<$tag>$string</$tag>\n";
+ push @{ $expat->{Lists} }, $expat->{Curlist};
+ push @{ $expat->{Curlist} }, $tag => $ol;
+ $expat->{Curlist} = $ol;
+}
- $string =~ s/\s+/ /g;
- $string =~ s/^ //;
- $string =~ s/ $//;
- $string = entity_decode($string);
+sub readXml
+{
+ my $filename = shift || return;
+ if(!-f $filename) {
+ die "ERROR Cannot find filename: $filename\n";
+ }
- for my $lang (sort keys %po_files_by_lang)
- {
- my $translation = $translations{$lang, $string};
- next if !$translation;
- $translation = entity_encode($translation);
- print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation</$tag>\n";
- }
+ my $ret = eval 'require XML::Parser';
+ if(!$ret) {
+ die "You must have XML::Parser installed to run $0\n\n";
+ }
+ my $xp = new XML::Parser(Style => 'Tree');
+ $xp->setHandlers(Char => \&intltool_tree_char);
+ $xp->setHandlers(Start => \&intltool_tree_start);
+ $xp->setHandlers(CdataStart => \&intltool_tree_cdatastart);
+ $xp->setHandlers(CdataEnd => \&intltool_tree_cdataend);
+ my $tree = $xp->parsefile($filename);
+
+# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
+# would be:
+# [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], bar, [{},
+# 0, "Howdy", ref, [{}]], 0, "do" ] ]
+
+ return $tree;
+}
+
+sub print_header
+{
+ my $infile = shift;
+ my $fh = shift;
+ my $source;
+
+ if(!-f $infile) {
+ die "ERROR Cannot find filename: $infile\n";
}
- print OUTPUT $source;
+ print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
+ {
+ local $/;
+ open DOCINPUT, "<${FILE}" or die;
+ $source = <DOCINPUT>;
+ close DOCINPUT;
+ }
+ if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
+ {
+ print $fh "$1\n";
+ }
+ elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
+ {
+ print $fh "$1\n";
+ }
+}
+sub parseTree
+{
+ my $fh = shift;
+ my $ref = shift;
+ my $language = shift || "";
+
+ my $name = shift @{ $ref };
+ my $cont = shift @{ $ref };
+
+ while (!$name || "$name" eq "1") {
+ $name = shift @{ $ref };
+ $cont = shift @{ $ref };
+ }
+
+ my $spacepreserve = 0;
+ my $attrs = @{$cont}[0];
+ $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
+
+ traverse($fh, $name, $cont, $language, $spacepreserve);
+}
+
+sub xml_merge_output
+{
+ my $source;
+
+ if ($MULTIPLE_OUTPUT) {
+ for my $lang (sort keys %po_files_by_lang) {
+ if ( ! -e $lang ) {
+ mkdir $lang or die "Cannot create subdirectory $lang: $!\n";
+ }
+ open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
+ binmode (OUTPUT) if $^O eq 'MSWin32';
+ my $tree = readXml($FILE);
+ print_header($FILE, \*OUTPUT);
+ parseTree(\*OUTPUT, $tree, $lang);
+ close OUTPUT;
+ print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
+ }
+ }
+ open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n";
+ binmode (OUTPUT) if $^O eq 'MSWin32';
+ my $tree = readXml($FILE);
+ print_header($FILE, \*OUTPUT);
+ parseTree(\*OUTPUT, $tree);
close OUTPUT;
+ print "CREATED $OUTFILE\n" unless $QUIET_ARG;
}
sub keys_merge_translations
{
open INPUT, "<${FILE}" or die;
open OUTPUT, ">${OUTFILE}" or die;
+ binmode (OUTPUT) if $^O eq 'MSWin32';
while (<INPUT>)
{
@@ -659,6 +1067,7 @@ sub desktop_merge_translations
{
open INPUT, "<${FILE}" or die;
open OUTPUT, ">${OUTFILE}" or die;
+ binmode (OUTPUT) if $^O eq 'MSWin32';
while (<INPUT>)
{
@@ -702,6 +1111,7 @@ sub schemas_merge_translations
}
open OUTPUT, ">$OUTFILE" or die;
+ binmode (OUTPUT) if $^O eq 'MSWin32';
# FIXME: support attribute translations
@@ -712,9 +1122,9 @@ sub schemas_merge_translations
while ($source =~ s/
(.*?)
(\s+)(<locale\ name="C">(\s*)
- (<default>\s*(.*?)\s*<\/default>)?(\s*)
- (<short>\s*(.*?)\s*<\/short>)?(\s*)
- (<long>\s*(.*?)\s*<\/long>)?(\s*)
+ (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*)
+ (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*)
+ (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*)
<\/locale>)
//sx)
{
@@ -730,8 +1140,6 @@ sub schemas_merge_translations
my $short_string = $9 ? $9 : '';
my $long_string = $12 ? $12 : '';
- $c_default_block =~ s/default>\[.*?\]/default>/s;
-
print OUTPUT "$locale_start_spaces$c_default_block";
$default_string =~ s/\s+/ /g;
@@ -787,9 +1195,15 @@ sub schemas_merge_translations
sub rfc822deb_merge_translations
{
+ my %encodings = ();
+ for my $lang (keys %po_files_by_lang) {
+ $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
+ }
+
my $source;
$Text::Wrap::huge = 'overflow';
+ $Text::Wrap::break = qr/\n|\s(?=\S)/;
{
local $/; # slurp mode
@@ -799,23 +1213,27 @@ sub rfc822deb_merge_translations
}
open OUTPUT, ">${OUTFILE}" or die;
+ binmode (OUTPUT) if $^O eq 'MSWin32';
- while ($source =~ /(^|\n+)(_)?([^:_\n]+)(:\s*)(.*?)(?=\n[\S\n]|$)/sg)
+ while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
{
my $sep = $1;
my $non_translated_line = $3.$4;
my $string = $5;
- my $is_translatable = defined($2);
+ my $underscore = length($2);
+ next if $underscore eq 0 && $non_translated_line =~ /^#/;
# Remove [] dummy strings
- $string =~ s/\[\s[^\[\]]*\]$//;
- $non_translated_line .= $string;
+ my $stripped = $string;
+ $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2;
+ $stripped =~ s/\[\s[^\[\]]*\]$//;
+ $non_translated_line .= $stripped;
- print OUTPUT $sep.$non_translated_line;
+ print OUTPUT $sep.$non_translated_line;
- if ($is_translatable)
- {
- my @str_list = rfc822deb_split($string);
-
+ if ($underscore)
+ {
+ my @str_list = rfc822deb_split($underscore, $string);
+
for my $lang (sort keys %po_files_by_lang)
{
my $is_translated = 1;
@@ -838,15 +1256,29 @@ sub rfc822deb_merge_translations
if ($first)
{
- $str_translated .=
- Text::Tabs::expand($translation) .
- "\n";
+ if ($underscore eq 2)
+ {
+ $str_translated .= $translation;
+ }
+ else
+ {
+ $str_translated .=
+ Text::Tabs::expand($translation) .
+ "\n";
+ }
}
else
{
- $str_translated .= Text::Tabs::expand(
- Text::Wrap::wrap(' ', ' ', $translation)) .
- "\n .\n";
+ if ($underscore eq 2)
+ {
+ $str_translated .= ', ' . $translation;
+ }
+ else
+ {
+ $str_translated .= Text::Tabs::expand(
+ Text::Wrap::wrap(' ', ' ', $translation)) .
+ "\n .\n";
+ }
}
$first = 0;
@@ -859,10 +1291,10 @@ sub rfc822deb_merge_translations
$str_translated =~ s/\s+$//;
$_ = $non_translated_line;
- s/^(\w+):\s*.*/$sep${1}-$lang: $str_translated/s;
+ s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
print OUTPUT;
}
- }
+ }
}
print OUTPUT "\n";
@@ -879,8 +1311,12 @@ sub rfc822deb_split
# and paragraphs are separated by a single dot on a line
# This routine returns an array of all paragraphs, and reformat
# them.
+ # When first argument is 2, the string is a comma separated list of
+ # values.
+ my $type = shift;
my $text = shift;
- $text =~ s/^ //mg;
+ $text =~ s/^[ \t]//mg;
+ return (split(/, */, $text, 0)) if $type ne 1;
return ($text) if $text !~ /\n/;
$text =~ s/([^\n]*)\n//;
@@ -890,9 +1326,7 @@ sub rfc822deb_split
for my $line (split (/\n/, $text))
{
chomp $line;
- $line =~ /\s+$/;
-
- if ($line =~ /^\.$/)
+ if ($line =~ /^\.\s*$/)
{
# New paragraph
$str =~ s/\s*$//;
@@ -903,12 +1337,13 @@ sub rfc822deb_split
{
# Line which must not be reformatted
$str .= "\n" if length ($str) && $str !~ /\n$/;
+ $line =~ s/\s+$//;
$str .= $line."\n";
}
else
{
# Continuation line, remove newline
- $str .= " " if length ($str) && $str !~ /[\n ]$/;
+ $str .= " " if length ($str) && $str !~ /\n$/;
$str .= $line;
}
}

Privacy Policy