diff --git a/configure.php b/configure.php
index b8851a1280..5aafc87550 100755
--- a/configure.php
+++ b/configure.php
@@ -551,10 +551,11 @@ function git_status()
echo "\n" , trim( $output ) . "\n\n";
}
-// DTD layer before first XML loading
+// DTD entity layer before first XML loading
dtd_conf_entities();
dtd_file_entities();
+dtd_text_entities();
function dtd_conf_entities()
{
@@ -564,22 +565,16 @@ function dtd_conf_entities()
$conf[] = "";
- if ( $lang == 'en' )
- {
- realpain( __DIR__ . "/temp/empty" , touch: true );
- $trans1 = realpain( __DIR__ . "/temp/empty" );
- $trans2 = realpain( __DIR__ . "/temp/empty" );
- $trans3 = realpain( __DIR__ . "/temp/empty" );
- }
- else
+ if ( $lang != 'en' )
{
$trans1 = realpain( __DIR__ . "/../$lang/language-defs.ent" );
$trans2 = realpain( __DIR__ . "/../$lang/language-snippets.ent" );
$trans3 = realpain( __DIR__ . "/../$lang/extensions.ent" );
+
+ $conf[] = "";
+ $conf[] = "";
+ $conf[] = "";
}
- $conf[] = "";
- $conf[] = "";
- $conf[] = "";
if ( $ac['CHMENABLED'] == 'yes' )
{
@@ -589,7 +584,7 @@ function dtd_conf_entities()
else
$conf[] = "";
- file_put_contents( __DIR__ . "/temp/manual.conf" , implode( "\n" , $conf ) );
+ file_put_contents( __DIR__ . "/temp/manual.inc" , implode( "\n" , $conf ) );
}
function dtd_file_entities()
@@ -620,6 +615,30 @@ function dtd_file_entities()
}
}
+// Runs scripts/text-entities.php with the configured PHP binary, always
+// for "en" and, when another language is configured, for that language
+// as a second argument. Every command part is shell-escaped. A non-zero
+// exit status of the script aborts configure with exit status 1.
+
+function dtd_text_entities()
+{
+    global $ac;
+    $php  = $ac['PHP'];
+    $lang = $ac["LANG"];
+
+    $parts = [ $php
+             , __DIR__ . "/scripts/text-entities.php"
+             , "en" ];
+    if ( $lang != "en" )
+        $parts[] = $lang;
+
+    // array_map() instead of a by-reference foreach, which would leave
+    // a dangling reference to the last array element behind.
+    $cmd = implode( ' ' , array_map( 'escapeshellarg' , $parts ) );
+    $ret = 0;
+    passthru( $cmd , $ret );
+
+    if ( $ret != 0 )
+    {
+        // Name the script that was actually run.
+        echo "doc-base/scripts/text-entities.php FAILED.\n";
+        exit( 1 );
+    }
+}
checking("for if we should generate a simplified file");
if ($ac["GENERATE"] != "no") {
diff --git a/entities/global.ent-dist b/entities/global.ent-dist
deleted file mode 100644
index a453871ca3..0000000000
--- a/entities/global.ent-dist
+++ /dev/null
@@ -1,19 +0,0 @@
-
-
-
-
-
-
-
diff --git a/entities/manual.ent-dist b/entities/manual.ent-dist
deleted file mode 100644
index d93f720ded..0000000000
--- a/entities/manual.ent-dist
+++ /dev/null
@@ -1,14 +0,0 @@
-
-
-
-
-
-
-
-
diff --git a/entities/normal.ent-dist b/entities/normal.ent-dist
new file mode 100644
index 0000000000..ca5b02df42
--- /dev/null
+++ b/entities/normal.ent-dist
@@ -0,0 +1,31 @@
+
+
+
+
+
+
+
+
diff --git a/entities/remove.ent-dist b/entities/remove.ent-dist
index 18ae9e6288..01c45b673e 100644
--- a/entities/remove.ent-dist
+++ b/entities/remove.ent-dist
@@ -1,20 +1,36 @@
-
+
+
-
-
+
+
+
diff --git a/entities/unique.ent-dist b/entities/unique.ent-dist
new file mode 100644
index 0000000000..145e36a724
--- /dev/null
+++ b/entities/unique.ent-dist
@@ -0,0 +1,37 @@
+
+
+
+
+
+
+
+
+
diff --git a/manual.xml b/manual.xml
index a5899fcd26..2c85e402bb 100644
--- a/manual.xml
+++ b/manual.xml
@@ -2,7 +2,7 @@
-
+
%configure;
diff --git a/scripts/entities.php b/scripts/entities.php
deleted file mode 100644
index 2e927c8cd7..0000000000
--- a/scripts/entities.php
+++ /dev/null
@@ -1,412 +0,0 @@
- |
-+----------------------------------------------------------------------+
-| Description: Collect individual entities into an .entities.ent file. |
-+----------------------------------------------------------------------+
-
-# Mental model, or things that I would liked to know 20 years prior
-
-DTD Entity processing has more in common with DOMDocumentFragment than
-DOMElement. In other words, simple text and multi rooted XML files
-are valid contents, whereas they are not valid XML documents.
-
-Also, namespaces do not automatically "cross" between a parent
-document and their entities, even if they are included in the same
-file, as local textual entities. s are, for all intended
-purposes, separated documents, with separated namespaces and have
-*expected* different default namespaces.
-
-So each one of, possibly multiple, "root" XML elements inside an
-fragment need to be annotated with default namespace, even if the
-"root" element occurs surrounded by text. For example:
-
-- "texttext", need one namespace, or it is invalid, and;
-- "", need TWO namespaces, or it is also invalid.
-
-# Output
-
-This script collects grouped and individual XML Entity files
-(detailed below), at some expected relative paths, and generates an
-doc-base/temp/entities.ent file with their respective DTD Entities.
-
-The output file has no duplications, so collection order is important
-to keep the necessary operational semantics. Here, latter loaded entities
-takes priority (overrides) an previous defined one. Note that this is the
-reverse of DTD convention, where duplicated entity names are
-ignored. The priority order used here is important to allow detecting
-cases where global entities are being overwritten, or if expected
-translatable entities are missing translations.
-
-# Individual XML Entities, or `.xml` files at `entities/`
-
-As explained above, the individual entity contents are not really
-valid XML *documents*, they are only at most valid XML *fragments*.
-More technically, these XML files are really well-balanced texts, per
-https://www.w3.org/TR/xml-fragment/#defn-well-balanced .
-
-Yet, individual entities are stored in entities/ as .xml files, for
-two reasons: first, text editors in general can highlights XML syntax in
-well-balanced texts; and second, this allows normal revision tracking
-per file, without requiring weird changes on `revcheck.php`. Note that
-is *invalid* to place XML declaration in these fragment files, at least
-in files that are invalid XML documents (on multi-node rooted ones).
-
-# Grouped entities files, file tracked
-
-For very small textual entities, down to simple text words or single
-tag elements that may never change, individual entity tracking is
-an overkill. This script also loads grouped XML Entities files, at
-some expected locations, with specific semantics.
-
-These grouped files are really normal XML files, correctly annotated
-with XML namespaces used on manuals, so any individual exported entity
-has correct and clean XML namespace annotations. These grouped entity
-files are tracked normally by revcheck, but are not directly included
-in manual.xml.in, as they only participate in general entity loading,
-described above.
-
-- global.ent - expected unreplaced
-- manual.ent - expected replaced (translated)
-- remove.ent - expected unused
-- lang/entities/* - expected replaced (translated)
-
-*/
-
-const PARTIAL_IMPL = true; // For while XML Entities are not fully implanted in all languages
-
-ini_set( 'display_errors' , 1 );
-ini_set( 'display_startup_errors' , 1 );
-error_reporting( E_ALL );
-
-if ( count( $argv ) < 2 || in_array( '--help' , $argv ) || in_array( '-h' , $argv ) )
-{
- fwrite( STDERR , "\nUsage: {$argv[0]} [--debug] langCode [langCode]\n\n" );
- return;
-}
-
-$filename = Entities::rotateOutputFile(); // idempotent
-
-$langs = [];
-$normal = true;
-$debug = false;
-
-for( $idx = 1 ; $idx < count( $argv ) ; $idx++ )
- if ( $argv[$idx] == "--debug" )
- $normal = false;
- else
- $langs[] = $argv[$idx];
-$debug = ! $normal;
-
-if ( $normal )
- print "Creating .entities.ent...";
-else
- print "Creating .entities.ent in debug mode.\n";
-$debug = ! $normal;
-
-loadEnt( __DIR__ . "/../global.ent" , global: true , warnMissing: true );
-foreach( $langs as $lang )
-{
- loadEnt( __DIR__ . "/../../$lang/global.ent" , global: true );
- loadEnt( __DIR__ . "/../../$lang/manual.ent" , translate: true , warnMissing: true );
- loadEnt( __DIR__ . "/../../$lang/remove.ent" , remove: true );
- loadDir( $langs , $lang );
- Entities::$debugUnique = false;
-}
-
-Entities::writeOutputFile();
-Entities::checkReplaces( $debug );
-
-echo " done: " , Entities::$countTotalGenerated , " entities";
-if ( Entities::$countUnstranslated > 0 )
- echo ", " , Entities::$countUnstranslated , " untranslated";
-if ( Entities::$countReplacedGlobal > 0 )
- echo ", " , Entities::$countReplacedGlobal , " global replaced";
-if ( Entities::$countReplacedRemove > 0 )
- echo ", " , Entities::$countReplacedRemove , " remove replaced";
-if ( Entities::$countDuplicated > 0 )
- echo ", " , Entities::$countDuplicated , " duplicated (first language)";
-echo ".\n";
-
-exit;
-
-class EntityData
-{
- public function __construct(
- public string $path ,
- public string $name ,
- public string $text ) {}
-}
-
-class Entities
-{
- private static string $filename = __DIR__ . "/../temp/entities.ent"; // idempotent
-
- private static array $entities = []; // All entities, bi duplications
- private static array $global = []; // Entities expected not replaced
- private static array $replace = []; // Entities expected replaced / translated
- private static array $remove = []; // Entities expected not replaced and not used
- private static array $unique = []; // For detecting duplicated global+en entities
- private static array $count = []; // Name / Count
- private static array $slow = []; // External entities, slow, uncontrolled file overwrites
-
- public static bool $debugUnique = true; // Start on unique mode, disable on second language
-
- public static int $countUnstranslated = 0;
- public static int $countReplacedGlobal = 0;
- public static int $countReplacedRemove = 0;
- public static int $countTotalGenerated = 0;
- public static int $countDuplicated = 0;
-
- static function put( string $path , string $name , string $text , bool $global = false , bool $replace = false , bool $remove = false )
- {
- $entity = new EntityData( $path , $name , $text );
- Entities::$entities[ $name ] = $entity;
-
- if ( $global )
- Entities::$global[ $name ] = $name;
-
- if ( $replace )
- Entities::$replace[ $name ] = $name;
-
- if ( $remove )
- Entities::$remove[ $name ] = $name;
-
- if ( ! isset( Entities::$count[ $name ] ) )
- Entities::$count[$name] = 1;
- else
- Entities::$count[$name]++;
-
- if ( Entities::$debugUnique )
- {
- if ( isset( Entities::$unique[ $name ] ) )
- {
- Entities::$countDuplicated++;
- if ( Entities::$countDuplicated == 1 )
- fwrite( STDERR , "\n\n" );
- fwrite( STDERR , " Duplicated entity: $name\n" );
- }
- Entities::$unique[ $name ] = $entity;
- }
- }
-
- static function slow( string $path )
- {
- if ( isset( $slow[$path] ) )
- fwrite( STDERR , "Unexpected file overwrite: $path\n" );
- $slow[ $path ] = $path;
- }
-
- static function rotateOutputFile()
- {
- if ( file_exists( Entities::$filename ) )
- unlink( Entities::$filename );
- touch( Entities::$filename );
- Entities::$filename = realpath( Entities::$filename ); // only full paths on XML
- }
-
- static function writeOutputFile()
- {
- saveEntitiesFile( Entities::$filename , Entities::$entities );
- }
-
- static function checkReplaces( bool $debug )
- {
- Entities::$countTotalGenerated = count( Entities::$entities );
- Entities::$countUnstranslated = 0;
- Entities::$countReplacedGlobal = 0;
- Entities::$countReplacedRemove = 0;
-
- foreach( Entities::$entities as $name => $text )
- {
- $replaced = Entities::$count[$name] - 1;
- $expectedGlobal = in_array( $name , Entities::$global );
- $expectedReplaced = in_array( $name , Entities::$replace );
- $expectedRemoved = in_array( $name , Entities::$remove );
-
- if ( $expectedGlobal && $replaced != 0 )
- {
- Entities::$countReplacedGlobal++;
- if ( $debug )
- print "Expected global, replaced $replaced times: $name\n";
- }
-
- if ( $expectedReplaced && $replaced != 1 )
- {
- Entities::$countUnstranslated++;
- if ( $debug )
- print "Expected translated, replaced $replaced times: $name\n";
- }
-
- if ( $expectedRemoved && $replaced != 0 )
- {
- Entities::$countReplacedRemove++;
- if ( $debug )
- print "Expected removed, replaced $replaced times: $name\n";
- }
- }
- }
-}
-
-function loadEnt( string $path , bool $global = false , bool $translate = false , bool $remove = false , bool $warnMissing = false )
-{
- $realpath = realpath( $path );
- if ( $realpath === false )
- if ( PARTIAL_IMPL )
- return;
- else
- if ( $warnMissing )
- fwrite( STDERR , "\n Missing entity file: $path\n" );
- $path = $realpath;
-
- $text = file_get_contents( $path );
- $text = str_replace( "&" , "&" , $text );
-
- $dom = new DOMDocument( '1.0' , 'utf8' );
- if ( ! $dom->loadXML( $text ) )
- die( "XML load failed for $path\n" );
-
- $xpath = new DOMXPath( $dom );
- $list = $xpath->query( "/*/*" );
-
- foreach( $list as $ent )
- {
- // weird, namespace correting, DOMNodeList -> DOMDocumentFragment transform
- $other = new DOMDocument( '1.0' , 'utf8' );
-
- foreach( $ent->childNodes as $node )
- $other->appendChild( $other->importNode( $node , true ) );
-
- $name = $ent->getAttribute( "name" );
- $text = $other->saveXML();
-
- $text = rtrim( $text , "\n" );
- $text = str_replace( "&" , "&" , $text );
- $lines = explode( "\n" , $text );
- array_shift( $lines ); // remove XML declaration
- $text = implode( "\n" , $lines );
-
- Entities::put( $path , $name , $text , $global , $translate , $remove );
- }
-}
-
-function loadDir( array $langs , string $lang )
-{
- global $debug;
-
- $dir = __DIR__ . "/../../$lang/entities";
- $dir = realpath( $dir );
- if ( $dir === false || ! is_dir( $dir ) )
- if ( PARTIAL_IMPL )
- {
- if ( $debug )
- print "Not a directory: $dir\n";
- return;
- }
- else
- exit( "Error: not a directory: $dir\n" );
-
- $files = scandir( $dir );
- $expectedReplaced = array_search( $lang , $langs ) > 0;
-
- foreach( $files as $file )
- {
- $path = realpath( "$dir/$file" );
-
- if ( str_starts_with( $file , '.' ) )
- continue;
- if ( is_dir( $path ) )
- continue;
-
- $text = file_get_contents( $path );
- $text = rtrim( $text , "\n" );
-
- loadXml( $path , $text , $expectedReplaced );
- }
-}
-
-function loadXml( string $path , string $text , bool $expectedReplaced )
-{
- $info = pathinfo( $path );
- $name = $info["filename"];
- $frag = "$text";
-
- if ( trim( $text ) == "" )
- {
- if ( ! PARTIAL_IMPL )
- fwrite( STDERR , "\n Empty entity (should it be in remove.ent?): '$path' \n" );
- Entities::put( $path , $name , $text );
- return;
- }
-
- $dom = new DOMDocument( '1.0' , 'utf8' );
- $dom->recover = true;
- $dom->resolveExternals = false;
- libxml_use_internal_errors( true );
-
- $res = $dom->loadXML( $frag );
-
- $err = libxml_get_errors();
- libxml_clear_errors();
-
- foreach( $err as $item )
- {
- $msg = trim( $item->message );
- if ( str_starts_with( $msg , "Entity '" ) && str_ends_with( $msg , "' not defined" ) )
- continue;
-
- fwrite( STDERR , "\n XML load failed on entity file." );
- fwrite( STDERR , "\n Path: $path" );
- fwrite( STDERR , "\n Error: $msg\n" );
- return;
- }
-
- Entities::put( $path , $name , $text , replace: $expectedReplaced );
-}
-
-function saveEntitiesFile( string $filename , array $entities )
-{
- $tmpDir = __DIR__ . "/temp"; // idempotent
-
- $file = fopen( $filename , "w" );
- fputs( $file , "\n\n\n" );
-
- foreach( $entities as $name => $entity )
- {
- $text = $entity->text;
- $quote = "";
-
- // If the text contains mixed quoting, keeping it
- // as an external file to avoid (re)quotation hell.
-
- if ( strpos( $text , "'" ) === false )
- $quote = "'";
- if ( strpos( $text , '"' ) === false )
- $quote = '"';
-
- if ( $quote == "" )
- {
- if ( $entity->path == "" )
- {
- $entity->path = $tmpDir . "/{$entity->path}.tmp";
- file_put_contents( $entity->path , $text );
- }
- fputs( $file , "path}'>\n\n" );
- Entities::slow( $entity->path );
- }
- else
- fputs( $file , "\n\n" );
- }
-
- fclose( $file );
-}
diff --git a/scripts/file-entities.php b/scripts/file-entities.php
index 9229b0b130..b6b277d86b 100644
--- a/scripts/file-entities.php
+++ b/scripts/file-entities.php
@@ -118,7 +118,9 @@
writeEntity( $file , $ent );
fclose( $file );
-echo "done\n";
+
+$total = count( $entities );
+echo "done: $total entities.\n";
exit( 0 );
diff --git a/scripts/text-entities.php b/scripts/text-entities.php
new file mode 100644
index 0000000000..43f7dfce4f
--- /dev/null
+++ b/scripts/text-entities.php
@@ -0,0 +1,457 @@
+ |
++----------------------------------------------------------------------+
+| Description: Collect individual entities into an temp/entities.ent. |
++----------------------------------------------------------------------+
+
+# Mental model for DTD entities,
+ or things that I would have liked to know 20 years ago
+
+DTD Entity contents have more in common with DOMDocumentFragment than
+DOMElement. In other words, simple text and multi rooted XML fragments
+are valid content, whereas they are not valid XML documents.
+
+Also, namespaces do not automatically "cross" between a parent
+document and their entities, even if they are included in the same
+file, as local textual entities. Each entity is, for all intents and
+purposes, a separate document, with separate namespaces, and is
+*expected* to have a different default namespace.
+
+So each one of the, possibly multiple, "root" XML elements inside a
+fragment needs to be annotated with a default namespace, even if the
+"root" element occurs surrounded by text. For example:
+
+- "text<a/>text" needs one namespace declaration, or it is invalid, and;
+- "<a/><a/>" needs TWO namespace declarations, or it is also invalid.
+
+# Output
+
+This script collects grouped and individual XML Entity files
+(detailed below), at some expected relative paths, and generates an
+doc-base/temp/entities.ent file with their respective DTD Entities.
+
+The output file has no duplications, so collection order is important
+to create some operational semantics. Here, later loaded entities
+take priority over (override) previously defined ones. Note that this is
+the reverse of the DTD convention, where duplicated entity names are
+ignored. The priority order used here is important to allow detecting
+cases where unique entities are being overwritten, or if expected
+translatable entities are missing translations.
+
+# Individual XML Entity files, or `.xml` files at `doc-lang/entities/`
+
+As explained above, the individual entity contents are not really
+valid XML *documents*, they are only at most valid XML *fragments*.
+More technically, these XML files are really well-balanced texts, per
+https://www.w3.org/TR/xml-fragment/#defn-well-balanced .
+
+Yet, individual entities are stored in entities/ as .xml files, for
+two reasons: first, text editors in general can highlight XML syntax in
+well-balanced texts; and second, this allows normal revision tracking
+per file, without requiring weird changes on `revcheck.php`. Note that
+it is *invalid* to place an XML declaration in these fragment files, at
+least in files that are invalid XML documents (on multi-node rooted ones).
+
+# Grouped XML Entity files
+
+For very small textual entities, down to simple text or single XML
+elements that may never change, individual file tracking of entities
+is an overkill. To avoid an infinitude of micro entity files, this script
+also loads grouped XML Entity files, at some expected locations.
+
+These grouped files are really normal XML files, correctly annotated
+with XML namespaces used on the manual, so any individual exported entity
+has correct and clean XML namespace annotations. These grouped entity
+files are tracked normally by revcheck, but are not directly included
+in manual.xml.in, as they only participate in general entity loading,
+described above.
+
+# Checks
+
+Grouped XML Entity files are annotated with an attribute named "translate",
+that accepts the following values:
+
+- "yes": these entities are expected to be translated or replaced;
+- "no": these entities are expected not to be translated or replaced;
+- "delete" (or "remove"): these entities should be deleted on sight.
+
+The characteristics above are validated at the end of the script. Use the
+--debug argument to also list the names of misused entities.
+
+The "delete" value exists to make it possible to delete entities from
+doc-en while keeping translations building. To achieve this result,
+move any recently deleted entity to a .ent file with translate="delete".
+
+*/
+
+const PARTIAL_IMPL = true; // While XML Entities are not fully implemented in all languages
+
+ini_set( 'display_errors' , 1 );
+ini_set( 'display_startup_errors' , 1 );
+error_reporting( E_ALL );
+
+// Command line: langCode [langCode] [--debug]
+// Every argument other than --debug is taken as a language code, and
+// languages are loaded in the order given (later ones override).
+
+$langs = [];
+$debug = false;
+$usage = in_array( '--help' , $argv ) || in_array( '-h' , $argv );
+
+if ( count( $argv ) < 2 || $usage )
+{
+    print "\nUsage: {$argv[0]} langCode [langCode] [--debug]\n\n";
+    exit( $usage ? 0 : 1 );
+}
+
+foreach( array_slice( $argv , 1 ) as $arg )
+    if ( $arg == "--debug" )
+        $debug = true;
+    else
+        $langs[] = $arg;
+
+// Reset the output file only after the arguments were accepted, so that
+// --help or a bad invocation does not wipe a previously generated file.
+
+Entities::truncateOutputFile();
+
+if ( $debug )
+    print "Running text-entities.php in debug mode.\n";
+else
+    print "Running text-entities.php... ";
+
+// For each language: individual and grouped entities in $lang/entities,
+// then grouped .ent files anywhere below $lang/reference.
+
+foreach( $langs as $lang )
+{
+    $entDir = __DIR__ . "/../../$lang/entities";
+    $refDir = __DIR__ . "/../../$lang/reference";
+
+    loadDirEntities( $entDir );
+    loadDirRecurse( $refDir );
+    Entities::$countLanguages++;
+}
+
+Entities::writeOutputFile();
+Entities::checkReplaces( $debug );
+
+// One-line summary; the optional counters appear only when non-zero.
+
+echo "done: generated " , Entities::$countTotalGenerated , " entities";
+if ( Entities::$countUntranslated > 0 )
+    echo ", " , Entities::$countUntranslated , " untranslated";
+if ( Entities::$countUniqueReplaced > 0 )
+    echo ", " , Entities::$countUniqueReplaced , " unique replaced";
+if ( Entities::$countRemoveReplaced > 0 )
+    echo ", " , Entities::$countRemoveReplaced , " remove replaced";
+echo ".\n";
+
+exit;
+
+// Kinds of checks an entity can be subject to.
+// NOTE(review): no use of this enum is visible in this part of the file;
+// the loaders pass $unique / $remove booleans instead -- confirm whether
+// it is still needed.
+enum EntityCheck
+{
+ case Unique; // Expected once (not replaced by a later load)
+ case Normal; // Expected used/translated
+ case Remove; // Expected unused
+}
+
+// Value holder for one loaded entity: the path of the file it was read
+// from, the entity name, and the entity text (body).
+class EntityData
+{
+ public function __construct(
+ public string $path ,
+ public string $name ,
+ public string $text ) {}
+}
+
+// Static registry of all loaded entities, merged by name, plus the
+// counters reported at the end of the script.
+class Entities
+{
+    private static string $filename = __DIR__ . "/../temp/entities.ent";
+
+    private static array $merged    = []; // name => EntityData, last loaded wins
+    private static array $unique    = []; // name => name, any entity marked unique
+    private static array $remove    = []; // name => name, any entity marked deleted
+    private static array $nameCount = []; // name => times the name was loaded
+
+    public static int $countLanguages      = 0; // For translated check
+    public static int $countUntranslated   = 0;
+    public static int $countUniqueReplaced = 0;
+    public static int $countRemoveReplaced = 0;
+    public static int $countTotalGenerated = 0;
+
+    // Registers the entity $name, overriding any previous definition,
+    // and records its flags and how many times the name was loaded.
+    static function put( string $path , string $name , string $text , bool $unique = false , bool $remove = false )
+    {
+        self::$merged[ $name ] = new EntityData( $path , $name , $text );
+
+        if ( $unique )
+            self::$unique[ $name ] = $name;
+
+        if ( $remove )
+            self::$remove[ $name ] = $name;
+
+        self::$nameCount[ $name ] = ( self::$nameCount[ $name ] ?? 0 ) + 1;
+    }
+
+    // Recreates the output file empty and resolves its full path.
+    static function truncateOutputFile()
+    {
+        if ( file_exists( self::$filename ) )
+            unlink( self::$filename );
+        touch( self::$filename );
+        self::$filename = realpath( self::$filename ); // only full paths on XML
+    }
+
+    // Writes all merged entities into the output file.
+    static function writeOutputFile()
+    {
+        outputFiles( self::$filename , self::$merged );
+    }
+
+    // Recomputes the public counters. An entity counts as misused when:
+    // - it is marked unique or removed, and was loaded more than once;
+    // - it is neither, more than one language was loaded, and it was
+    //   not replaced exactly once.
+    // With $debug, the name of each misused entity is also printed.
+    static function checkReplaces( bool $debug )
+    {
+        self::$countTotalGenerated = count( self::$merged );
+        self::$countUntranslated   = 0;
+        self::$countUniqueReplaced = 0;
+        self::$countRemoveReplaced = 0;
+
+        $languages = self::$countLanguages; // loop invariant
+
+        foreach( array_keys( self::$merged ) as $name )
+        {
+            $replaced = self::$nameCount[ $name ] - 1;
+
+            // Both maps are keyed by entity name: isset() is an exact,
+            // constant-time lookup, where in_array() was a linear scan
+            // with loose comparison (numeric-like names could match
+            // each other).
+            $expectedUnique     = isset( self::$unique[ $name ] );
+            $expectedRemoved    = isset( self::$remove[ $name ] );
+            $expectedTranslated = ! ( $expectedUnique || $expectedRemoved );
+
+            if ( $expectedUnique && $replaced != 0 )
+            {
+                self::$countUniqueReplaced++;
+                if ( $debug )
+                    print " Expected unique, replaced $replaced times: $name\n";
+            }
+
+            if ( $expectedRemoved && $replaced != 0 )
+            {
+                self::$countRemoveReplaced++;
+                if ( $debug )
+                    print " Expected removed, replaced $replaced times: $name\n";
+            }
+
+            if ( $expectedTranslated && $replaced != 1 && $languages != 1 )
+            {
+                self::$countUntranslated++;
+                if ( $debug )
+                    print " Expected translated, replaced $replaced times: $name\n";
+            }
+        }
+    }
+}
+
+// Loads every entity file directly inside $dir (not recursive): .xml
+// files as individual entities, .ent files as grouped entities. Hidden
+// files and sub-directories are ignored. A missing directory is skipped
+// while PARTIAL_IMPL is set, and ends the script with exit status 1
+// otherwise.
+
+function loadDirEntities( string $dir )
+{
+    // Keep the requested path: on failure realpath() yields false, which
+    // would print as an empty string in the messages below.
+    $real = realpath( $dir );
+
+    if ( $real === false || ! is_dir( $real ) )
+    {
+        if ( PARTIAL_IMPL )
+        {
+            print "\n Skipped $dir\n";
+            return;
+        }
+
+        print "\n Not a directory: $dir\n";
+        exit( 1 );
+    }
+
+    foreach( scandir( $real ) as $file )
+    {
+        if ( str_starts_with( $file , '.' ) )
+            continue;
+
+        $path = realpath( "$real/$file" );
+        if ( is_dir( $path ) )
+            continue;
+
+        if ( str_ends_with( $path , ".xml" ) )
+            loadEntitySingle( $path );
+        elseif ( str_ends_with( $path , ".ent" ) )
+            loadEntityGroup( $path );
+    }
+}
+
+// Recursively loads every grouped .ent file found below $dir. Entries
+// starting with a dot are ignored. A missing directory is skipped:
+// scandir() would return false for it, which cannot be iterated.
+
+function loadDirRecurse( string $dir )
+{
+    if ( ! is_dir( $dir ) )
+        return;
+
+    foreach( scandir( $dir ) as $entry )
+    {
+        if ( str_starts_with( $entry , '.' ) )
+            continue;
+
+        $path = realpath( "$dir/$entry" );
+
+        if ( is_dir( $path ) )
+            loadDirRecurse( $path );
+        elseif ( str_ends_with( $path , ".ent" ) )
+            loadEntityGroup( $path );
+    }
+}
+
+// Loads one grouped XML Entity file. Each child element of the root
+// becomes an entity, named by its "name" attribute, whose text is the
+// serialization of its child nodes. The "translate" attribute of the
+// root selects the check applied at the end of the script:
+// - "yes": expected translated (no flag set);
+// - "no": expected unique;
+// - "delete" or "remove": expected unused.
+// An unparsable file, or any other attribute value, ends the script
+// with exit status 1.
+
+function loadEntityGroup( string $path )
+{
+    $path = realpath( $path );
+    $text = file_get_contents( $path );
+    // NOTE(review): as written, search and replacement are identical,
+    // which makes this call a no-op -- confirm the intended replacement.
+    $text = str_replace( "&" , "&" , $text );
+
+    $dom = new DOMDocument( '1.0' , 'utf8' );
+    if ( ! $dom->loadXML( $text ) )
+    {
+        // die( "message" ) would exit with status 0, hiding the failure
+        // from a caller that checks the exit status.
+        print "XML load failed for $path\n";
+        exit( 1 );
+    }
+
+    $unique = false;
+    $remove = false;
+    $value  = $dom->documentElement->getAttribute( "translate" );
+    switch ( $value )
+    {
+        case "yes":
+            // Normal, translatable entities: no flag to set.
+            break;
+        case "no":
+            $unique = true;
+            break;
+        case "delete":
+        case "remove":
+            $remove = true;
+            break;
+        default:
+            print "\n Invalid translate attribute '$value' in '$path'.\n";
+            exit( 1 );
+    }
+
+    $xpath = new DOMXPath( $dom );
+    $list  = $xpath->query( "/*/*" );
+
+    foreach( $list as $ent )
+    {
+        $name = $ent->getAttribute( "name" );
+
+        // Weird, namespace correcting, DOMNodeList -> DOMDocumentFragment
+        // transform: import the child nodes into a fresh document.
+
+        $other = new DOMDocument( '1.0' , 'utf8' );
+        foreach( $ent->childNodes as $node )
+            $other->appendChild( $other->importNode( $node , true ) );
+
+        // Piecewise reconstruct fragment, without XML declarations or extra newlines
+
+        $text = "";
+        foreach( $other->childNodes as $node )
+            $text .= $other->saveXML( $node );
+
+        Entities::put( $path , $name , $text , $unique , $remove );
+    }
+}
+
+// Loads one individual entity file: the entity is named after the file
+// name without extension, and its text is the file contents, unchanged.
+// An empty file is reported but still registered. A file that fails
+// validation is reported and NOT registered; the script keeps running.
+
+function loadEntitySingle( string $path )
+{
+ $text = file_get_contents( $path );
+ $info = pathinfo( $path );
+ $name = $info["filename"];
+ // NOTE(review): as shown here $frag is just a copy of $text; if a
+ // wrapping root element was intended for validation, confirm it.
+ $frag = "$text";
+
+ if ( trim( $text ) == "" )
+ {
+ print "\n Empty entity '$name' on file '$path'.\n";
+ print "\n Should it be in a file with translate='remove'?\n";
+ Entities::put( $path , $name , $text );
+ return;
+ }
+
+ // Validate. Accepts only the error "Entity * not defined"
+ // Errors are collected through libxml's internal error buffer instead
+ // of being emitted as PHP warnings.
+
+ $dom = new DOMDocument( '1.0' , 'utf8' );
+ $dom->recover = true;
+ $dom->resolveExternals = false;
+ libxml_use_internal_errors( true );
+
+ // The result in $xml is not read afterwards; only the collected
+ // errors are inspected.
+ $xml = $dom->loadXML( $frag );
+ $err = libxml_get_errors();
+ libxml_clear_errors();
+
+ foreach( $err as $item )
+ {
+ $msg = trim( $item->message );
+
+ // Code 26 is presumably libxml's "undeclared entity" error -- TODO
+ // confirm; the message test below covers the same case by text.
+ if ( $item->code == 26 )
+ continue;
+ if ( str_starts_with( $msg , "Entity '" ) && str_ends_with( $msg , "' not defined" ) )
+ continue;
+
+ // First other error: report it and give up on this file.
+ print "\n XML load failed for entity file:";
+ print "\n Path: $path";
+ print "\n Error: $msg\n";
+ return;
+ }
+
+ Entities::put( $path , $name , $text );
+}
+
+// Writes $entities (name => EntityData) to $filename. A body with at
+// most one kind of quote character takes the fast path; a body with
+// both kinds is saved as a separate file under temp/text-entities/.
+// Ends the script with exit status 1 if that separate file already
+// exists.
+// NOTE(review): the strings passed to fputs() appear truncated in this
+// view ($name, $quote and $path are computed but not visibly written);
+// confirm the literals against the real file.
+
+function outputFiles( string $filename , array $entities )
+{
+ $file = fopen( $filename , "w" );
+ fputs( $file , "\n" );
+ fputs( $file , "\n\n\n" );
+
+ $sepFileDir = __DIR__ . "/../temp/text-entities/";
+
+ if ( file_exists( $sepFileDir ) == false )
+ mkdir( $sepFileDir , recursive: true );
+
+ foreach( $entities as $name => $entity )
+ {
+ $name = $entity->name;
+ $body = $entity->text;
+
+ // Pick the quote character NOT present in the body; $count ends
+ // as the number of distinct quote kinds found (0, 1 or 2).
+
+ $quote = "'";
+ $count = 0;
+
+ if ( str_contains( $body , "'" ) )
+ {
+ $quote = '"';
+ $count++;
+ }
+ if ( str_contains( $body , '"' ) )
+ {
+ $quote = "'";
+ $count++;
+ }
+
+ if ( $count < 2 )
+ {
+ // Fast path for single or no quote:
+ // entity body directly quoted on output file.
+
+ fputs( $file , "\n\n" );
+ continue;
+ }
+
+ // Slow path: entity body as an external file,
+ // as to avoid (re)quotation hell.
+
+ $path = $sepFileDir . "/{$entity->name}.xml";
+
+ // NOTE(review): $entities is keyed by name, so this presumably only
+ // triggers on files left over from an earlier run -- confirm that
+ // temp/text-entities/ is cleaned before the script runs.
+
+ if ( file_exists( $path ) )
+ {
+ echo "\nDuplicated text-entity file: '{$path}'.\n";
+ exit( 1 );
+ }
+
+ // realpath() only after file creation
+
+ file_put_contents( $path , $body );
+ $path = realpath( $path );
+ fputs( $file , "\n\n" );
+ }
+
+ fclose( $file );
+}