diff --git a/configure.php b/configure.php index b8851a1280..5aafc87550 100755 --- a/configure.php +++ b/configure.php @@ -551,10 +551,11 @@ function git_status() echo "\n" , trim( $output ) . "\n\n"; } -// DTD layer before first XML loading +// DTD entity layer before first XML loading dtd_conf_entities(); dtd_file_entities(); +dtd_text_entities(); function dtd_conf_entities() { @@ -564,22 +565,16 @@ function dtd_conf_entities() $conf[] = ""; - if ( $lang == 'en' ) - { - realpain( __DIR__ . "/temp/empty" , touch: true ); - $trans1 = realpain( __DIR__ . "/temp/empty" ); - $trans2 = realpain( __DIR__ . "/temp/empty" ); - $trans3 = realpain( __DIR__ . "/temp/empty" ); - } - else + if ( $lang != 'en' ) { $trans1 = realpain( __DIR__ . "/../$lang/language-defs.ent" ); $trans2 = realpain( __DIR__ . "/../$lang/language-snippets.ent" ); $trans3 = realpain( __DIR__ . "/../$lang/extensions.ent" ); + + $conf[] = ""; + $conf[] = ""; + $conf[] = ""; } - $conf[] = ""; - $conf[] = ""; - $conf[] = ""; if ( $ac['CHMENABLED'] == 'yes' ) { @@ -589,7 +584,7 @@ function dtd_conf_entities() else $conf[] = ""; - file_put_contents( __DIR__ . "/temp/manual.conf" , implode( "\n" , $conf ) ); + file_put_contents( __DIR__ . "/temp/manual.inc" , implode( "\n" , $conf ) ); } function dtd_file_entities() @@ -620,6 +615,30 @@ function dtd_file_entities() } } +function dtd_text_entities() // Runs scripts/text-entities.php for "en" plus the configured language; exits with status 1 if it fails +{ + global $ac; + $php = $ac['PHP']; + $lang = $ac["LANG"]; + + $parts = [ $php + , __DIR__ . 
"/scripts/text-entities.php" + , "en" ]; + if ( $lang != "en" ) + $parts[] = $lang; + + // Shell-quote every argument; array_map() leaves no dangling reference behind + $parts = array_map( 'escapeshellarg' , $parts ); + $cmd = implode( ' ' , $parts ); + $ret = 0; + passthru( $cmd , $ret ); + + if ( $ret != 0 ) + { + echo "doc-base/scripts/text-entities.php FAILED.\n"; + exit( 1 ); + } +} checking("for if we should generate a simplified file"); if ($ac["GENERATE"] != "no") { diff --git a/entities/global.ent-dist b/entities/global.ent-dist deleted file mode 100644 index a453871ca3..0000000000 --- a/entities/global.ent-dist +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - diff --git a/entities/manual.ent-dist b/entities/manual.ent-dist deleted file mode 100644 index d93f720ded..0000000000 --- a/entities/manual.ent-dist +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - diff --git a/entities/normal.ent-dist b/entities/normal.ent-dist new file mode 100644 index 0000000000..ca5b02df42 --- /dev/null +++ b/entities/normal.ent-dist @@ -0,0 +1,31 @@ + + + + + + + + diff --git a/entities/remove.ent-dist b/entities/remove.ent-dist index 18ae9e6288..01c45b673e 100644 --- a/entities/remove.ent-dist +++ b/entities/remove.ent-dist @@ -1,20 +1,36 @@ - + + - - + + + diff --git a/entities/unique.ent-dist b/entities/unique.ent-dist new file mode 100644 index 0000000000..145e36a724 --- /dev/null +++ b/entities/unique.ent-dist @@ -0,0 +1,37 @@ + + + + + + + + + diff --git a/manual.xml b/manual.xml index a5899fcd26..2c85e402bb 100644 --- a/manual.xml +++ b/manual.xml @@ -2,7 +2,7 @@ - + %configure; diff --git a/scripts/entities.php b/scripts/entities.php deleted file mode 100644 index 2e927c8cd7..0000000000 --- a/scripts/entities.php +++ /dev/null @@ -1,412 +0,0 @@ - | -+----------------------------------------------------------------------+ -| Description: Collect individual entities into an .entities.ent file. 
| -+----------------------------------------------------------------------+ - -# Mental model, or things that I would liked to know 20 years prior - -DTD Entity processing has more in common with DOMDocumentFragment than -DOMElement. In other words, simple text and multi rooted XML files -are valid contents, whereas they are not valid XML documents. - -Also, namespaces do not automatically "cross" between a parent -document and their entities, even if they are included in the same -file, as local textual entities. s are, for all intended -purposes, separated documents, with separated namespaces and have -*expected* different default namespaces. - -So each one of, possibly multiple, "root" XML elements inside an -fragment need to be annotated with default namespace, even if the -"root" element occurs surrounded by text. For example: - -- "texttext", need one namespace, or it is invalid, and; -- "", need TWO namespaces, or it is also invalid. - -# Output - -This script collects grouped and individual XML Entity files -(detailed below), at some expected relative paths, and generates an -doc-base/temp/entities.ent file with their respective DTD Entities. - -The output file has no duplications, so collection order is important -to keep the necessary operational semantics. Here, latter loaded entities -takes priority (overrides) an previous defined one. Note that this is the -reverse of DTD convention, where duplicated entity names are -ignored. The priority order used here is important to allow detecting -cases where global entities are being overwritten, or if expected -translatable entities are missing translations. - -# Individual XML Entities, or `.xml` files at `entities/` - -As explained above, the individual entity contents are not really -valid XML *documents*, they are only at most valid XML *fragments*. -More technically, these XML files are really well-balanced texts, per -https://www.w3.org/TR/xml-fragment/#defn-well-balanced . 
- -Yet, individual entities are stored in entities/ as .xml files, for -two reasons: first, text editors in general can highlights XML syntax in -well-balanced texts; and second, this allows normal revision tracking -per file, without requiring weird changes on `revcheck.php`. Note that -is *invalid* to place XML declaration in these fragment files, at least -in files that are invalid XML documents (on multi-node rooted ones). - -# Grouped entities files, file tracked - -For very small textual entities, down to simple text words or single -tag elements that may never change, individual entity tracking is -an overkill. This script also loads grouped XML Entities files, at -some expected locations, with specific semantics. - -These grouped files are really normal XML files, correctly annotated -with XML namespaces used on manuals, so any individual exported entity -has correct and clean XML namespace annotations. These grouped entity -files are tracked normally by revcheck, but are not directly included -in manual.xml.in, as they only participate in general entity loading, -described above. - -- global.ent - expected unreplaced -- manual.ent - expected replaced (translated) -- remove.ent - expected unused -- lang/entities/* - expected replaced (translated) - -*/ - -const PARTIAL_IMPL = true; // For while XML Entities are not fully implanted in all languages - -ini_set( 'display_errors' , 1 ); -ini_set( 'display_startup_errors' , 1 ); -error_reporting( E_ALL ); - -if ( count( $argv ) < 2 || in_array( '--help' , $argv ) || in_array( '-h' , $argv ) ) -{ - fwrite( STDERR , "\nUsage: {$argv[0]} [--debug] langCode [langCode]\n\n" ); - return; -} - -$filename = Entities::rotateOutputFile(); // idempotent - -$langs = []; -$normal = true; -$debug = false; - -for( $idx = 1 ; $idx < count( $argv ) ; $idx++ ) - if ( $argv[$idx] == "--debug" ) - $normal = false; - else - $langs[] = $argv[$idx]; -$debug = ! 
$normal; - -if ( $normal ) - print "Creating .entities.ent..."; -else - print "Creating .entities.ent in debug mode.\n"; -$debug = ! $normal; - -loadEnt( __DIR__ . "/../global.ent" , global: true , warnMissing: true ); -foreach( $langs as $lang ) -{ - loadEnt( __DIR__ . "/../../$lang/global.ent" , global: true ); - loadEnt( __DIR__ . "/../../$lang/manual.ent" , translate: true , warnMissing: true ); - loadEnt( __DIR__ . "/../../$lang/remove.ent" , remove: true ); - loadDir( $langs , $lang ); - Entities::$debugUnique = false; -} - -Entities::writeOutputFile(); -Entities::checkReplaces( $debug ); - -echo " done: " , Entities::$countTotalGenerated , " entities"; -if ( Entities::$countUnstranslated > 0 ) - echo ", " , Entities::$countUnstranslated , " untranslated"; -if ( Entities::$countReplacedGlobal > 0 ) - echo ", " , Entities::$countReplacedGlobal , " global replaced"; -if ( Entities::$countReplacedRemove > 0 ) - echo ", " , Entities::$countReplacedRemove , " remove replaced"; -if ( Entities::$countDuplicated > 0 ) - echo ", " , Entities::$countDuplicated , " duplicated (first language)"; -echo ".\n"; - -exit; - -class EntityData -{ - public function __construct( - public string $path , - public string $name , - public string $text ) {} -} - -class Entities -{ - private static string $filename = __DIR__ . 
"/../temp/entities.ent"; // idempotent - - private static array $entities = []; // All entities, bi duplications - private static array $global = []; // Entities expected not replaced - private static array $replace = []; // Entities expected replaced / translated - private static array $remove = []; // Entities expected not replaced and not used - private static array $unique = []; // For detecting duplicated global+en entities - private static array $count = []; // Name / Count - private static array $slow = []; // External entities, slow, uncontrolled file overwrites - - public static bool $debugUnique = true; // Start on unique mode, disable on second language - - public static int $countUnstranslated = 0; - public static int $countReplacedGlobal = 0; - public static int $countReplacedRemove = 0; - public static int $countTotalGenerated = 0; - public static int $countDuplicated = 0; - - static function put( string $path , string $name , string $text , bool $global = false , bool $replace = false , bool $remove = false ) - { - $entity = new EntityData( $path , $name , $text ); - Entities::$entities[ $name ] = $entity; - - if ( $global ) - Entities::$global[ $name ] = $name; - - if ( $replace ) - Entities::$replace[ $name ] = $name; - - if ( $remove ) - Entities::$remove[ $name ] = $name; - - if ( ! 
isset( Entities::$count[ $name ] ) ) - Entities::$count[$name] = 1; - else - Entities::$count[$name]++; - - if ( Entities::$debugUnique ) - { - if ( isset( Entities::$unique[ $name ] ) ) - { - Entities::$countDuplicated++; - if ( Entities::$countDuplicated == 1 ) - fwrite( STDERR , "\n\n" ); - fwrite( STDERR , " Duplicated entity: $name\n" ); - } - Entities::$unique[ $name ] = $entity; - } - } - - static function slow( string $path ) - { - if ( isset( $slow[$path] ) ) - fwrite( STDERR , "Unexpected file overwrite: $path\n" ); - $slow[ $path ] = $path; - } - - static function rotateOutputFile() - { - if ( file_exists( Entities::$filename ) ) - unlink( Entities::$filename ); - touch( Entities::$filename ); - Entities::$filename = realpath( Entities::$filename ); // only full paths on XML - } - - static function writeOutputFile() - { - saveEntitiesFile( Entities::$filename , Entities::$entities ); - } - - static function checkReplaces( bool $debug ) - { - Entities::$countTotalGenerated = count( Entities::$entities ); - Entities::$countUnstranslated = 0; - Entities::$countReplacedGlobal = 0; - Entities::$countReplacedRemove = 0; - - foreach( Entities::$entities as $name => $text ) - { - $replaced = Entities::$count[$name] - 1; - $expectedGlobal = in_array( $name , Entities::$global ); - $expectedReplaced = in_array( $name , Entities::$replace ); - $expectedRemoved = in_array( $name , Entities::$remove ); - - if ( $expectedGlobal && $replaced != 0 ) - { - Entities::$countReplacedGlobal++; - if ( $debug ) - print "Expected global, replaced $replaced times: $name\n"; - } - - if ( $expectedReplaced && $replaced != 1 ) - { - Entities::$countUnstranslated++; - if ( $debug ) - print "Expected translated, replaced $replaced times: $name\n"; - } - - if ( $expectedRemoved && $replaced != 0 ) - { - Entities::$countReplacedRemove++; - if ( $debug ) - print "Expected removed, replaced $replaced times: $name\n"; - } - } - } -} - -function loadEnt( string $path , bool $global = false 
, bool $translate = false , bool $remove = false , bool $warnMissing = false ) -{ - $realpath = realpath( $path ); - if ( $realpath === false ) - if ( PARTIAL_IMPL ) - return; - else - if ( $warnMissing ) - fwrite( STDERR , "\n Missing entity file: $path\n" ); - $path = $realpath; - - $text = file_get_contents( $path ); - $text = str_replace( "&" , "&" , $text ); - - $dom = new DOMDocument( '1.0' , 'utf8' ); - if ( ! $dom->loadXML( $text ) ) - die( "XML load failed for $path\n" ); - - $xpath = new DOMXPath( $dom ); - $list = $xpath->query( "/*/*" ); - - foreach( $list as $ent ) - { - // weird, namespace correting, DOMNodeList -> DOMDocumentFragment transform - $other = new DOMDocument( '1.0' , 'utf8' ); - - foreach( $ent->childNodes as $node ) - $other->appendChild( $other->importNode( $node , true ) ); - - $name = $ent->getAttribute( "name" ); - $text = $other->saveXML(); - - $text = rtrim( $text , "\n" ); - $text = str_replace( "&" , "&" , $text ); - $lines = explode( "\n" , $text ); - array_shift( $lines ); // remove XML declaration - $text = implode( "\n" , $lines ); - - Entities::put( $path , $name , $text , $global , $translate , $remove ); - } -} - -function loadDir( array $langs , string $lang ) -{ - global $debug; - - $dir = __DIR__ . "/../../$lang/entities"; - $dir = realpath( $dir ); - if ( $dir === false || ! is_dir( $dir ) ) - if ( PARTIAL_IMPL ) - { - if ( $debug ) - print "Not a directory: $dir\n"; - return; - } - else - exit( "Error: not a directory: $dir\n" ); - - $files = scandir( $dir ); - $expectedReplaced = array_search( $lang , $langs ) > 0; - - foreach( $files as $file ) - { - $path = realpath( "$dir/$file" ); - - if ( str_starts_with( $file , '.' 
) ) - continue; - if ( is_dir( $path ) ) - continue; - - $text = file_get_contents( $path ); - $text = rtrim( $text , "\n" ); - - loadXml( $path , $text , $expectedReplaced ); - } -} - -function loadXml( string $path , string $text , bool $expectedReplaced ) -{ - $info = pathinfo( $path ); - $name = $info["filename"]; - $frag = "$text"; - - if ( trim( $text ) == "" ) - { - if ( ! PARTIAL_IMPL ) - fwrite( STDERR , "\n Empty entity (should it be in remove.ent?): '$path' \n" ); - Entities::put( $path , $name , $text ); - return; - } - - $dom = new DOMDocument( '1.0' , 'utf8' ); - $dom->recover = true; - $dom->resolveExternals = false; - libxml_use_internal_errors( true ); - - $res = $dom->loadXML( $frag ); - - $err = libxml_get_errors(); - libxml_clear_errors(); - - foreach( $err as $item ) - { - $msg = trim( $item->message ); - if ( str_starts_with( $msg , "Entity '" ) && str_ends_with( $msg , "' not defined" ) ) - continue; - - fwrite( STDERR , "\n XML load failed on entity file." ); - fwrite( STDERR , "\n Path: $path" ); - fwrite( STDERR , "\n Error: $msg\n" ); - return; - } - - Entities::put( $path , $name , $text , replace: $expectedReplaced ); -} - -function saveEntitiesFile( string $filename , array $entities ) -{ - $tmpDir = __DIR__ . "/temp"; // idempotent - - $file = fopen( $filename , "w" ); - fputs( $file , "\n\n\n" ); - - foreach( $entities as $name => $entity ) - { - $text = $entity->text; - $quote = ""; - - // If the text contains mixed quoting, keeping it - // as an external file to avoid (re)quotation hell. - - if ( strpos( $text , "'" ) === false ) - $quote = "'"; - if ( strpos( $text , '"' ) === false ) - $quote = '"'; - - if ( $quote == "" ) - { - if ( $entity->path == "" ) - { - $entity->path = $tmpDir . 
"/{$entity->path}.tmp"; - file_put_contents( $entity->path , $text ); - } - fputs( $file , "path}'>\n\n" ); - Entities::slow( $entity->path ); - } - else - fputs( $file , "\n\n" ); - } - - fclose( $file ); -} diff --git a/scripts/file-entities.php b/scripts/file-entities.php index 9229b0b130..b6b277d86b 100644 --- a/scripts/file-entities.php +++ b/scripts/file-entities.php @@ -118,7 +118,9 @@ writeEntity( $file , $ent ); fclose( $file ); -echo "done\n"; + +$total = count( $entities ); +echo "done: $total entities.\n"; exit( 0 ); diff --git a/scripts/text-entities.php b/scripts/text-entities.php new file mode 100644 index 0000000000..43f7dfce4f --- /dev/null +++ b/scripts/text-entities.php @@ -0,0 +1,457 @@ + | ++----------------------------------------------------------------------+ +| Description: Collect individual entities into an temp/entities.ent. | ++----------------------------------------------------------------------+ + +# Mental model for DTD , + or things that I would liked to know 20 years ago + +DTD Entity contents have more in common with DOMDocumentFragment than +DOMElement. In other words, simple text and multi rooted XML fragments +are valid content, whereas they are not valid XML documents. + +Also, namespaces do not automatically "cross" between a parent +document and their entities, even if they are included in the same +file, as local textual entities. Each s are, for all intended +purposes, separated documents, with separated namespaces and have +*expected* different default namespaces. + +So each one of, possibly multiple, "root" XML elements inside an +fragment need to be annotated with default namespace, even if the +"root" element occurs surrounded by text. For example: + +- "texttext", need one namespace, or it is invalid, and; +- "", need TWO namespaces, or it is also invalid. 
+ +# Output + +This script collects grouped and individual XML Entity files +(detailed below), at some expected relative paths, and generates a +doc-base/temp/entities.ent file with their respective DTD Entities. + +The output file has no duplications, so collection order is important: +it decides which definition wins. Here, later loaded entities +take priority over (override) previously defined ones. Note that this is the +reverse of the DTD convention, where duplicated entity names are +ignored. The priority order used here is important to allow detecting +cases where unique entities are being overwritten, or if expected +translatable entities are missing translations. + +# Individual XML Entity files, or `.xml` files at `doc-lang/entities/` + +As explained above, the individual entity contents are not really +valid XML *documents*, they are only at most valid XML *fragments*. +More technically, these XML files are really well-balanced texts, per +https://www.w3.org/TR/xml-fragment/#defn-well-balanced . + +Yet, individual entities are stored in entities/ as .xml files, for +two reasons: first, text editors in general can highlight XML syntax in +well-balanced texts; and second, this allows normal revision tracking +per file, without requiring weird changes to `revcheck.php`. Note that +it is *invalid* to place an XML declaration in these fragment files, at least +in files that are invalid XML documents (on multi-node rooted ones). + +# Grouped XML Entity files + +For very small textual entities, down to simple text or single XML +elements that may never change, individual file tracking of entities +is overkill. To avoid an infinitude of micro entity files, this script +also loads grouped XML Entity files, at some expected locations. + +These grouped files are really normal XML files, correctly annotated +with the XML namespaces used in the manual, so any individual exported entity +has correct and clean XML namespace annotations. 
These grouped entity +files are tracked normally by revcheck, but are not directly included +in manual.xml.in, as they only participate in general entity loading, +described above. + +# Checks + +Grouped XML Entity files are annotated with an attribute named "translate", +that accepts the following values: + +- "yes": these entities are expected to be translated or replaced; +- "no": these entities are expected not to be translated or replaced; +- "delete": these entities should be deleted on sight. + +The characteristics above are validated at the end of the script. Use the +--debug argument to also list the names of misused entities. + +The "delete" value exists to make it possible to delete entities from +doc-en while keeping translations building. To achieve this result, +move any recently deleted entity to a .ent file with translate="delete". + +*/ + +const PARTIAL_IMPL = true; // While true, a missing $lang/entities directory is skipped instead of aborting (entities not yet adopted by all languages) + +ini_set( 'display_errors' , 1 ); +ini_set( 'display_startup_errors' , 1 ); +error_reporting( E_ALL ); + +Entities::truncateOutputFile(); + +$langs = []; +$debug = false; +$usage = in_array( '--help' , $argv ) || in_array( '-h' , $argv ); + +if ( count( $argv ) < 2 || $usage ) +{ + print "\nUsage: {$argv[0]} langCode [langCode] [--debug]\n\n"; + if ( $usage ) + exit( 0 ); + else + exit( 1 ); +} +array_shift( $argv ); // Drop the script name; the rest are language codes or --debug +foreach( $argv as $arg ) + if ( $arg == "--debug" ) + $debug = true; + else + $langs[] = $arg; + +if ( $debug ) + print "Running text-entities.php in debug mode.\n"; +else + print "Running text-entities.php... "; + +foreach( $langs as $lang ) +{ + $entDir = __DIR__ . "/../../$lang/entities"; + $refDir = __DIR__ . 
"/../../$lang/reference"; + + loadDirEntities( $entDir ); + loadDirRecurse( $refDir ); + Entities::$countLanguages++; // One more language loaded; read back by Entities::checkReplaces() +} + +Entities::writeOutputFile(); +Entities::checkReplaces( $debug ); + +echo "done: generated " , Entities::$countTotalGenerated , " entities"; +if ( Entities::$countUntranslated > 0 ) + echo ", " , Entities::$countUntranslated , " untranslated"; +if ( Entities::$countUniqueReplaced > 0 ) + echo ", " , Entities::$countUniqueReplaced , " unique replaced"; +if ( Entities::$countRemoveReplaced > 0 ) + echo ", " , Entities::$countRemoveReplaced , " remove replaced"; +echo ".\n"; + +exit; + +enum EntityCheck +{ + case Unique; // Expected once + case Normal; // Expected used/translated + case Remove; // Expected unused +} + +class EntityData +{ + public function __construct( + public string $path , + public string $name , + public string $text ) {} +} + +class Entities +{ + private static string $filename = __DIR__ . "/../temp/entities.ent"; + + private static array $merged = []; // name => EntityData; one entry per name, the last put() wins + private static array $unique = []; // name => name, for entities put() with $unique = true + private static array $remove = []; // name => name, for entities put() with $remove = true + private static array $nameCount = []; // name => number of put() calls seen for that name + + public static int $countLanguages = 0; // Languages loaded; the untranslated check is skipped when this is 1 + public static int $countUntranslated = 0; + public static int $countUniqueReplaced = 0; + public static int $countRemoveReplaced = 0; + public static int $countTotalGenerated = 0; + + static function put( string $path , string $name , string $text , bool $unique = false , bool $remove = false ) + { + $entity = new EntityData( $path , $name , $text ); + Entities::$merged[ $name ] = $entity; // Overrides any earlier entity with the same name + + if ( $unique ) + Entities::$unique[ $name ] = $name; + + if ( $remove ) + Entities::$remove[ $name ] = $name; + + if ( ! 
isset( Entities::$nameCount[ $name ] ) ) + Entities::$nameCount[ $name ] = 0; + Entities::$nameCount[ $name ]++; + } + + static function truncateOutputFile() + { + if ( file_exists( Entities::$filename ) ) + unlink( Entities::$filename ); + touch( Entities::$filename ); + Entities::$filename = realpath( Entities::$filename ); // only full paths on XML + } + + static function writeOutputFile() + { + outputFiles( Entities::$filename , Entities::$merged ); + } + + static function checkReplaces( bool $debug ) + { + Entities::$countTotalGenerated = count( Entities::$merged ); + Entities::$countUntranslated = 0; + Entities::$countUniqueReplaced = 0; + Entities::$countRemoveReplaced = 0; + + foreach( Entities::$merged as $name => $null ) + { + $replaced = Entities::$nameCount[$name] - 1; // Times the name was put() again after its first definition + $languages = Entities::$countLanguages; + $expectedUnique = isset( Entities::$unique[ $name ] ); // Both arrays are keyed by name: exact, constant-time lookup + $expectedRemoved = isset( Entities::$remove[ $name ] ); + $expectedTranslated = ! ( $expectedUnique || $expectedRemoved ); + + if ( $expectedUnique && $replaced != 0 ) + { + Entities::$countUniqueReplaced++; + if ( $debug ) + print " Expected unique, replaced $replaced times: $name\n"; + } + + if ( $expectedRemoved && $replaced != 0 ) + { + Entities::$countRemoveReplaced++; + if ( $debug ) + print " Expected removed, replaced $replaced times: $name\n"; + } + + if ( $expectedTranslated && $replaced != 1 && $languages != 1 ) + { + Entities::$countUntranslated++; + if ( $debug ) + print " Expected translated, replaced $replaced times: $name\n"; + } + } + } +} + +function loadDirEntities( string $dir ) // Loads the .xml and .ent entity files found directly inside $dir (no recursion) +{ + $real = realpath( $dir ); // Keep $dir intact for the messages below: realpath() returns false on a missing path + if ( $real === false || ! is_dir( $real ) ) + { + if ( PARTIAL_IMPL ) + { + print "\n Skipped $dir\n"; + return; + } + else + { + print "\n Not a directory: $dir\n"; + exit( 1 ); + } + } + + $files = scandir( $real ); + foreach( $files as $file ) + { + $path = realpath( "$real/$file" ); + + if ( str_starts_with( $file , '.' 
) ) + continue; + if ( is_dir( $path ) ) + continue; + + if ( str_ends_with( $path , ".xml" ) ) + loadEntitySingle( $path ); + + if ( str_ends_with( $path , ".ent" ) ) + loadEntityGroup( $path ); + } +} + +function loadDirRecurse( string $dir ) // Walks $dir recursively and loads every non-hidden .ent file as an entity group +{ + $paths = scandir( $dir ); + foreach( $paths as $path ) + { + if ( str_starts_with( $path , '.' ) ) + continue; + + $path = realpath( "$dir/$path" ); + + if ( is_dir( $path ) ) + loadDirRecurse( $path ); + else + if ( str_ends_with( $path , ".ent" ) ) + loadEntityGroup( $path ); + } +} + +function loadEntityGroup( string $path ) // Registers each child element of the root of a grouped .ent file as one entity, flagged per the root "translate" attribute +{ + $path = realpath( $path ); + $text = file_get_contents( $path ); + $text = str_replace( "&" , "&" , $text ); + + $dom = new DOMDocument( '1.0' , 'utf8' ); + if ( ! $dom->loadXML( $text ) ) + die( "XML load failed for $path\n" ); + + $unique = $remove = false; + $value = $dom->documentElement->getAttribute("translate"); + switch ( $value ) + { + case "yes": break; // Documented value: translatable entity, neither unique nor removed + case "no": + $unique = true; + break; + case "delete": + case "remove": + $remove = true; + break; + default: + print "\n Invalid translate attribute '$value' in '$path'.\n"; + exit( 1 ); + } + + $xpath = new DOMXPath( $dom ); + $list = $xpath->query( "/*/*" ); + + foreach( $list as $ent ) + { + $name = $ent->getAttribute( "name" ); + + // Weird, namespace correcting, DOMNodeList -> DOMDocumentFragment transform + + $other = new DOMDocument( '1.0' , 'utf8' ); + foreach( $ent->childNodes as $node ) + $other->appendChild( $other->importNode( $node , true ) ); + + // Piecewise reconstruct fragment, without XML declarations or extra newlines + + $text = ""; + foreach( $other->childNodes as $node ) + $text .= $other->saveXML( $node ); + + Entities::put( $path , $name , $text , $unique , $remove ); + } +} + +function loadEntitySingle( string $path ) +{ + $text = file_get_contents( $path ); + $info = pathinfo( $path ); + $name = $info["filename"]; + $frag = "$text"; + + if ( trim( $text ) == "" ) + { + print "\n Empty entity '$name' on file 
'$path'.\n"; + print "\n Should it be in a file with translate='remove'?\n"; + Entities::put( $path , $name , $text ); + return; + } + + // Validate. Tolerates only undefined-entity errors: code 26 (presumably libxml2 XML_ERR_UNDECLARED_ENTITY) or an "Entity '...' not defined" message + + $dom = new DOMDocument( '1.0' , 'utf8' ); + $dom->recover = true; + $dom->resolveExternals = false; + libxml_use_internal_errors( true ); + + $xml = $dom->loadXML( $frag ); + $err = libxml_get_errors(); + libxml_clear_errors(); + + foreach( $err as $item ) + { + $msg = trim( $item->message ); + + if ( $item->code == 26 ) + continue; + if ( str_starts_with( $msg , "Entity '" ) && str_ends_with( $msg , "' not defined" ) ) + continue; + + print "\n XML load failed for entity file:"; + print "\n Path: $path"; + print "\n Error: $msg\n"; + return; // First error of any other kind: report it and do not register this entity + } + + Entities::put( $path , $name , $text ); +} + +function outputFiles( string $filename , array $entities ) +{ + $file = fopen( $filename , "w" ); + fputs( $file , "\n" ); + fputs( $file , "\n\n\n" ); + + $sepFileDir = __DIR__ . "/../temp/text-entities/"; + + if ( file_exists( $sepFileDir ) == false ) + mkdir( $sepFileDir , recursive: true ); + + foreach( $entities as $name => $entity ) + { + $name = $entity->name; + $body = $entity->text; + + $quote = "'"; + $count = 0; // How many of the two quote characters (' and ") occur in the body + + if ( str_contains( $body , "'" ) ) + { + $quote = '"'; + $count++; + } + if ( str_contains( $body , '"' ) ) + { + $quote = "'"; + $count++; + } + + if ( $count < 2 ) + { + // Fast path for at most one kind of quote in the body: + // entity body directly quoted on output file. + + fputs( $file , "\n\n" ); + continue; + } + + // Slow path (both quote kinds present): entity body goes to an + // external file, so as to avoid (re)quotation hell. + + $path = $sepFileDir . "/{$entity->name}.xml"; + + if ( file_exists( $path ) ) + { + echo "\nDuplicated text-entity file: '{$path}'.\n"; + exit( 1 ); + } + + // realpath() only after file creation + + file_put_contents( $path , $body ); + $path = realpath( $path ); + fputs( $file , "\n\n" ); + } + + fclose( $file ); +}