From 87a75b2e54faeb9c84f27ab1dbf87aa315f25cb8 Mon Sep 17 00:00:00 2001 From: Marjo Wenzel van Lier Date: Mon, 8 Dec 2025 22:27:09 +0100 Subject: [PATCH] docs: Add GitHub Pages documentation site - Create Jekyll-based documentation with Just the Docs theme - Add comprehensive API reference for all public methods - Include getting started guide, examples, and performance docs - Add GitHub Actions workflow for automatic deployment - Update benchmark figures with fresh measurements Signed-off-by: Marjo Wenzel van Lier --- .github/workflows/pages.yml | 52 ++++ .gitignore | 2 + docs/Gemfile | 6 + docs/_config.yml | 74 +++++ docs/api-reference.md | 66 ++++ docs/api-reference/is-valid-date.md | 231 ++++++++++++++ docs/api-reference/name-fix.md | 200 ++++++++++++ docs/api-reference/remove-accents.md | 151 +++++++++ docs/api-reference/search-words.md | 177 +++++++++++ docs/api-reference/str-replace.md | 167 ++++++++++ docs/api-reference/trim.md | 189 ++++++++++++ docs/api-reference/utf8-ansi.md | 168 ++++++++++ docs/assets/images/logo.svg | 4 + docs/contributing.md | 332 ++++++++++++++++++++ docs/examples.md | 440 +++++++++++++++++++++++++++ docs/getting-started.md | 169 ++++++++++ docs/index.md | 103 +++++++ docs/performance.md | 219 +++++++++++++ 18 files changed, 2750 insertions(+) create mode 100644 .github/workflows/pages.yml create mode 100644 docs/Gemfile create mode 100644 docs/_config.yml create mode 100644 docs/api-reference.md create mode 100644 docs/api-reference/is-valid-date.md create mode 100644 docs/api-reference/name-fix.md create mode 100644 docs/api-reference/remove-accents.md create mode 100644 docs/api-reference/search-words.md create mode 100644 docs/api-reference/str-replace.md create mode 100644 docs/api-reference/trim.md create mode 100644 docs/api-reference/utf8-ansi.md create mode 100644 docs/assets/images/logo.svg create mode 100644 docs/contributing.md create mode 100644 docs/examples.md create mode 100644 docs/getting-started.md create mode 
100644 docs/index.md create mode 100644 docs/performance.md diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml new file mode 100644 index 0000000..975d6e1 --- /dev/null +++ b/.github/workflows/pages.yml @@ -0,0 +1,52 @@ +# Deploy Jekyll documentation to GitHub Pages +name: Deploy Documentation + +on: + push: + branches: ["main"] + paths: + - 'docs/**' + - '.github/workflows/pages.yml' + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Pages + uses: actions/configure-pages@v5 + + - name: Build with Jekyll + uses: actions/jekyll-build-pages@v1 + with: + source: ./docs + destination: ./_site + + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore index 0dc923e..fef32bf 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,8 @@ !README.md !CONTRIBUTING.md !CHANGELOG.md +!docs/ +!docs/** # ======================================== # SOURCE CODE & TESTS diff --git a/docs/Gemfile b/docs/Gemfile new file mode 100644 index 0000000..62545e8 --- /dev/null +++ b/docs/Gemfile @@ -0,0 +1,6 @@ +source "https://rubygems.org" + +gem "jekyll", "~> 4.3" +gem "just-the-docs", "~> 0.10" +gem "jekyll-seo-tag" +gem "jekyll-sitemap" diff --git a/docs/_config.yml b/docs/_config.yml new file mode 100644 
index 0000000..24015a4 --- /dev/null +++ b/docs/_config.yml @@ -0,0 +1,74 @@ +# Site settings +title: StringManipulation +description: High-performance PHP 8.3+ string manipulation library featuring O(n) algorithms with up to 5x speed improvements +baseurl: "/StringManipulation" +url: "https://marjovanlier.github.io" + +# Theme +remote_theme: just-the-docs/just-the-docs@v0.10.1 + +# Colour scheme +color_scheme: light + +# Logo +logo: "/assets/images/logo.svg" + +# Aux links +aux_links: + "GitHub": + - "https://github.com/MarjovanLier/StringManipulation" + "Packagist": + - "https://packagist.org/packages/marjovanlier/stringmanipulation" + +aux_links_new_tab: true + +# Footer +footer_content: "Copyright © 2024 Marjo Wenzel van Lier. Distributed under the MIT License." + +# Back to top link +back_to_top: true +back_to_top_text: "Back to top" + +# Heading anchor links +heading_anchors: true + +# Search +search_enabled: true +search: + heading_level: 2 + previews: 3 + preview_words_before: 5 + preview_words_after: 10 + tokenizer_separator: /[\s/]+/ + rel_url: true + button: false + +# Navigation +nav_sort: case_insensitive + +# Collections for pages +collections: + api: + permalink: "/:collection/:path/" + output: true + +# Default front matter +defaults: + - scope: + path: "" + type: "api" + values: + layout: "default" + parent: "API Reference" + +# Plugins +plugins: + - jekyll-seo-tag + - jekyll-sitemap + +# Exclude from processing +exclude: + - Gemfile + - Gemfile.lock + - node_modules + - vendor diff --git a/docs/api-reference.md b/docs/api-reference.md new file mode 100644 index 0000000..79598cf --- /dev/null +++ b/docs/api-reference.md @@ -0,0 +1,66 @@ +--- +layout: default +title: API Reference +nav_order: 3 +has_children: true +--- + +# API Reference +{: .no_toc } + +Complete documentation for all public methods in the StringManipulation library. 
+{: .fs-6 .fw-300 } + +--- + +## Overview + +The `StringManipulation` class provides static methods for high-performance string operations. All methods are designed with O(n) complexity for predictable, scalable performance. + +```php +use MarjovanLier\StringManipulation\StringManipulation; +``` + +--- + +## Methods at a Glance + +| Method | Description | Performance | +|:----------------------------------------------------------------------|:---------------------------------------------|:-----------------| +| [`removeAccents()`]({{ site.baseurl }}/api-reference/remove-accents/) | Strip accents and diacritics from text | ~450,000 ops/sec | +| [`searchWords()`]({{ site.baseurl }}/api-reference/search-words/) | Transform text for search optimisation | ~195,000 ops/sec | +| [`nameFix()`]({{ site.baseurl }}/api-reference/name-fix/) | Standardise names with proper capitalisation | ~130,000 ops/sec | +| [`utf8Ansi()`]({{ site.baseurl }}/api-reference/utf8-ansi/) | Convert UTF-8 to ANSI encoding | - | +| [`isValidDate()`]({{ site.baseurl }}/api-reference/is-valid-date/) | Validate date strings with format checking | - | +| [`strReplace()`]({{ site.baseurl }}/api-reference/str-replace/) | Optimised string replacement | - | +| [`trim()`]({{ site.baseurl }}/api-reference/trim/) | Remove characters from string ends | - | + +--- + +## Null Handling + +Methods handle null input consistently: + +| Method | Null Input | Returns | +|:-------|:-----------|:--------| +| `searchWords()` | `null` | `null` | +| `nameFix()` | `null` | `null` | +| `utf8Ansi()` | `null` | `''` (empty string) | +| `removeAccents()` | N/A | Requires non-null string | +| `isValidDate()` | N/A | Requires non-null string | + +--- + +## Type Signatures + +All methods use strict typing (`declare(strict_types=1)`): + +```php +public static function removeAccents(string $str): string; +public static function searchWords(?string $words): ?string; +public static function nameFix(#[\SensitiveParameter] ?string 
$lastName): ?string; +public static function utf8Ansi(?string $value = ''): string; +public static function isValidDate(string $date, string $format = 'Y-m-d H:i:s'): bool; +public static function strReplace(array|string $search, array|string $replace, string $subject): string; +public static function trim(string $string, string $characters = " \t\n\r\0\x0B"): string; +``` diff --git a/docs/api-reference/is-valid-date.md b/docs/api-reference/is-valid-date.md new file mode 100644 index 0000000..99b6045 --- /dev/null +++ b/docs/api-reference/is-valid-date.md @@ -0,0 +1,231 @@ +--- +layout: default +title: isValidDate() +parent: API Reference +nav_order: 5 +--- + +# isValidDate() +{: .no_toc } + +Validates date strings against specified formats with logical consistency checks. +{: .fs-6 .fw-300 } + +## Table of contents +{: .no_toc .text-delta } + +1. TOC +{:toc} + +--- + +## Signature + +```php +public static function isValidDate(string $date, string $format = 'Y-m-d H:i:s'): bool +``` + +## Parameters + +| Parameter | Type | Default | Description | +|:----------|:-----|:--------|:------------| +| `$date` | `string` | - | The date string to validate | +| `$format` | `string` | `'Y-m-d H:i:s'` | The expected date format (PHP date format) | + +## Returns + +`bool` - `true` if the date is valid, `false` otherwise. + +--- + +## Description + +The `isValidDate()` method provides comprehensive date validation beyond simple format checking. It validates: + +1. **Format conformance** - Date matches the specified format pattern +2. **Logical date validity** - Day exists in month (e.g., no February 30th) +3. **Time component validity**: + - Hours: 0-23 + - Minutes: 0-59 + - Seconds: 0-59 + +This prevents accepting dates like `2023-02-30` or `2023-12-25 25:00:00` that would be accepted by loose parsing. 
+ +--- + +## Examples + +### Basic Usage + +```php +use MarjovanLier\StringManipulation\StringManipulation; + +$isValid = StringManipulation::isValidDate('2023-12-25 12:30:00'); +echo $isValid ? 'Valid' : 'Invalid'; // Output: Valid +``` + +### Date Only + +```php +StringManipulation::isValidDate('2023-12-25', 'Y-m-d'); +// Result: true + +StringManipulation::isValidDate('25-12-2023', 'd-m-Y'); +// Result: true + +StringManipulation::isValidDate('12/25/2023', 'm/d/Y'); +// Result: true +``` + +### Invalid Dates + +```php +// February 30th doesn't exist +StringManipulation::isValidDate('2023-02-30', 'Y-m-d'); +// Result: false + +// Month 13 doesn't exist +StringManipulation::isValidDate('2023-13-01', 'Y-m-d'); +// Result: false + +// Day 32 doesn't exist +StringManipulation::isValidDate('2023-01-32', 'Y-m-d'); +// Result: false +``` + +### Invalid Times + +```php +// Hour 25 doesn't exist +StringManipulation::isValidDate('2023-12-25 25:00:00'); +// Result: false + +// Minute 60 doesn't exist +StringManipulation::isValidDate('2023-12-25 12:60:00'); +// Result: false + +// Second 60 doesn't exist +StringManipulation::isValidDate('2023-12-25 12:30:60'); +// Result: false +``` + +### Leap Year Handling + +```php +// 2024 is a leap year +StringManipulation::isValidDate('2024-02-29', 'Y-m-d'); +// Result: true + +// 2023 is not a leap year +StringManipulation::isValidDate('2023-02-29', 'Y-m-d'); +// Result: false +``` + +--- + +## Common Format Patterns + +| Format | Example | Description | +|:-------|:--------|:------------| +| `Y-m-d H:i:s` | `2023-12-25 12:30:00` | Full datetime (default) | +| `Y-m-d` | `2023-12-25` | Date only | +| `d-m-Y` | `25-12-2023` | European date format | +| `m/d/Y` | `12/25/2023` | US date format | +| `Y-m-d H:i` | `2023-12-25 12:30` | Datetime without seconds | +| `d/m/Y H:i:s` | `25/12/2023 12:30:00` | European datetime | + +--- + +## Use Cases + +### Form Validation + +```php +class DateValidator +{ + public function 
validateBirthDate(string $date): bool + { + if (!StringManipulation::isValidDate($date, 'Y-m-d')) { + return false; + } + + // Additional business rules + $birthDate = new DateTime($date); + $now = new DateTime(); + + return $birthDate < $now; + } +} +``` + +### API Input Validation + +```php +function validateEventRequest(array $request): array +{ + $errors = []; + + if (!StringManipulation::isValidDate($request['start_date'] ?? '')) { + $errors[] = 'Invalid start date format'; + } + + if (!StringManipulation::isValidDate($request['end_date'] ?? '')) { + $errors[] = 'Invalid end date format'; + } + + return $errors; +} +``` + +### Data Import Validation + +```php +function validateImportRow(array $row, int $lineNumber): array +{ + $errors = []; + + // Try multiple date formats + $formats = ['Y-m-d', 'd-m-Y', 'm/d/Y']; + $dateValid = false; + + foreach ($formats as $format) { + if (StringManipulation::isValidDate($row['date'], $format)) { + $dateValid = true; + break; + } + } + + if (!$dateValid) { + $errors[] = "Line {$lineNumber}: Invalid date format"; + } + + return $errors; +} +``` + +--- + +## Edge Cases + +```php +// Empty string +StringManipulation::isValidDate('', 'Y-m-d'); +// Result: false + +// Wrong format +StringManipulation::isValidDate('2023-12-25', 'd-m-Y'); +// Result: false (format mismatch) + +// Partial match +StringManipulation::isValidDate('2023-12', 'Y-m-d'); +// Result: false +``` + +--- + +## Related Methods + +This method is standalone but often used alongside: + +- [`searchWords()`]({{ site.baseurl }}/api-reference/search-words/) - For normalising date-containing text +- [`trim()`]({{ site.baseurl }}/api-reference/trim/) - For cleaning input before validation diff --git a/docs/api-reference/name-fix.md b/docs/api-reference/name-fix.md new file mode 100644 index 0000000..956570b --- /dev/null +++ b/docs/api-reference/name-fix.md @@ -0,0 +1,200 @@ +--- +layout: default +title: nameFix() +parent: API Reference +nav_order: 3 +--- + +# 
nameFix() +{: .no_toc } + +Standardises names to conform to specific naming conventions with proper capitalisation. +{: .fs-6 .fw-300 } + +## Table of contents +{: .no_toc .text-delta } + +1. TOC +{:toc} + +--- + +## Signature + +```php +public static function nameFix(#[\SensitiveParameter] ?string $lastName): ?string +``` + +## Parameters + +| Parameter | Type | Description | +|:------------|:----------|:--------------------------------------------------------| +| `$lastName` | `?string` | The name to standardise. Marked as sensitive parameter. | + +## Returns + +`?string` - The standardised name, or `null` if input was null. + +--- + +## Description + +The `nameFix()` method standardises names by applying consistent capitalisation rules and handling various naming conventions correctly. It performs: + +1. **UTF-8 to ANSI conversion** - For consistent character handling +2. **Accent removal** - Normalises characters +3. **Mc/Mac prefix handling** - Adds proper spacing and capitalisation +4. **Hyphenated name handling** - Capitalises each part +5. **Common prefix correction** - Handles van, von, de, du, la, le, etc. +6. **Space normalisation** - Reduces multiple spaces + +### Performance + +- **~130,000 operations per second** +- Consolidated regex operations +- Optimised prefix handling + +### Security + +The parameter is marked with `#[\SensitiveParameter]` to prevent name data from appearing in stack traces and error logs. 
+ +--- + +## Examples + +### Basic Usage + +```php +use MarjovanLier\StringManipulation\StringManipulation; + +$result = StringManipulation::nameFix('mcdonald'); +echo $result; // Output: McDonald +``` + +### Scottish/Irish Names + +```php +// Mc prefix +StringManipulation::nameFix('mcdonald'); +// Output: McDonald + +// Mac prefix +StringManipulation::nameFix('macdonald'); +// Output: MacDonald + +// O' prefix +StringManipulation::nameFix("o'brien"); +// Output: O'Brien +``` + +### Dutch/German Prefixes + +```php +// van +StringManipulation::nameFix('van der waals'); +// Output: van der Waals + +// von +StringManipulation::nameFix('von neumann'); +// Output: von Neumann + +// de +StringManipulation::nameFix('de souza'); +// Output: de Souza +``` + +### Hyphenated Names + +```php +StringManipulation::nameFix('smith-jones'); +// Output: Smith-Jones + +StringManipulation::nameFix("o'brien-smith"); +// Output: O'Brien-Smith +``` + +### Null Handling + +```php +$result = StringManipulation::nameFix(null); +// Result: null +``` + +--- + +## Prefix Handling + +The method handles these common prefixes by keeping them lowercase: + +| Prefix | Example Input | Output | +|:-------|:--------------|:-------| +| van | `van berg` | `van Berg` | +| von | `von stein` | `von Stein` | +| de | `de silva` | `de Silva` | +| du | `du pont` | `du Pont` | +| la | `la rue` | `la Rue` | +| le | `le blanc` | `le Blanc` | +| der | `van der berg` | `van der Berg` | +| den | `van den berg` | `van den Berg` | + +--- + +## Use Cases + +### User Registration + +```php +class UserRegistration +{ + public function normaliseUserName(string $lastName): string + { + $normalised = StringManipulation::nameFix($lastName); + + if ($normalised === null) { + throw new InvalidArgumentException('Last name is required'); + } + + return $normalised; + } +} + +$registration = new UserRegistration(); +$name = $registration->normaliseUserName('VAN DER BERG'); +// Result: van der Berg +``` + +### Data Import 
Standardisation + +```php +function standardiseNames(array $users): array +{ + return array_map(function ($user) { + return [ + ...$user, + 'last_name' => StringManipulation::nameFix($user['last_name']), + ]; + }, $users); +} +``` + +### Display Formatting + +```php +function formatDisplayName(string $firstName, string $lastName): string +{ + $standardisedLast = StringManipulation::nameFix($lastName); + + return sprintf('%s %s', ucfirst($firstName), $standardisedLast); +} + +echo formatDisplayName('john', 'mcdonald'); +// Output: John McDonald +``` + +--- + +## Related Methods + +- [`searchWords()`]({{ site.baseurl }}/api-reference/search-words/) - Includes name fixing with search optimisation +- [`removeAccents()`]({{ site.baseurl }}/api-reference/remove-accents/) - Accent removal only +- [`utf8Ansi()`]({{ site.baseurl }}/api-reference/utf8-ansi/) - Encoding conversion diff --git a/docs/api-reference/remove-accents.md b/docs/api-reference/remove-accents.md new file mode 100644 index 0000000..a539ef9 --- /dev/null +++ b/docs/api-reference/remove-accents.md @@ -0,0 +1,151 @@ +--- +layout: default +title: removeAccents() +parent: API Reference +nav_order: 1 +--- + +# removeAccents() +{: .no_toc } + +Strips accents and special characters from strings to normalise text. +{: .fs-6 .fw-300 } + +## Table of contents +{: .no_toc .text-delta } + +1. TOC +{:toc} + +--- + +## Signature + +```php +public static function removeAccents(string $str): string +``` + +## Parameters + +| Parameter | Type | Description | +|:----------|:---------|:------------------------------------------------| +| `$str` | `string` | The input string containing accented characters | + +## Returns + +`string` - The input string with all accents and diacritics removed. + +--- + +## Description + +The `removeAccents()` method efficiently strips accents and diacritical marks from strings, making text easier to search, compare, and index. 
This is essential for: + +- **Search functionality** - Match "café" when users search for "cafe" +- **URL slugs** - Generate clean URLs from titles +- **Data normalisation** - Standardise text for comparison + +### Performance + +- **~450,000 operations per second** +- Uses hash table lookups with `strtr()` for O(1) character replacement +- Static caching of character mapping tables + +### Character Coverage + +Supports 266+ accented and special characters including: +- Latin characters (a, e, i, o, u with various diacritics) +- Extended Latin (Æ, Œ, ß, etc.) +- Greek characters +- Currency symbols +- Special typographic characters + +--- + +## Examples + +### Basic Usage + +```php +use MarjovanLier\StringManipulation\StringManipulation; + +$result = StringManipulation::removeAccents('Crème Brûlée'); +echo $result; // Output: Creme Brulee +``` + +### International Text + +```php +// French +StringManipulation::removeAccents('français'); +// Output: francais + +// German +StringManipulation::removeAccents('München Übung'); +// Output: Munchen Ubung + +// Spanish +StringManipulation::removeAccents('Español mañana'); +// Output: Espanol manana + +// Portuguese +StringManipulation::removeAccents('São Paulo ação'); +// Output: Sao Paulo acao +``` + +### Special Characters + +```php +// Ligatures +StringManipulation::removeAccents('Æsop Œuvre'); +// Output: AEsop OEuvre + +// Nordic characters +StringManipulation::removeAccents('Malmö Oslo'); +// Output: Malmo Oslo + +// Eastern European +StringManipulation::removeAccents('Praha Łódź'); +// Output: Praha Lodz +``` + +--- + +## Use Cases + +### Search Index Generation + +```php +function generateSearchIndex(string $text): string +{ + return strtolower( + StringManipulation::removeAccents($text) + ); +} + +$title = "Café Crème - München"; +$searchIndex = generateSearchIndex($title); +// Result: cafe creme - munchen +``` + +### URL Slug Creation + +```php +function createSlug(string $title): string +{ + $normalised = 
StringManipulation::removeAccents($title); + $lowercase = strtolower($normalised); + $slug = preg_replace('/[^a-z0-9]+/', '-', $lowercase); + return trim($slug, '-'); +} + +$slug = createSlug('Café Crème Brûlée'); +// Result: cafe-creme-brulee +``` + +--- + +## Related Methods + +- [`searchWords()`]({{ site.baseurl }}/api-reference/search-words/) - Combines accent removal with search optimisation +- [`utf8Ansi()`]({{ site.baseurl }}/api-reference/utf8-ansi/) - Converts UTF-8 to ANSI encoding diff --git a/docs/api-reference/search-words.md b/docs/api-reference/search-words.md new file mode 100644 index 0000000..52cb47d --- /dev/null +++ b/docs/api-reference/search-words.md @@ -0,0 +1,177 @@ +--- +layout: default +title: searchWords() +parent: API Reference +nav_order: 2 +--- + +# searchWords() +{: .no_toc } + +Transforms strings into a search-optimised format ideal for database queries. +{: .fs-6 .fw-300 } + +## Table of contents +{: .no_toc .text-delta } + +1. TOC +{:toc} + +--- + +## Signature + +```php +public static function searchWords(?string $words): ?string +``` + +## Parameters + +| Parameter | Type | Description | +|:----------|:----------|:------------------------------------------------------| +| `$words` | `?string` | The input string to optimise for search. Can be null. | + +## Returns + +`?string` - The search-optimised string, or `null` if input was null. + +--- + +## Description + +The `searchWords()` method transforms text into a format optimised for database searching. It performs multiple transformations in a single pass: + +1. **Name fixing** - Applies naming conventions (Mc/Mac prefixes) +2. **Lowercase conversion** - For case-insensitive matching +3. **Special character replacement** - Converts punctuation and symbols to spaces +4. **Accent removal** - Strips diacritics for normalised matching +5. 
**Space normalisation** - Reduces multiple spaces to a single space + +### Performance + +- **~195,000 operations per second** +- O(n) single-pass algorithm +- Combined character mapping for efficiency + +--- + +## Examples + +### Basic Usage + +```php +use MarjovanLier\StringManipulation\StringManipulation; + +$result = StringManipulation::searchWords('Hello_World'); +echo $result; // Output: hello world +``` + +### Email Addresses + +```php +StringManipulation::searchWords('John_Doe@Example.com'); +// Output: john doe example com +``` + +### Accented Text + +```php +StringManipulation::searchWords('Café München'); +// Output: cafe munchen +``` + +### Names with Prefixes + +```php +StringManipulation::searchWords('McDonald van der Berg'); +// Output: mcdonald van der berg +``` + +### Null Handling + +```php +$result = StringManipulation::searchWords(null); +// Result: null +``` + +--- + +## Use Cases + +### Database Search Column + +Store a search-optimised version alongside the original: + +```php +class User +{ + public string $lastName; + public string $lastNameSearch; + + public function setLastName(string $name): void + { + $this->lastName = $name; + $this->lastNameSearch = StringManipulation::searchWords($name); + } +} + +// Usage +$user->setLastName("O'Brien-McDonald"); +// $user->lastName = "O'Brien-McDonald" +// $user->lastNameSearch = "o brien mcdonald" + +// Query: WHERE last_name_search LIKE '%brien%' +``` + +### Full-Text Search Preparation + +```php +function prepareForSearch(string $query): string +{ + $optimised = StringManipulation::searchWords($query); + + // Split into individual terms + $terms = explode(' ', $optimised); + + // Filter empty terms + return implode(' ', array_filter($terms)); +} + +$searchQuery = prepareForSearch("Café & Restaurant - München"); +// Result: cafe restaurant munchen +``` + +### Data Import Normalisation + +```php +function normaliseImportedData(array $records): array +{ + return array_map(function ($record) { + return 
[ + 'name' => $record['name'], + 'name_searchable' => StringManipulation::searchWords($record['name']), + 'address' => $record['address'], + 'address_searchable' => StringManipulation::searchWords($record['address']), + ]; + }, $records); +} +``` + +--- + +## Transformations Applied + +| Input | Output | Transformation | +|:------|:-------|:---------------| +| `Hello_World` | `hello world` | Underscore to space, lowercase | +| `test@example.com` | `test example com` | Special chars to space | +| `Café` | `cafe` | Accent removal, lowercase | +| `McDonald` | `mcdonald` | Name fixing, lowercase | +| `"Hello, World!"` | `hello world` | Punctuation removal | + +--- + +## Related Methods + +- [`nameFix()`]({{ site.baseurl }}/api-reference/name-fix/) - Name standardisation only +- [`removeAccents()`]({{ site.baseurl }}/api-reference/remove-accents/) - Accent removal only diff --git a/docs/api-reference/str-replace.md b/docs/api-reference/str-replace.md new file mode 100644 index 0000000..d1df815 --- /dev/null +++ b/docs/api-reference/str-replace.md @@ -0,0 +1,167 @@ +--- +layout: default +title: strReplace() +parent: API Reference +nav_order: 6 +--- + +# strReplace() +{: .no_toc } + +Optimised string replacement utility with performance enhancements. +{: .fs-6 .fw-300 } + +## Table of contents +{: .no_toc .text-delta } + +1. TOC +{:toc} + +--- + +## Signature + +```php +public static function strReplace( + array|string $search, + array|string $replace, + string $subject +): string +``` + +## Parameters + +| Parameter | Type | Description | +|:----------|:-----|:------------| +| `$search` | `array\|string` | The value(s) to search for | +| `$replace` | `array\|string` | The replacement value(s) | +| `$subject` | `string` | The string to perform replacements on | + +## Returns + +`string` - The string with replacements applied. Returns an empty string if subject is empty. 
+ +--- + +## Description + +The `strReplace()` method is an optimised wrapper around PHP's native `str_replace()` with additional performance enhancements: + +- **Single-character optimisation** - Uses `strtr()` for single-character replacements (faster) +- **Early return** - Returns immediately for empty subjects +- **Type safety** - Enforces string return type + +--- + +## Examples + +### Basic Usage + +```php +use MarjovanLier\StringManipulation\StringManipulation; + +$result = StringManipulation::strReplace('world', 'PHP', 'Hello world'); +echo $result; // Output: Hello PHP +``` + +### Multiple Replacements + +```php +$search = ['one', 'two', 'three']; +$replace = ['1', '2', '3']; +$subject = 'one two three'; + +$result = StringManipulation::strReplace($search, $replace, $subject); +echo $result; // Output: 1 2 3 +``` + +### Character Replacement + +```php +// Single character replacement (uses optimised strtr) +$result = StringManipulation::strReplace('_', ' ', 'hello_world'); +echo $result; // Output: hello world +``` + +### Array Search with Single Replace + +```php +$search = ['a', 'e', 'i', 'o', 'u']; +$replace = '*'; +$subject = 'Hello World'; + +$result = StringManipulation::strReplace($search, $replace, $subject); +echo $result; // Output: H*ll* W*rld +``` + +### Empty Subject + +```php +$result = StringManipulation::strReplace('foo', 'bar', ''); +echo $result; // Output: '' (empty string) +``` + +--- + +## Use Cases + +### Template Processing + +```php +function processTemplate(string $template, array $variables): string +{ + $search = array_map(fn($key) => '{{' . $key . '}}', array_keys($variables)); + $replace = array_values($variables); + + return StringManipulation::strReplace($search, $replace, $template); +} + +$template = 'Hello {{name}}, welcome to {{site}}!'; +$result = processTemplate($template, [ + 'name' => 'John', + 'site' => 'Example.com', +]); +// Result: Hello John, welcome to Example.com! 
+``` + +### Sanitisation + +```php +function sanitiseFilename(string $filename): string +{ + $unsafe = ['/', '\\', ':', '*', '?', '"', '<', '>', '|']; + $safe = '_'; + + return StringManipulation::strReplace($unsafe, $safe, $filename); +} + +$filename = sanitiseFilename('report:2023/12.pdf'); +// Result: report_2023_12.pdf +``` + +### Text Normalisation + +```php +function normaliseWhitespace(string $text): string +{ + $whitespace = ["\r\n", "\r", "\t"]; + $space = ' '; + + return StringManipulation::strReplace($whitespace, $space, $text); +} +``` + +--- + +## Performance Notes + +- For single-character replacements, `strtr()` is used internally for better performance +- For multiple replacements, standard `str_replace()` is used +- Empty subject strings return immediately without processing + +--- + +## Related Methods + +- [`removeAccents()`]({{ site.baseurl }}/api-reference/remove-accents/) - Uses strReplace internally +- [`searchWords()`]({{ site.baseurl }}/api-reference/search-words/) - Uses strReplace for character normalisation diff --git a/docs/api-reference/trim.md b/docs/api-reference/trim.md new file mode 100644 index 0000000..c09c433 --- /dev/null +++ b/docs/api-reference/trim.md @@ -0,0 +1,189 @@ +--- +layout: default +title: trim() +parent: API Reference +nav_order: 7 +--- + +# trim() +{: .no_toc } + +Removes specified characters from the beginning and end of a string. +{: .fs-6 .fw-300 } + +## Table of contents +{: .no_toc .text-delta } + +1. TOC +{:toc} + +--- + +## Signature + +```php +public static function trim(string $string, string $characters = " \t\n\r\0\x0B"): string +``` + +## Parameters + +| Parameter | Type | Default | Description | +|:----------|:-----|:--------|:------------| +| `$string` | `string` | - | The input string to trim | +| `$characters` | `string` | `" \t\n\r\0\x0B"` | Characters to remove from both ends | + +## Returns + +`string` - The trimmed string. 
+ +--- + +## Description + +The `trim()` method removes specified characters from both the beginning and end of a string. By default, it removes common whitespace characters: + +| Character | Description | +|:----------|:------------| +| ` ` | Space | +| `\t` | Tab | +| `\n` | Newline (line feed) | +| `\r` | Carriage return | +| `\0` | Null byte | +| `\x0B` | Vertical tab | + +This method provides more explicit control over character removal compared to PHP's built-in `trim()`. + +--- + +## Examples + +### Basic Usage + +```php +use MarjovanLier\StringManipulation\StringManipulation; + +$result = StringManipulation::trim(' Hello World '); +echo $result; // Output: Hello World +``` + +### Custom Characters + +```php +// Remove specific characters +$result = StringManipulation::trim('###Hello###', '#'); +echo $result; // Output: Hello + +// Remove multiple custom characters +$result = StringManipulation::trim('***Hello***', '*#'); +echo $result; // Output: Hello +``` + +### Whitespace Handling + +```php +// Tabs and newlines +$input = "\t\nHello World\n\t"; +$result = StringManipulation::trim($input); +echo $result; // Output: Hello World + +// Mixed whitespace +$input = " \t Hello \n "; +$result = StringManipulation::trim($input); +echo $result; // Output: Hello +``` + +### URL Path Cleaning + +```php +$path = '/api/users/'; +$result = StringManipulation::trim($path, '/'); +echo $result; // Output: api/users +``` + +--- + +## Use Cases + +### Form Input Cleaning + +```php +function cleanFormInput(array $input): array +{ + return array_map(function ($value) { + if (is_string($value)) { + return StringManipulation::trim($value); + } + return $value; + }, $input); +} + +$input = [ + 'name' => ' John Doe ', + 'email' => ' john@example.com ', +]; + +$cleaned = cleanFormInput($input); +// Result: ['name' => 'John Doe', 'email' => 'john@example.com'] +``` + +### CSV Parsing + +```php +function parseCSVLine(string $line): array +{ + $fields = explode(',', $line); + + return 
array_map(function ($field) { + // Remove quotes and whitespace + return StringManipulation::trim($field, " \t\n\r\"'"); + }, $fields); +} + +$line = '"John" , "Doe" , "john@example.com"'; +$fields = parseCSVLine($line); +// Result: ['John', 'Doe', 'john@example.com'] +``` + +### Path Normalisation + +```php +function normalisePath(string $path): string +{ + // Remove leading and trailing slashes and backslashes + $path = StringManipulation::trim($path, '/\\'); + + // Ensure leading slash + return '/' . $path; +} + +$path = normalisePath('/api/users//'); +// Result: /api/users +``` + +### Log Message Cleaning + +```php +function cleanLogMessage(string $message): string +{ + // Strip whitespace and control characters (NUL, VT, ESC) from both ends of the message + return StringManipulation::trim($message, " \t\n\r\0\x0B\x1B"); +} +``` + +--- + +## Comparison with PHP trim() + +| Feature | `StringManipulation::trim()` | PHP `trim()` | +|:--------|:-----------------------------|:-------------| +| Type safety | Explicit string parameter | Mixed input | +| Return type | Guaranteed string | String (with type juggling) | +| Default chars | Same whitespace set | Same whitespace set | +| Custom chars | Second parameter | Second parameter | + +--- + +## Related Methods + +- [`strReplace()`]({{ site.baseurl }}/api-reference/str-replace/) - For replacing characters within strings +- [`searchWords()`]({{ site.baseurl }}/api-reference/search-words/) - Includes trimming in normalisation diff --git a/docs/api-reference/utf8-ansi.md b/docs/api-reference/utf8-ansi.md new file mode 100644 index 0000000..0ce5902 --- /dev/null +++ b/docs/api-reference/utf8-ansi.md @@ -0,0 +1,168 @@ +--- +layout: default +title: utf8Ansi() +parent: API Reference +nav_order: 4 +--- + +# utf8Ansi() +{: .no_toc } + +Converts UTF-8 encoded characters to their ANSI equivalents. +{: .fs-6 .fw-300 } + +## Table of contents +{: .no_toc .text-delta } + +1. 
TOC +{:toc} + +--- + +## Signature + +```php +public static function utf8Ansi(?string $value = ''): string +``` + +## Parameters + +| Parameter | Type | Default | Description | +|:----------|:-----|:--------|:------------| +| `$value` | `?string` | `''` | The UTF-8 encoded string to convert | + +## Returns + +`string` - The ANSI-compatible string. Returns empty string for null input. + +--- + +## Description + +The `utf8Ansi()` method converts UTF-8 encoded characters to their ANSI (Windows-1252) equivalents. This is essential for: + +- **Legacy system integration** - Older systems that don't support UTF-8 +- **File exports** - Creating files for systems with limited encoding support +- **Database compatibility** - Working with older database configurations + +### Character Mapping + +The method uses a comprehensive mapping array covering 60+ common character conversions, including: + +- Accented vowels (a, e, i, o, u) +- Special consonants (c, n, ss) +- Currency and typographic symbols +- Extended Latin characters + +--- + +## Examples + +### Basic Usage + +```php +use MarjovanLier\StringManipulation\StringManipulation; + +$result = StringManipulation::utf8Ansi('Uber'); +echo $result; // Output: Uber +``` + +### German Characters + +```php +StringManipulation::utf8Ansi('Munchen'); +// Output: Munchen + +StringManipulation::utf8Ansi('Strasse'); +// Output: Strasse (ss is converted) +``` + +### French Characters + +```php +StringManipulation::utf8Ansi('francais'); +// Output: francais + +StringManipulation::utf8Ansi('cafe'); +// Output: cafe +``` + +### Null Handling + +```php +$result = StringManipulation::utf8Ansi(null); +// Result: '' (empty string) + +$result = StringManipulation::utf8Ansi(); +// Result: '' (empty string) +``` + +--- + +## Use Cases + +### Legacy File Export + +```php +function exportToLegacyFormat(array $records, string $filename): void +{ + $handle = fopen($filename, 'w'); + + foreach ($records as $record) { + $line = 
StringManipulation::utf8Ansi($record['name']); + fwrite($handle, $line . "\n"); + } + + fclose($handle); +} +``` + +### Database Migration + +```php +function migrateToLegacyDatabase(PDO $legacy, array $users): void +{ + $stmt = $legacy->prepare('INSERT INTO users (name) VALUES (?)'); + + foreach ($users as $user) { + $ansiName = StringManipulation::utf8Ansi($user['name']); + $stmt->execute([$ansiName]); + } +} +``` + +### API Response Formatting + +```php +function formatForLegacyApi(array $data): array +{ + return array_map(function ($item) { + return [ + 'id' => $item['id'], + 'name' => StringManipulation::utf8Ansi($item['name']), + 'description' => StringManipulation::utf8Ansi($item['description']), + ]; + }, $data); +} +``` + +--- + +## Character Conversion Examples + +| UTF-8 Input | ANSI Output | +|:------------|:------------| +| `a` | `a` | +| `e` | `e` | +| `u` | `u` | +| `o` | `o` | +| `c` | `c` | +| `n` | `n` | +| `ss` | `ss` | + +--- + +## Related Methods + +- [`removeAccents()`]({{ site.baseurl }}/api-reference/remove-accents/) - Removes accents without encoding conversion +- [`nameFix()`]({{ site.baseurl }}/api-reference/name-fix/) - Uses utf8Ansi internally for name processing diff --git a/docs/assets/images/logo.svg b/docs/assets/images/logo.svg new file mode 100644 index 0000000..595247e --- /dev/null +++ b/docs/assets/images/logo.svg @@ -0,0 +1,4 @@ + + + S + diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 0000000..e128948 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,332 @@ +--- +layout: default +title: Contributing +nav_order: 6 +--- + +# Contributing +{: .no_toc } + +Guidelines for contributing to the StringManipulation library. +{: .fs-6 .fw-300 } + +## Table of contents +{: .no_toc .text-delta } + +1. TOC +{:toc} + +--- + +## Getting Started + +We welcome contributions to the StringManipulation library! 
Whether you're fixing bugs, adding features, improving documentation, or writing tests, your help is appreciated. + +### Prerequisites + +- PHP 8.3 or later +- Docker and Docker Compose (recommended for testing) +- Git +- Composer + +### Setting Up Development Environment + +1. **Fork the repository** on GitHub + +2. **Clone your fork**: + ```bash + git clone https://github.com/YOUR-USERNAME/StringManipulation.git + cd StringManipulation + ``` + +3. **Install dependencies**: + ```bash + composer install + ``` + +4. **Verify the setup**: + ```bash + docker-compose run --rm test-all + ``` + +--- + +## Development Workflow + +### Creating a Branch + +Create a feature branch from `main`: + +```bash +git checkout main +git pull origin main +git checkout -b feature/your-feature-name +``` + +Use descriptive branch names: +- `feature/add-new-method` +- `fix/handle-null-input` +- `docs/update-examples` +- `test/improve-coverage` + +### Making Changes + +1. **Write tests first** - Follow TDD principles +2. **Implement your changes** +3. **Ensure all tests pass** +4. **Update documentation** if needed +5. **Commit with conventional messages** + +--- + +## Code Standards + +### PHP Standards + +- **Strict typing**: All files must include `declare(strict_types=1);` +- **PHP 8.3+**: Use modern PHP features +- **PSR-4 autoloading**: Follow namespace conventions +- **Final classes**: Prefer final classes with static methods +- **Typed parameters**: Always use explicit type declarations +- **Docblocks**: Comprehensive documentation for public methods + +### Style Guidelines + +The project uses Laravel Pint with the "per" preset: + +```bash +# Check code style +docker-compose run --rm test-code-style + +# Fix code style automatically +docker-compose run --rm tests ./vendor/bin/pint +``` + +### Example Method Structure + +```php +/** + * Brief description of what the method does. + * + * Longer description if needed, explaining the algorithm + * or any important considerations. 
+ * + * @param string $input The input parameter description + * + * @return string The return value description + * + * @example + * $result = StringManipulation::methodName('example'); + * // Returns: 'processed example' + */ +public static function methodName(string $input): string +{ + // Implementation +} +``` + +--- + +## Testing Requirements + +### Running Tests + +```bash +# Run all tests (recommended) +docker-compose run --rm test-all + +# Run Pest tests only +docker-compose run --rm tests ./vendor/bin/pest + +# Run specific test file +docker-compose run --rm tests ./vendor/bin/pest tests/Unit/YourTest.php + +# Run with coverage +docker-compose run --rm tests ./vendor/bin/pest --coverage +``` + +### Test Categories + +| Test Type | Location | Purpose | +|:----------|:---------|:--------| +| Unit tests | `tests/Unit/` | Test individual methods | +| Benchmark tests | `tests/Benchmark/` | Performance verification | + +### Writing Tests + +Use Pest PHP syntax: + +```php +toBe('expected'); + }); + + it('handles null input', function (): void { + $result = StringManipulation::methodName(null); + + expect($result)->toBeNull(); + }); + + it('handles edge cases', function (): void { + expect(StringManipulation::methodName(''))->toBe(''); + expect(StringManipulation::methodName(' '))->toBe(' '); + }); +}); +``` + +### Coverage Requirements + +- **100% line coverage** for new methods +- **Edge cases** must be tested +- **Null handling** must be verified +- **Performance tests** for O(n) verification + +--- + +## Static Analysis + +The project uses three static analysis tools: + +### PHPStan (Level Max) + +```bash +docker-compose run --rm test-phpstan +``` + +Requirements: +- No errors at level max +- Strict rules enabled +- All types must be inferable + +### Psalm (Level 1) + +```bash +docker-compose run --rm test-psalm +``` + +Requirements: +- No errors at level 1 +- 99.95%+ type coverage + +### Phan + +```bash +docker-compose run --rm test-phan +``` + 
+Requirements: +- Clean analysis results + +--- + +## Commit Guidelines + +Use conventional commit messages: + +``` +type(scope): subject + +body (optional) + +footer (optional) +``` + +### Types + +| Type | Description | +|:-----|:------------| +| `feat` | New feature | +| `fix` | Bug fix | +| `docs` | Documentation only | +| `style` | Code style changes | +| `refactor` | Code refactoring | +| `perf` | Performance improvement | +| `test` | Adding/updating tests | +| `chore` | Maintenance tasks | + +### Examples + +``` +feat(nameFix): add support for Portuguese prefixes + +Add handling for 'da', 'das', 'do', 'dos' prefixes +commonly found in Portuguese surnames. +``` + +``` +fix(isValidDate): handle edge case for February 29 + +Correctly validate leap year dates by checking +the actual year, not just the format. +``` + +--- + +## Pull Request Process + +1. **Create your PR** against the `main` branch + +2. **Fill out the PR template** with: + - Description of changes + - Related issue (if any) + - Testing performed + - Checklist completion + +3. **Ensure CI passes**: + - All tests green + - Code style check passes + - Static analysis passes + - Coverage maintained + +4. **Address review feedback** promptly + +5. **Squash commits** if requested + +### PR Checklist + +- [ ] Tests added/updated +- [ ] Documentation updated +- [ ] Code style passes +- [ ] Static analysis passes +- [ ] Coverage maintained at 100% +- [ ] Conventional commit message + +--- + +## Reporting Issues + +### Bug Reports + +Include: +- PHP version +- Library version +- Minimal reproduction code +- Expected vs actual behaviour +- Error messages (if any) + +### Feature Requests + +Include: +- Use case description +- Proposed API (if applicable) +- Examples of expected behaviour +- Alternatives considered + +--- + +## Questions? 
+ +- Open a [GitHub issue](https://github.com/MarjovanLier/StringManipulation/issues) for questions +- Check existing issues and discussions first +- Be respectful and patient + +Thank you for contributing! diff --git a/docs/examples.md b/docs/examples.md new file mode 100644 index 0000000..f3cf6ff --- /dev/null +++ b/docs/examples.md @@ -0,0 +1,440 @@ +--- +layout: default +title: Examples +nav_order: 5 +--- + +# Examples +{: .no_toc } + +Practical examples and common use cases for the StringManipulation library. +{: .fs-6 .fw-300 } + +## Table of contents +{: .no_toc .text-delta } + +1. TOC +{:toc} + +--- + +## Search Functionality + +### Building a Searchable Index + +Store normalised text alongside original values for efficient searching: + +```php +name = $name; + $this->nameSearchable = StringManipulation::searchWords($name) ?? ''; + + $this->description = $description; + $this->descriptionSearchable = StringManipulation::searchWords($description) ?? ''; + } +} + +// Usage +$product = new Product( + 'Cafe Creme Brulee', + 'Authentic French dessert with caramelised sugar top' +); + +// Search query +$query = StringManipulation::searchWords('creme brulee'); + +// Database query +$sql = "SELECT * FROM products + WHERE name_searchable LIKE :query + OR description_searchable LIKE :query"; +``` + +### Multi-Language Search + +Handle international text in search queries: + +```php +function search(string $query, PDO $db): array +{ + $normalised = StringManipulation::searchWords($query); + + if ($normalised === null || $normalised === '') { + return []; + } + + $terms = explode(' ', $normalised); + + $conditions = array_map( + fn($i) => "search_index LIKE :term{$i}", + array_keys($terms) + ); + + $sql = "SELECT * FROM items WHERE " . 
implode(' AND ', $conditions); + $stmt = $db->prepare($sql); + + foreach ($terms as $i => $term) { + $stmt->bindValue(":term{$i}", "%{$term}%"); + } + + $stmt->execute(); + return $stmt->fetchAll(); +} + +// Searches work regardless of accents +search('cafe', $db); // Finds 'Cafe' +search('munchen', $db); // Finds 'Munchen' +search('francais', $db); // Finds 'francais' +``` + +--- + +## User Data Processing + +### Registration Form Handling + +Standardise user input during registration: + +```php +class UserRegistrationService +{ + public function register(array $formData): User + { + $user = new User(); + + // Standardise names + $user->firstName = $this->capitaliseName($formData['first_name']); + $user->lastName = StringManipulation::nameFix($formData['last_name']); + + // Create searchable version for lookups + $user->fullNameSearch = StringManipulation::searchWords( + $user->firstName . ' ' . $user->lastName + ); + + // Validate birth date + if (!StringManipulation::isValidDate($formData['birth_date'], 'Y-m-d')) { + throw new ValidationException('Invalid birth date'); + } + $user->birthDate = $formData['birth_date']; + + return $user; + } + + private function capitaliseName(string $name): string + { + return ucwords(strtolower(trim($name))); + } +} +``` + +### Address Standardisation + +Clean and normalise address data: + +```php +class AddressNormaliser +{ + public function normalise(array $address): array + { + return [ + 'street' => StringManipulation::trim($address['street'] ?? ''), + 'city' => StringManipulation::nameFix($address['city'] ?? ''), + 'country' => StringManipulation::removeAccents($address['country'] ?? ''), + // Searchable version for geocoding/lookup + 'search_key' => StringManipulation::searchWords( + implode(' ', [ + $address['street'] ?? '', + $address['city'] ?? '', + $address['postal_code'] ?? 
'', + ]) + ), + ]; + } +} + +$normaliser = new AddressNormaliser(); +$address = $normaliser->normalise([ + 'street' => ' Rue de la Cafe ', + 'city' => 'munchen', + 'postal_code' => '80331', + 'country' => 'Deutschland', +]); + +// Result: +// [ +// 'street' => 'Rue de la Cafe', +// 'city' => 'Munchen', +// 'country' => 'Deutschland', +// 'search_key' => 'rue de la cafe munchen 80331', +// ] +``` + +--- + +## Data Import/Export + +### CSV Import with Validation + +Process CSV files with data cleaning: + +```php +class CsvImporter +{ + public function import(string $filepath): array + { + $handle = fopen($filepath, 'r'); + $headers = fgetcsv($handle); + $records = []; + $errors = []; + $line = 1; + + while (($row = fgetcsv($handle)) !== false) { + $line++; + $data = array_combine($headers, $row); + + try { + $records[] = $this->processRow($data, $line); + } catch (ValidationException $e) { + $errors[] = "Line {$line}: {$e->getMessage()}"; + } + } + + fclose($handle); + + return ['records' => $records, 'errors' => $errors]; + } + + private function processRow(array $data, int $line): array + { + // Validate date + if (!StringManipulation::isValidDate($data['date'], 'Y-m-d')) { + throw new ValidationException("Invalid date format"); + } + + // Clean and normalise + return [ + 'name' => StringManipulation::nameFix($data['name']), + 'name_search' => StringManipulation::searchWords($data['name']), + 'date' => $data['date'], + 'description' => StringManipulation::removeAccents( + StringManipulation::trim($data['description']) + ), + ]; + } +} +``` + +### Legacy System Export + +Convert data for systems that don't support UTF-8: + +```php +class LegacyExporter +{ + public function export(array $records, string $filepath): void + { + $handle = fopen($filepath, 'w'); + + // Write header + fwrite($handle, "NAME|DESCRIPTION|DATE\n"); + + foreach ($records as $record) { + $line = implode('|', [ + StringManipulation::utf8Ansi($record['name']), + 
StringManipulation::utf8Ansi($record['description']), + $record['date'], + ]); + fwrite($handle, $line . "\n"); + } + + fclose($handle); + } +} +``` + +--- + +## API Development + +### Request Validation Middleware + +Validate and clean incoming API requests: + +```php +class ValidationMiddleware +{ + public function handle(Request $request, Closure $next): Response + { + $data = $request->all(); + + // Clean string inputs + foreach ($data as $key => $value) { + if (is_string($value)) { + $data[$key] = StringManipulation::trim($value); + } + } + + // Validate dates + if (isset($data['start_date'])) { + if (!StringManipulation::isValidDate($data['start_date'], 'Y-m-d')) { + return response()->json([ + 'error' => 'Invalid start_date format. Use Y-m-d.', + ], 422); + } + } + + $request->merge($data); + + return $next($request); + } +} +``` + +### Search API Endpoint + +Build a search API with normalised queries: + +```php +class SearchController +{ + public function search(Request $request): JsonResponse + { + $query = $request->input('q', ''); + + // Normalise search query + $normalised = StringManipulation::searchWords($query); + + if ($normalised === null || strlen($normalised) < 2) { + return response()->json([ + 'error' => 'Search query too short', + ], 400); + } + + $results = $this->repository->search($normalised); + + return response()->json([ + 'query' => $query, + 'normalised_query' => $normalised, + 'results' => $results, + 'count' => count($results), + ]); + } +} +``` + +--- + +## URL and Slug Generation + +### SEO-Friendly URLs + +Create clean URL slugs from titles: + +```php +class SlugGenerator +{ + public function generate(string $title): string + { + // Remove accents + $slug = StringManipulation::removeAccents($title); + + // Convert to lowercase + $slug = strtolower($slug); + + // Replace non-alphanumeric with hyphens + $slug = preg_replace('/[^a-z0-9]+/', '-', $slug); + + // Trim hyphens + return StringManipulation::trim($slug, '-'); + } +} + 
+$generator = new SlugGenerator(); + +echo $generator->generate('Cafe Creme Brulee'); +// Output: cafe-creme-brulee + +echo $generator->generate('Munchen Guide 2024!'); +// Output: munchen-guide-2024 +``` + +--- + +## E-commerce + +### Product Catalogue Processing + +Standardise product data for a catalogue: + +```php +class ProductProcessor +{ + public function process(array $rawProduct): array + { + $name = StringManipulation::nameFix($rawProduct['name']); + + return [ + 'name' => $name, + 'slug' => $this->generateSlug($rawProduct['name']), + 'search_terms' => StringManipulation::searchWords( + $rawProduct['name'] . ' ' . ($rawProduct['category'] ?? '') + ), + 'description' => StringManipulation::trim($rawProduct['description'] ?? ''), + 'sku' => strtoupper(StringManipulation::trim($rawProduct['sku'] ?? '')), + ]; + } + + private function generateSlug(string $name): string + { + $slug = StringManipulation::removeAccents($name); + $slug = strtolower($slug); + $slug = preg_replace('/[^a-z0-9]+/', '-', $slug); + return trim($slug, '-'); + } +} +``` + +--- + +## Logging and Monitoring + +### Clean Log Messages + +Sanitise data before logging: + +```php +class SecureLogger +{ + public function log(string $level, string $message, array $context = []): void + { + // Strip whitespace and control characters from both ends of the message + $cleanMessage = StringManipulation::trim( + $message, + " \t\n\r\0\x0B\x1B" // Include escape character + ); + + // Sanitise context values + $cleanContext = array_map(function ($value) { + if (is_string($value)) { + return StringManipulation::utf8Ansi($value); + } + return $value; + }, $context); + + $this->logger->log($level, $cleanMessage, $cleanContext); + } +} +``` diff --git a/docs/getting-started.md b/docs/getting-started.md new file mode 100644 index 0000000..dd6fb92 --- /dev/null +++ b/docs/getting-started.md @@ -0,0 +1,169 @@ +--- +layout: default +title: Getting Started +nav_order: 2 +--- + +# Getting Started +{: .no_toc } + +Learn how to install and start 
using the StringManipulation library in your PHP projects. +{: .fs-6 .fw-300 } + +## Table of contents +{: .no_toc .text-delta } + +1. TOC +{:toc} + +--- + +## Installation + +Install the package via Composer: + +```bash +composer require marjovanlier/stringmanipulation +``` + +### Requirements + +- **PHP 8.3+** with strict typing +- **mbstring extension** for multi-byte string operations +- **intl extension** for internationalisation support + +--- + +## Basic Usage + +All methods are static and accessible through the `StringManipulation` class: + +```php + StringManipulation::nameFix($record['name']), + 'search' => StringManipulation::searchWords($record['name']), + ]; + } +} + +// Memory-efficient processing +foreach (processLargeDataset($database->cursor()) as $processed) { + // Handle each record +} +``` + +### Avoid Redundant Operations + +If you only need the searchable form, call `searchWords()` directly; it already includes name fixing, so a separate `nameFix()` pass is redundant: + +```php +// Less efficient - the nameFix() result is never used +$fixed = StringManipulation::nameFix($name); +$search = StringManipulation::searchWords($name); + +// More efficient - searchWords includes name fixing +$search = StringManipulation::searchWords($name); +``` + +### Pre-warm Cache for Critical Paths + +If first-call latency matters, pre-warm the caches during application bootstrap: + +```php +// In bootstrap.php or service provider +StringManipulation::removeAccents('warmup'); +StringManipulation::searchWords('warmup'); +``` + +--- + +## Comparison with Alternatives + +The library outperforms common alternatives: + +| Library/Approach | removeAccents equivalent | Notes | +|:-----------------|:-------------------------|:------| +| StringManipulation | ~450,000 ops/sec | Optimised strtr() | +| Manual preg_replace | ~150,000 ops/sec | Multiple regex passes | +| iconv transliteration | ~200,000 ops/sec | System-dependent | +| Multiple str_replace | ~100,000 ops/sec | Linear per pattern | + +*Approximate comparisons. 
Actual results depend on input and environment.*