1 : <?php
2 : /**
3 : * $Id: Hyphenator.php 1114 2009-07-10 08:48:44Z heiglandreas $
4 : *
5 : * Copyright (c) 2008-2009 Andreas Heigl<andreas@heigl.org>
6 : *
7 : * Permission is hereby granted, free of charge, to any person obtaining a copy
8 : * of this software and associated documentation files (the "Software"), to deal
9 : * in the Software without restriction, including without limitation the rights
10 : * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 : * copies of the Software, and to permit persons to whom the Software is
12 : * furnished to do so, subject to the following conditions:
13 : *
14 : * The above copyright notice and this permission notice shall be included in
15 : * all copies or substantial portions of the Software.
16 : *
17 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 : * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 : * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 : * THE SOFTWARE.
24 : *
25 : * @category Org_Heigl
26 : * @package Org_Heigl_Hyphenator
27 : * @author Andreas Heigl <andreas@heigl.org>
28 : * @copyright 2008 Andreas Heigl<andreas@heigl.org>
29 : * @license http://www.opensource.org/licenses/mit-license.php MIT-License
30 : * @version SVN: $Revision: 1114 $
31 : * @since 12.06.2008
32 : */
33 :
34 : /**
35 : * This class implements word-hyphenation
36 : *
37 : * Word-hyphenation is implemented on the basis of the algorithms developed by
38 : * Franklin Mark Liang for LaTeX as described in his dissertation at the department
39 : * of computer science at stanford university.
40 : *
41 : * This package is based on an idea of Mathias Nater<mnater@mac.com> who
42 : * implemented this word-hyphenation-algorithm for javascript.
43 : *
 * Hyphenating means in this case that all possible hyphenations in a word are
 * marked using the soft-hyphen character (character code 173) or any other
 * character set via the setHyphen() method.
47 : *
48 : * A complete text will first be divided into words via a regular expression
49 : * that takes all characters that the \w-Special-Character specifies as well as
50 : * the '@'-Character and possible other - language-specific - characters that
51 : * can be set via the setSpecialChars() method.
52 : *
53 : * Hyphenation is done using a set of files taken from a current TeX-Distribution
54 : * that are matched using the method getTexFile().
55 : *
56 : * So here is an example for the usage of the class:
57 : * <code>
58 : * <?php
59 : * $hyphenator = Org_Heigl_Hyphenator::getInstance ( 'de' );
60 : * $hyphenator -> setHyphen ( '-' )
61 : * // Minimum 5 characters before the first hyphenation
62 : * -> setLeftMin ( 5 )
63 : * // Hyphenate only words with more than 4 characters
64 : * -> setWordMin ( 5 )
65 : * // Set some special characters
66 : * -> setSpecialChars ( 'äöüß' )
67 : * // Only Hyphenate with the best quality
 *     -> setQuality ( Org_Heigl_Hyphenator::QUALITY_HIGHEST )
69 : * // Words that shall not be hyphenated have to start with this string
70 : * -> setNoHyphenateMarker ( 'nbr:' )
71 : * // Words that contain this string are custom hyphenated
72 : * -> setCustomHyphen ( '--' );
73 : *
74 : * // Hyphenate the string $string
75 : * $hyphenated = $hyphenator -> hyphenate ( $text );
76 : * ?>
77 : * </code>
78 : *
79 : * @category Org_Heigl
80 : * @package Org_Heigl_Hyphenator
81 : * @author Andreas Heigl <a.heigl@wdv.de>
82 : * @copyright 2008-2010 Andreas Heigl
83 : * @license http://www.opensource.org/licenses/mit-license.php MIT-License
84 : * @version SVN: $Revision: 1114 $
85 : * @see http://code.google.com/p/hyphenator
86 : * @see http://www.tug.org/docs/liang/liang-thesis.pdf
87 : * @since 12.06.2008
88 : */
89 : final class Org_Heigl_Hyphenator
90 : {
91 :
92 : const QUALITY_HIGHEST = 9;
93 : const QUALITY_HIGH = 7;
94 : const QUALITY_NORMAL = 5;
95 : const QUALITY_LOW = 3;
96 : const QUALITY_LOWEST = 1;
97 :
98 : /**
99 : * This is the default language to use.
100 : *
101 : * @var string $_defaultLanguage
102 : */
103 : private static $_defaultLanguage = 'en';
104 :
105 : /**
106 : * This property stores an instance of the hyphenator for each language
107 : *
108 : * @var array $_store
109 : */
110 : private static $_store = array ();
111 :
112 : /**
113 : * Store the caching-Object
114 : *
115 : * @var Zend_Cache $_cache
116 : */
117 : private static $_cache = null;
118 :
119 : /**
120 : * Store whether caching is enabled or not
121 : *
122 : * Caching is turned off by default
123 : *
124 : * @var boolean $_cachingEnabled
125 : */
126 : private $_cachingEnabled = false;
127 :
128 : /**
129 : * The String that marks a word not to hyphenate
130 : *
131 : * @var string _noHyphenateString
132 : */
133 : private $_noHyphenateString = null;
134 :
135 : /**
136 : * This property defines the default hyphenation-character.
137 : *
138 : * This is set during instantiation to the Soft-Hyphen-Character (ASCII 173)
139 : * but can be overwritten using the setHyphen()-Method
140 : *
141 : * @var string $_hyphen
142 : */
143 : private $_hyphen = null;
144 :
145 : /**
146 : * This property defines how many characters need to stay to the left side
147 : * of a hyphenation.
148 : *
149 : * This defaults to 2 characters, but it can be overwritten using the
150 : * setLeftMin()-Method
151 : *
152 : * @var int $_leftmin
153 : */
154 : private $_leftMin = 2;
155 :
156 : /**
157 : * This property defines how many characters need to stay to the right side
158 : * of a hyphenation.
159 : *
160 : * This defaults to 2 characters, but it can be overwritten using the
161 : * setRightMin()-Method
162 : *
163 : * @var int $_rightmin
164 : */
165 : private $_rightMin = 2;
166 :
167 : /**
168 : * Whether to mark Customized Hyphenations or not.
169 : *
170 : * @var boolean $_markCustomized
171 : */
172 : private $_markCustomized = false;
173 :
174 : /**
175 : * When customizations shall be used, what string shall be prepend to the
176 : * word that contains customizations.
177 : *
178 : * @var string|null $_customizedMarker
179 : */
180 : private $_customizedMarker = '<!--cm-->';
181 :
182 : /**
183 : * The shortest pattern length to use for Hyphenating
184 : *
185 : * @var int $_shortestPattern
186 : */
187 : private $_shortestPattern = 2;
188 :
189 : /**
190 : * The longest pattern length to use for hyphenating.
191 : *
192 : * Using a high number (like '10') almost every pattern should be used
193 : *
194 : * @var int $_longestPattern
195 : */
196 : private $_longestPattern = 10;
197 :
198 : /**
 * This property defines some special characters for a language that need
 * to be taken into account for the definition of a word.
201 : *
202 : * @var string $_specialChars
203 : */
204 : private $_specialChars = '';
205 :
206 : /**
207 : * This property defines, how long a word that can be hyphenated needs to be.
208 : *
209 : * This defaults to 6 Characters, but it can be overridden using
210 : * setWordMin()
211 : *
212 : * @var int $_wordMin
213 : */
214 : private $_wordMin = 6;
215 :
216 : /**
217 : * This property contains the pattern-array for a specific language
218 : *
219 : * @var array|null $_pattern
220 : */
221 : private $_pattern = null;
222 :
223 : /**
224 : * The currently set quality for hyphenation
225 : *
226 : * The lower the number, the better the hyphenation is
227 : *
228 : * @var int $_quality
229 : */
230 : private $_quality = 9;
231 :
232 : /**
233 : * The String that shall be searched for as a customHyphen
234 : * @var string $_customHyphen
235 : */
236 : private $_customHyphen = '--';
237 :
238 : /**
239 : * The special strings to parse as hyphenations
240 : *
241 : * @var array $_specialStrings
242 : */
243 : private $_specialStrings = array ( '-/-', '-' );
244 :
245 : /**
246 : * This is the static way of hyphenating a string.
247 : *
248 : * This method gets the appropriate Hyphenator-object and calls the method
249 : * hyphenate() on it.
250 : *
251 : * @param string $string The String to hyphenate
252 : * @param string $options The Options to use for Hyphenation
253 : *
254 : * @return string The hyphenated string
255 : */
256 : public static function parse ( $string, $options = null ) {
257 :
258 2 : if ( null === $options ) {
259 2 : $options = array ();
260 2 : }
261 2 : if ( ! isset ( $options [ 'language' ] ) ) {
262 2 : $options [ 'language' ] = Org_Heigl_Hyphenator::getDefaultLanguage ();
263 2 : }
264 : // Get the instance for the language.
265 2 : $hyphenator = Org_Heigl_Hyphenator::getInstance ( $options ['language'] );
266 :
267 2 : unset ( $options['language'] );
268 2 : foreach ( $options as $key => $val ) {
269 0 : call_user_func ( array ( $hyphenator, 'set' . $key ), $val );
270 2 : }
271 :
272 : // Hyphenate the string using the Hyphenator instance.
273 2 : $string = $hyphenator -> hyphenate ( $string );
274 :
275 : // Return the hyphenated string.
276 2 : return $string;
277 : }
278 :
/**
 * Set the default language
 *
 * The value is stored statically and is used by parse() whenever no
 * 'language' option is given.
 *
 * @param string $language The language to set.
 *
 * @return void
 */
public static function setDefaultLanguage($language) {
    self::$_defaultLanguage = $language;
}
289 :
/**
 * Get the default language
 *
 * @return string The statically stored default language
 */
public static function getDefaultLanguage() {
    return self::$_defaultLanguage;
}
/**
 * This method gets the hyphenator-instance for the language <var>$language</var>
 *
 * An instance that has already been created for the language is returned
 * straight from the internal store. Otherwise the parsed pattern file
 * 'Hyphenator/parsedFiles/<language>.php' is looked up next to this file;
 * when it is missing it is generated from the TeX file that getTexFile()
 * names for the language. After that a new instance is created and stored.
 *
 * @param string $language The language to use for hyphenating
 *
 * @return Org_Heigl_Hyphenator A Hyphenator-Object
 * @throws InvalidArgumentException if no parsed pattern file exists and
 *                                  none could be created
 */
public static function getInstance($language = 'en') {
    // A stored instance needs no further file system checks.
    if (isset(self::$_store[$language])
        && self::$_store[$language] instanceof Org_Heigl_Hyphenator
    ) {
        return self::$_store[$language];
    }

    $baseDir = dirname(__FILE__)
             . DIRECTORY_SEPARATOR
             . 'Hyphenator'
             . DIRECTORY_SEPARATOR;
    $parsedFile = $baseDir
                . 'parsedFiles'
                . DIRECTORY_SEPARATOR
                . $language
                . '.php';

    if (!file_exists($parsedFile)) {
        // Try to create the parsed file from the TeX source.
        $texFile = $baseDir
                 . 'files'
                 . DIRECTORY_SEPARATOR
                 . self::getTexFile($language);
        self::parseTexFile($texFile, $parsedFile);
    }
    if (!file_exists($parsedFile)) {
        throw new InvalidArgumentException('file ' . $language . '.php does not exist');
    }

    self::$_store[$language] = new Org_Heigl_Hyphenator($language);

    return self::$_store[$language];
}
340 :
/**
 * This method parses a TEX-Hyphenation file and creates the appropriate
 * PHP-Hyphenation file
 *
 * The content of the \patterns{...} section is extracted, TeX comments are
 * removed, '"a', '"o' and '"u' are replaced by the matching umlauts and
 * every '.' is replaced by '_'. Each remaining pattern is written as
 * <code>$pattern['letters'] = 'digits';</code> where 'digits' holds the
 * pattern with every non-digit replaced by '0'. Patterns that contain a
 * backslash are skipped.
 *
 * @param string $file       The original TEX-File
 * @param string $parsedFile The PHP-File to be created
 *
 * @return boolean <var>true</var> when the PHP-file has been written,
 *                 <var>false</var> when the TEX-file can not be read, holds
 *                 no \patterns section or the PHP-file can not be written
 */
public static function parseTexFile($file, $parsedFile) {
    if (!is_readable($file)) {
        return false;
    }
    $fc = file_get_contents($file);
    if (false === $fc) {
        return false;
    }

    $match = array();
    if (!preg_match('/[\\n\\r]\\\\patterns\\{(.*)\\}\\s*\\\\/sim', $fc, $match)) {
        return false;
    }
    $fc = preg_replace('/%.*/', '', $match[1]);
    $fc = preg_replace('/\\\\n\\{(.+?)\\}/', '\1', $fc);
    $fc = preg_replace(array('/"a/', '/"o/', '/"u/', '/\\./'), array('ä', 'ö', 'ü', '_'), $fc);

    // '$' . 'Id' . '$' keeps version control systems from expanding the
    // keyword inside this source file.
    $content = '<?php
/**
 * $' . 'Id' . '$
 *
 * Copyright (c) 2008-2010 Andreas Heigl<andreas@heigl.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 * This file has been automaticly created from the file ' . basename($file) . '
 * via the method Org_Heigl_Hyphenator::parseTexFile().
 *
 * DO NOT EDIT THIS FILE EXCEPT YOU KNOW WHAT YOU DO!!
 *
 * @category   Org_Heigl
 * @package    Org_Heigl_Hyphenator
 * @subpackage HyphenationFiles
 * @author     Org_Heigl_Hyphenator
 * @copyright  2008-2010 Andreas Heigl<andreas@heigl.org>
 * @license    http://www.opensource.org/licenses/mit-license.php MIT-License
 * @version    1.0
 * @since      ' . date('d.m.Y') . '
 */
';

    foreach (preg_split('/\\s+/', $fc) as $pattern) {
        // Patterns still containing TeX commands can not be used.
        if (false !== strpos($pattern, '\\')) {
            continue;
        }
        $patternstring = preg_replace('/[0-9]/', '', $pattern);
        if ('' === $patternstring) {
            continue;
        }
        // Keep the digits in place and mark every other byte with '0'.
        $patternint = preg_replace('/[^0-9]/', '0', $pattern);

        // var_export() escapes apostrophes so that patterns like "l'a"
        // do not break the generated file.
        $content .= '$pattern[' . var_export($patternstring, true) . '] = \''
                  . $patternint . '\';' . "\n";
    }
    $content .= '?>';

    return false !== file_put_contents($parsedFile, $content);
}
424 :
/**
 * This method returns the name of a TeX-Hyphenation file to a language code
 *
 * Unknown language codes fall back to the english file.
 *
 * The map below only lists the entry that is effective for each code. For
 * the codes 'dk', 'fi', 'hu', 'sk' and ' ' the former list contained several
 * files under the same array key, of which PHP only ever kept the last one
 * (dkhyphen.tex, dkspecial.tex, fi8hyph.tex, huhyph.tex, skhyph.tex,
 * nohyphbx.tex, dumyhyph.tex, hypht1.tex, nohyph.tex and nohyphb.tex were
 * therefore never returned).
 *
 * @param string $language The language code to get the TeX-file for
 *
 * @return string
 */
public static function getTexFile($language) {
    $files = array(
        ' '      => 'zerohyph.tex',
        'ba'     => 'bahyph.tex',
        'ca'     => 'cahyph.tex',
        'cz'     => 'czhyph.tex',
        'de'     => 'dehyphn.tex',
        'de_OLD' => 'dehypht.tex',
        'dk'     => 'dkcommon.tex',
        'ee'     => 'eehyph.tex',
        'en'     => 'hyphen.tex',
        'eo'     => 'eohyph.tex',
        'es'     => 'eshyph.tex',
        'fi'     => 'fihyph.tex',
        'fr'     => 'frhyph.tex',
        'ga'     => 'gahyph.tex',
        'gr'     => 'grhyph.tex',
        'hr'     => 'hrhyph.tex',
        'hu'     => 'huhyphn.tex',
        'ic'     => 'icehyph.tex',
        'in'     => 'inhyph.tex',
        'it'     => 'ithyph.tex',
        'la'     => 'lahyph.tex',
        'mn'     => 'mnhyphen.tex',
        'ne'     => 'nehyph.tex',
        'no'     => 'nohyphbc.tex',
        'pl'     => 'plhyph.tex',
        'pt'     => 'pt8hyph.tex',
        'ro'     => 'rohyphen.tex',
        'se'     => 'sehyph.tex',
        'si'     => 'sihyph23.tex',
        'sk'     => 'skhyph2e.tex',
        'sr'     => 'srhyphc.tex',
        'tr'     => 'trhyph.tex',
    );

    if (array_key_exists($language, $files)) {
        return $files[$language];
    }

    return $files['en'];
}
481 :
/**
 * Set an instance of Zend_Cache as Caching-Backend.
 *
 * The backend is stored statically and therefore shared by the instances
 * of all languages.
 *
 * NOTE(review): cacheRead() and cacheWrite() call load() and save() on the
 * stored object - confirm that the Zend_Cache type hint matches the class
 * that actually provides these methods.
 *
 * @param Zend_Cache $cache The caching Backend
 *
 * @uses   Zend_Cache
 * @link   http://framework.zend.com/zend.cache.html
 * @return boolean Always <var>true</var>
 */
public static function setCache(Zend_Cache $cache) {
    self::$_cache = $cache;

    return true;
}
496 :
/**
 * Get the cache-Object
 *
 * @return Zend_Cache|null The backend given to setCache() or
 *                         <var>null</var> when none has been set
 */
public static function getCache() {
    return self::$_cache;
}
505 :
/**
 * This is the constructor, that initialises the hyphenator for the given
 * language <var>$language</var>
 *
 * The pattern file 'Hyphenator/parsedFiles/<language>.php' is loaded. For
 * language codes containing an underscore (like 'de_DE') the part in front
 * of the underscore is used as fallback when no file exists for the
 * complete code.
 *
 * The constructor is public, but instances should be fetched via the
 * getInstance() method so the patterns are only loaded once per language.
 *
 * @param string $language The language to use for hyphenating
 *
 * @throws Exception if no pattern file can be loaded for the language
 */
public function __construct($language = 'en') {
    $language   = (string) $language;
    $candidates = array($language);

    // strpos() expects the haystack first.
    $pos = strpos($language, '_');
    if (false !== $pos && $pos > 0) {
        $candidates[] = substr($language, 0, $pos);
    }

    $directory = dirname(__FILE__)
               . DIRECTORY_SEPARATOR
               . 'Hyphenator'
               . DIRECTORY_SEPARATOR
               . 'parsedFiles'
               . DIRECTORY_SEPARATOR;

    $pattern    = null;
    $parsedFile = '';
    foreach ($candidates as $candidate) {
        $parsedFile = $directory . $candidate . '.php';
        if (!is_file($parsedFile) || !is_readable($parsedFile)) {
            continue;
        }
        // The pattern file fills the local variable $pattern. A plain
        // include is used because include_once would not define the
        // variable again for a second instance of the same language.
        $pattern = array();
        include $parsedFile;

        // NOTE(review): _language is not among the declared properties of
        // this class; it is kept here as it has been set before.
        $this->_language = $candidate;
        break;
    }

    if (!is_array($pattern)) {
        throw new Exception('File \'' . $parsedFile . '\' could not be found');
    }
    $this->_pattern = $pattern;

    if (null === $this->_hyphen) {
        $this->_hyphen = chr(173);
    }
}
548 :
/**
 * This method does the actual hyphenation.
 *
 * The given <var>$string</var> is split into chunks (i.e. words) at
 * every blank.
 *
 * After that every chunk is hyphenated via hyphenateWord() and the array of
 * chunks is merged into a single string using blanks again.
 *
 * This method does not take into account other word-delimiters than blanks
 * (eg. returns or tabstops) and it will fail with texts containing markup
 * in any way.
 *
 * When caching is enabled the result is read from and written to the cache
 * backend. The backend is shared by the instances of all languages, so the
 * cache key is a hash of the language, the current settings and the text
 * instead of the text alone.
 *
 * @param string $string The string to hyphenate
 *
 * @return string The hyphenated string
 */
public function hyphenate($string) {
    $caching  = $this->isCachingEnabled();
    $cacheKey = null;

    if ($caching) {
        $cacheKey = 'hyphenator_' . md5(serialize(array(
            isset($this->_language) ? $this->_language : '',
            $this->_hyphen,
            $this->_leftMin,
            $this->_rightMin,
            $this->_wordMin,
            $this->_quality,
            $this->_shortestPattern,
            $this->_longestPattern,
            $this->_customHyphen,
            $this->_noHyphenateString,
            $this->_markCustomized,
            $this->_customizedMarker,
            $this->_specialStrings,
            (string) $string,
        )));

        // Return the cached version if there is one.
        $cached = $this->cacheRead($cacheKey);
        if (false !== $cached) {
            return $cached;
        }
    }

    $words = explode(' ', $string);
    foreach ($words as $index => $word) {
        $words[$index] = $this->hyphenateWord($word);
    }
    $hyphenatedString = implode(' ', $words);

    if ($caching) {
        $this->cacheWrite($cacheKey, $hyphenatedString);
    }

    return $hyphenatedString;
}
592 :
/**
 * This method hyphenates a single word
 *
 * The following checks are done one after the other, the first one that
 * applies defines the returned value:
 *
 * 1. A word consisting of whitespace only results in an empty string.
 * 2. A word starting with the no-hyphenate-marker is returned as prepared
 *    by _isNotToBeHyphenated().
 * 3. A word shorter than the minimum word length is returned unchanged.
 * 4. A word containing the soft-hyphen literal is returned with that
 *    literal replaced by the hyphenation character.
 * 5. A word containing the custom hyphen is returned as prepared by
 *    _replaceCustomHyphen().
 * 6. A word already containing the hyphenation character is returned
 *    unchanged.
 * 7. A word containing one of the special strings is returned as prepared
 *    by _handleSpecialStrings().
 * 8. Every other word is hyphenated using the patterns.
 *
 * @param string $word The Word to hyphenate
 *
 * @return string the hyphenated word
 */
public function hyphenateWord ( $word ) {

    // If the Word is empty, return an empty string.
    // NOTE(review): trim() is used here, so a chunk that only holds tabs or
    // line breaks is returned as empty string as well.
    if ( '' === trim ( $word ) ) {
        return '';
    }

    // Check whether the word shall be hyphenated.
    $result = $this -> _isNotToBeHyphenated ( $word );
    if ( false !== $result ) {
        return $result;
    }

    // If the length of the word is smaller than the minimum word-size,
    // return the word. The length is measured in bytes by strlen().
    if ( $this -> _wordMin > strlen ( $word ) ) {
        return $word;
    }

    // Replace soft hyphens that are already part of the word with the
    // hyphenation character and return the result.
    // NOTE(review): in this copy of the file the searched literal is a raw
    // soft-hyphen character which may not be visible in some editors. The
    // former comment referred to the HTML-entity for the soft hyphen -
    // confirm which of the two is meant.
    if ( false !== strpos ( $word, '­' ) ) {
        return str_replace ( '­', $this -> _hyphen, $word );
    }

    // Replace a custom hyphenate-string with the hyphen.
    $result = $this -> _replaceCustomHyphen ( $word );
    if ( false !== $result ) {
        return $result;
    }

    // If the word already contains a hyphen-character, we assume it is
    // already hyphenated and return the word 'as is'.
    if ( false !== strpos ( $word, $this -> _hyphen ) ) {
        return $word;
    }

    // Words containing a special string are split at that string and the
    // parts are hyphenated separately.
    $result = $this -> _handleSpecialStrings ( $word );
    if ( false !== $result ) {
        return $result;
    }

    return $this -> _hyphenateWord ( $word );
}
648 :
/**
 * Hyphenate a single word using the hyphenation patterns
 *
 * Only words of the form "special characters, word, special characters"
 * (both runs of special characters may be empty) are hyphenated. The
 * special characters are taken off, the word in between is hyphenated and
 * the special characters are put back afterwards.
 *
 * Every other word - i.e. a word with special characters in the middle or
 * without any regular character - is returned unchanged, as it can not
 * really be hyphenated automatically. Formerly a part following two or
 * more special characters in the middle of a word was dropped.
 *
 * A hyphen is set at a position when the value found for it is odd and not
 * higher than the quality-level and when the left and right minimum are met.
 *
 * @param string $word The word to hyphenate
 *
 * @return string The hyphenated word
 */
private function _hyphenateWord($word) {
    $specials = '\.\:\-\,\;\!\?\/\\\(\)\[\]\{\}\"\'\+\*\#\§\$\%\&\=\@';

    $parts   = array();
    $matcher = '/^([' . $specials . ']*)([^' . $specials . ']+)([' . $specials . ']*)$/D';
    if (!preg_match($matcher, $word, $parts)) {
        return $word;
    }
    list(, $prepend, $core, $append) = $parts;

    $positions = $this->_getHyphenationPositions($core);
    $length    = strlen($core);
    $lastLeft  = $length - $this->_rightMin;
    $syllables = array();
    $start     = 0;

    for ($i = 1; $i < $length; $i++) {
        $value = $positions[$i];
        // Odd values mark a possible hyphenation. An odd value is never 0,
        // so no separate check for 0 is needed.
        if (1 !== ($value % 2)) {
            continue;
        }
        if ($value > $this->_quality) {
            continue;
        }
        if ($i < $this->_leftMin || $i > $lastLeft) {
            continue;
        }
        $syllables[] = substr($core, $start, $i - $start);
        $start       = $i;
    }
    $syllables[] = substr($core, $start);

    return $prepend . trim(implode($this->_hyphen, $syllables)) . $append;
}
705 :
/**
 * Get the positions, where a hyphenation might occur and where not.
 *
 * The word is converted to lower case and enclosed in '_' on both sides.
 * For every start position all parts with a length between the shortest
 * and the longest pattern length are looked up in the pattern-array. The
 * digits of a found pattern are written to the positions-array whenever
 * they are higher than the value already stored there.
 *
 * @param string $word The word to hyphenate
 *
 * @return array The numerical positions-array. It holds one entry for each
 *               byte of the enclosed word.
 */
private function _getHyphenationPositions($word) {
    $padded    = '_' . strtolower($word) . '_';
    $length    = strlen($padded);
    $positions = array_fill(0, $length, 0);

    for ($start = 0; $start < $length - 1; $start++) {
        // A part can neither exceed the rest of the word nor the longest
        // pattern length.
        $limit = min($length - $start, $this->_longestPattern);

        for ($size = $this->_shortestPattern; $size <= $limit; $size++) {
            $part = substr($padded, $start, $size);
            if (!isset($this->_pattern[$part])) {
                continue;
            }

            // We found a pattern for this part.
            $digits    = (string) $this->_pattern[$part];
            $count     = strlen($digits);
            $corrector = 1;
            for ($offset = 0; $offset < $count; $offset++) {
                $digit = $digits[$offset];
                $key   = $start + $offset - $corrector;
                if (isset($positions[$key]) && (int) $digit > $positions[$key]) {
                    $positions[$key] = (int) $digit;
                }
                // Every digit above 0 shifts the following entries by one
                // position.
                if ($digit > 0) {
                    $corrector++;
                }
            }
        }
    }

    return $positions;
}
750 :
/**
 * Check whether this string shall not be hyphenated
 *
 * A word shall not be hyphenated when a no-hyphenate-marker is set and the
 * word starts with that marker. In that case the marker as well as all
 * custom hyphens are removed from the word. When marking of customizations
 * is enabled, the customization marker is put in front of the result.
 *
 * If the word can be hyphenated, <var>false</var> is returned.
 *
 * @param string $word The word to be hyphenated
 *
 * @return string|false
 */
private function _isNotToBeHyphenated($word) {
    $marker = $this->_noHyphenateString;
    if (null === $marker) {
        return false;
    }
    // The marker has to be the very beginning of the word.
    if (0 !== strpos($word, $marker)) {
        return false;
    }

    $cleaned = str_replace($marker, '', $word);
    $cleaned = str_replace($this->_customHyphen, '', $cleaned);

    if (true === $this->_markCustomized && null !== $this->_customizedMarker) {
        $cleaned = $this->getCustomizationMarker() . $cleaned;
    }

    return $cleaned;
}
776 :
/**
 * Replace a custom hyphen
 *
 * Every occurrence of the custom hyphen is replaced by the hyphenation
 * character. When marking of customizations is enabled, the customization
 * marker is put in front of the result.
 *
 * @param string $word The word to parse
 *
 * @return string|false The word with replaced custom hyphens or
 *                      <var>false</var> when no custom hyphen is set or
 *                      the word does not contain it
 */
private function _replaceCustomHyphen($word) {
    $custom = $this->_customHyphen;
    if (null === $custom) {
        return false;
    }
    if (false === strpos($word, $custom)) {
        return false;
    }

    $replaced = str_replace($custom, $this->_hyphen, $word);
    if (true === $this->_markCustomized && null !== $this->_customizedMarker) {
        $replaced = $this->getCustomizationMarker() . $replaced;
    }

    return $replaced;
}
794 :
/**
 * Handle special strings
 *
 * The special strings are checked in the order they have been set. The
 * word is split at the first special string it contains, every part is
 * hyphenated via hyphenateWord() and the parts are joined again using the
 * same special string.
 *
 * @param string $word The Word to hyphenate
 *
 * @return string|false The hyphenated word or <var>false</var> when the
 *                      word contains none of the special strings
 */
public function _handleSpecialStrings($word) {
    foreach ($this->_specialStrings as $separator) {
        if (false !== strpos($word, $separator)) {
            $parts = array_map(
                array($this, 'hyphenateWord'),
                explode($separator, $word)
            );

            return implode($separator, $parts);
        }
    }

    return false;
}
823 :
/**
 * Set the special strings
 *
 * A word containing one of these strings is split at that string and the
 * parts are hyphenated separately. So these strings can be used for
 * further parsing of the text, for instance as a string to be replaced with
 * a soft return or any other symbol your application needs.
 *
 * @param array $specialStrings An array of special strings. Any other
 *                              value is cast to an array.
 *
 * @return Org_Heigl_Hyphenator Provides fluent Interface
 */
public function setSpecialStrings($specialStrings = array()) {
    $list = (array) $specialStrings;
    $this->_specialStrings = $list;

    return $this;
}
841 :
/**
 * This method sets the Hyphenation-Character.
 *
 * @param string $char The Hyphenation Character. The value is cast to a
 *                     string.
 *
 * @return Org_Heigl_Hyphenator Provides fluent Interface
 */
public function setHyphen($char) {
    $hyphen = (string) $char;
    $this->_hyphen = $hyphen;

    return $this;
}
853 :
/**
 * Get the hyphenation character
 *
 * @return string The character that is put between the syllables
 */
public function getHyphen() {
    $hyphen = $this->_hyphen;

    return $hyphen;
}
862 :
/**
 * This method sets the minimum number of characters that have to stay to
 * the left of a hyphenation
 *
 * @param int $count The left minimum. The value is cast to an integer.
 *
 * @return Org_Heigl_Hyphenator Provides fluent Interface
 */
public function setLeftMin($count) {
    $minimum = (int) $count;
    $this->_leftMin = $minimum;

    return $this;
}
875 :
/**
 * This method sets the minimum number of characters that have to stay to
 * the right of a hyphenation
 *
 * @param int $count The right minimum. The value is cast to an integer.
 *
 * @return Org_Heigl_Hyphenator Provides fluent Interface
 */
public function setRightMin($count) {
    $minimum = (int) $count;
    $this->_rightMin = $minimum;

    return $this;
}
888 :
/**
 * This method sets the minimum number of characters a word has to have
 * before being hyphenated
 *
 * @param int $count The minimum characters. The value is cast to an
 *                   integer.
 *
 * @return Org_Heigl_Hyphenator Provides fluent Interface
 */
public function setWordMin($count) {
    $minimum = (int) $count;
    $this->_wordMin = $minimum;

    return $this;
}
901 :
/**
 * This method sets the special characters for a specified language
 *
 * The characters are stored in the declared property $_specialChars.
 * Formerly they have been written to the undeclared property
 * 'specialChars', so the declared property never changed.
 *
 * @param string $chars The special characters
 *
 * @return Org_Heigl_Hyphenator Provides fluent Interface
 */
public function setSpecialChars($chars) {
    $this->_specialChars = $chars;

    return $this;
}
913 :
/**
 * Enable or disable caching of hyphenated texts
 *
 * @param boolean $caching Whether to enable caching or not. Defaults to
 *                         <var>true</var>. The value is cast to a boolean.
 *
 * @return Org_Heigl_Hyphenator Provides fluent Interface
 */
public function enableCaching($caching = true) {
    $enabled = (bool) $caching;
    $this->_cachingEnabled = $enabled;

    return $this;
}
927 :
/**
 * Check whether caching is enabled or not
 *
 * @return boolean <var>true</var> when caching is enabled
 */
public function isCachingEnabled() {
    $enabled = (bool) $this->_cachingEnabled;

    return $enabled;
}
936 :
/**
 * Write <var>string</var> to the cache.
 *
 * <var>string</var> can be retrieved using <var>key</var>. Nothing is
 * written when cacheRead() already returns an entry for <var>key</var> or
 * when no cache backend has been set via setCache().
 *
 * @param string $key    The key under which the string can be found in the cache
 * @param string $string The string to cache
 *
 * @return Org_Heigl_Hyphenator Provides fluent Interface
 */
public function cacheWrite($key, $string) {
    $cache = self::getCache();

    // Without a backend there is nothing to write to.
    if (null === $cache) {
        return $this;
    }

    if (false === $this->cacheRead($key)) {
        $cache->save($string, $key);
    }

    return $this;
}
958 :
/**
 * Get the cached string to a key
 *
 * @param string $key The key to return a string to
 *
 * @return string|false The cached string. <var>false</var> is returned
 *                      when no cache backend has been set via setCache()
 *                      or when the backend returns an empty value for
 *                      the key.
 */
public function cacheRead($key) {
    $cache = self::getCache();

    // Without a backend nothing can have been cached.
    if (null === $cache) {
        return false;
    }

    $result = $cache->load($key);

    return $result ? $result : false;
}
978 :
/**
 * Set the quality-level for the hyphenation
 *
 * Only positions whose pattern value is not higher than this level are
 * used for a hyphenation.
 *
 * @param int $quality The quality-level to set. The value is cast to an
 *                     integer.
 *
 * @return Org_Heigl_Hyphenator Provides fluent Interface
 */
public function setQuality($quality = 5) {
    $level = (int) $quality;
    $this->_quality = $level;

    return $this;
}
992 :
/**
 * Set a string that will be replaced with the hyphenation character before
 * the hyphenation actually starts.
 *
 * If this string is found in a word no hyphenation will be done except for
 * the place where the custom hyphen has been found.
 *
 * @param string|null $customHyphen The Custom Hyphen to set. With
 *                                  <var>null</var> no custom hyphen is
 *                                  searched for.
 *
 * @return Org_Heigl_Hyphenator Provides fluent Interface
 */
public function setCustomHyphen($customHyphen = null) {
    $this->_customHyphen = $customHyphen;

    return $this;
}
1009 :
/**
 * Set a string that marks a word not to hyphenate
 *
 * A word has to start with this string to be left unhyphenated.
 *
 * @param string|null $marker The Marker that marks a word. With
 *                            <var>null</var> no word is marked.
 *
 * @return Org_Heigl_Hyphenator Provides fluent Interface
 */
public function setNoHyphenateMarker($marker = null) {
    $this->_noHyphenateString = $marker;

    return $this;
}
1022 :
/**
 * Get the marker for custom hyphenations
 *
 * @return string The custom hyphen as set via setCustomHyphen(), cast to a
 *                string
 */
public function getCustomMarker() {
    $marker = (string) $this->_customHyphen;

    return $marker;
}
1031 :
/**
 * Get the marker for words not to hyphenate
 *
 * @return string The marker as set via setNoHyphenateMarker(), cast to a
 *                string
 */
public function getNoHyphenMarker() {
    $marker = (string) $this->_noHyphenateString;

    return $marker;
}
1040 :
/**
 * Set and retrieve whether or not to mark custom hyphenations
 *
 * This method always returns the current setting, so you can set AND
 * retrieve the value with this method. Calling it without a parameter (or
 * with <var>null</var>) leaves the setting untouched.
 *
 * @param null|boolean $mark Whether or not to mark
 *
 * @return boolean The setting after the call
 */
public function markCustomization($mark = null) {
    if (null === $mark) {
        return (bool) $this->_markCustomized;
    }

    $this->_markCustomized = (bool) $mark;

    return $this->_markCustomized;
}
1057 :
/**
 * Set the string that shall be prepended to a customized word.
 *
 * @param string $marker The Marker to set. The value is cast to a string.
 *
 * @return Org_Heigl_Hyphenator Provides fluent Interface
 */
public function setCustomizationMarker($marker) {
    $text = (string) $marker;
    $this->_customizedMarker = $text;

    return $this;
}
1069 :
/**
 * Get the string that shall be prepended to a customized word.
 *
 * @return string The marker, cast to a string
 */
public function getCustomizationMarker() {
    $marker = (string) $this->_customizedMarker;

    return $marker;
}
1078 : }
|