1 : <?php
2 : /**
3 : * $Id: Hyphenator.php 1114 2009-07-10 08:48:44Z heiglandreas $
4 : *
5 : * Copyright (c) 2008-2009 Andreas Heigl<andreas@heigl.org>
6 : *
7 : * Permission is hereby granted, free of charge, to any person obtaining a copy
8 : * of this software and associated documentation files (the "Software"), to deal
9 : * in the Software without restriction, including without limitation the rights
10 : * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 : * copies of the Software, and to permit persons to whom the Software is
12 : * furnished to do so, subject to the following conditions:
13 : *
14 : * The above copyright notice and this permission notice shall be included in
15 : * all copies or substantial portions of the Software.
16 : *
17 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 : * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 : * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 : * THE SOFTWARE.
24 : *
25 : * @category Org_Heigl
26 : * @package Org_Heigl_Hyphenator
27 : * @author Andreas Heigl <andreas@heigl.org>
28 : * @copyright 2008 Andreas Heigl<andreas@heigl.org>
29 : * @license http://www.opensource.org/licenses/mit-license.php MIT-License
30 : * @version SVN: $Revision: 1114 $
31 : * @since 12.06.2008
32 : */
33 :
34 : /**
35 : * This class implements word-hyphenation
36 : *
37 : * Word-hyphenation is implemented on the basis of the algorithms developed by
38 : * Franklin Mark Liang for LaTeX as described in his dissertation at the department
39 : * of computer science at stanford university.
40 : *
41 : * This package is based on an idea of Mathias Nater<mnater@mac.com> who
42 : * implemented this word-hyphenation-algorithm for javascript.
43 : *
44 : * Hyphenating means in this case, that all possible hyphenations in a word are
45 : * marked using the soft-hyphen character (ASCII-Character 173) or any other
46 : * character set via the setHyphen() method.
47 : *
48 : * A complete text will first be divided into words via a regular expression
49 : * that takes all characters that the \w-Special-Character specifies as well as
50 : * the '@'-Character and possible other - language-specific - characters that
51 : * can be set via the setSpecialChars() method.
52 : *
53 : * Hyphenation is done using a set of files taken from a current TeX-Distribution
54 : * that are matched using the method getTexFile().
55 : *
56 : * So here is an example for the usage of the class:
57 : * <code>
58 : * <?php
59 : * $hyphenator = Org_Heigl_Hyphenator::getInstance ( 'de' );
60 : * $hyphenator -> setHyphen ( '-' )
61 : * // Minimum 5 characters before the first hyphenation
62 : * -> setLeftMin ( 5 )
63 : * // Hyphenate only words with more than 4 characters
64 : * -> setWordMin ( 5 )
65 : * // Set some special characters
66 : * -> setSpecialChars ( 'äöüß' )
67 : * // Only Hyphenate with the best quality
68 : * -> setQuality ( Org_Heigl_Hyphenator::QUALITY_BEST )
69 : * // Words that shall not be hyphenated have to start with this string
70 : * -> setNoHyphenateMarker ( 'nbr:' )
71 : * // Words that contain this string are custom hyphenated
72 : * -> setCustomHyphen ( '--' );
73 : *
74 : * // Hyphenate the string $text
75 : * $hyphenated = $hyphenator -> hyphenate ( $text );
76 : * ?>
77 : * </code>
78 : *
79 : * @category Org_Heigl
80 : * @package Org_Heigl_Hyphenator
81 : * @author Andreas Heigl <a.heigl@wdv.de>
82 : * @copyright 2008-2010 Andreas Heigl
83 : * @license http://www.opensource.org/licenses/mit-license.php MIT-License
84 : * @version SVN: $Revision: 1114 $
85 : * @see http://code.google.com/p/hyphenator
86 : * @see http://www.tug.org/docs/liang/liang-thesis.pdf
87 : * @since 12.06.2008
88 : */
final class Org_Heigl_Hyphenator
{
    /**
     * Quality levels usable with setQuality().
     *
     * A pattern value is only applied when it is lower than or equal to the
     * configured quality, so a lower level applies fewer pattern values.
     */
    const QUALITY_BEST   = 1;
    const QUALITY_BETTER = 3;
    const QUALITY_NORMAL = 5;
    const QUALITY_POOR   = 7;
    const QUALITY_POREST = 9;

    /**
     * Correctly spelled alias of QUALITY_POREST, which is kept for
     * backwards compatibility.
     */
    const QUALITY_POOREST = 9;

    /**
     * The language parse() uses when the options do not name one.
     *
     * @var string $_defaultLanguage
     */
    private static $_defaultLanguage = 'en';

    /**
     * One hyphenator instance per language code, filled by getInstance().
     *
     * @var array $_store
     */
    private static $_store = array();

    /**
     * The caching backend shared by all instances (null when none is set).
     *
     * @var Zend_Cache|null $_cache
     */
    private static $_cache = null;

    /**
     * Whether this instance reads from / writes to the cache.
     *
     * Caching is turned off by default.
     *
     * @var boolean $_cachingEnabled
     */
    private $_cachingEnabled = false;

    /**
     * Prefix that marks a word that must not be hyphenated.
     *
     * @var string|null $_noHyphenateString
     */
    private $_noHyphenateString = null;

    /**
     * The character(s) inserted at every hyphenation point.
     *
     * The constructor sets this to chr(173) when it is still null; it can be
     * overwritten using setHyphen().
     *
     * @var string|null $_hyphen
     */
    private $_hyphen = null;

    /**
     * Minimum number of bytes that have to stay left of a hyphenation.
     *
     * @var int $_leftMin
     */
    private $_leftMin = 2;

    /**
     * Minimum number of bytes that have to stay right of a hyphenation.
     *
     * @var int $_rightMin
     */
    private $_rightMin = 2;

    /**
     * Whether words with customized hyphenation get the customization
     * marker prepended.
     *
     * @var boolean $_markCustomized
     */
    private $_markCustomized = false;

    /**
     * The string prepended to customized words when marking is enabled.
     *
     * @var string|null $_customizedMarker
     */
    private $_customizedMarker = '<!--cm-->';

    /**
     * The shortest pattern length looked up while hyphenating.
     *
     * @var int $_shortestPattern
     */
    private $_shortestPattern = 2;

    /**
     * The longest pattern length looked up while hyphenating.
     *
     * @var int $_longestPattern
     */
    private $_longestPattern = 10;

    /**
     * Language specific special characters, stored by setSpecialChars().
     *
     * The value is only stored; no method of this class reads it.
     *
     * @var string $_specialChars
     */
    private $_specialChars = '';

    /**
     * Words shorter than this many bytes are returned unhyphenated.
     *
     * @var int $_wordMin
     */
    private $_wordMin = 6;

    /**
     * The pattern map (pattern string => digit string) of the language.
     *
     * @var array|null $_pattern
     */
    private $_pattern = null;

    /**
     * The currently set quality; pattern values above it are ignored.
     *
     * @var int $_quality
     */
    private $_quality = 9;

    /**
     * The string that marks a custom hyphenation point inside a word.
     *
     * @var string|null $_customHyphen
     */
    private $_customHyphen = '--';

    /**
     * The language code whose pattern file has been loaded.
     *
     * @var string|null $_language
     */
    private $_language = null;

    /**
     * Hyphenate a string without handling a hyphenator object yourself.
     *
     * The instance for $options['language'] (or the default language) is
     * fetched via getInstance(); every other option "foo" is forwarded to the
     * instance's setFoo() method before the string is hyphenated. Because
     * instances are shared per language, options set here stay in effect for
     * later calls that use the same language.
     *
     * @param string     $string  The string to hyphenate
     * @param array|null $options Options, keyed by setter name without "set"
     *
     * @return string The hyphenated string
     */
    public static function parse($string, $options = null)
    {
        if (null === $options) {
            $options = array();
        }

        if (isset($options['language'])) {
            $language = $options['language'];
        } else {
            $language = self::getDefaultLanguage();
        }
        unset($options['language']);

        $hyphenator = self::getInstance($language);

        foreach ($options as $key => $val) {
            $setter = array($hyphenator, 'set' . $key);
            if (!is_callable($setter)) {
                // Report unknown options the same way on every PHP version
                // instead of relying on call_user_func()'s own failure mode.
                trigger_error(
                    'Unknown hyphenation option "' . $key . '"',
                    E_USER_WARNING
                );
                continue;
            }
            call_user_func($setter, $val);
        }

        return $hyphenator->hyphenate($string);
    }

    /**
     * Set the default language used by parse().
     *
     * @param string $language The language to set.
     *
     * @return void
     */
    public static function setDefaultLanguage($language)
    {
        self::$_defaultLanguage = $language;
    }

    /**
     * Get the default language used by parse().
     *
     * @return string
     */
    public static function getDefaultLanguage()
    {
        return self::$_defaultLanguage;
    }

    /**
     * Get the hyphenator instance for the language <var>$language</var>.
     *
     * An already created instance is returned directly. Otherwise the parsed
     * pattern file is generated from the TeX file if it is missing, and a new
     * instance is created and stored.
     *
     * NOTE(review): $language becomes part of a file path that is later
     * included, so it should not be taken from untrusted input unchecked.
     *
     * @param string $language The language to use for hyphenating
     *
     * @return Org_Heigl_Hyphenator A Hyphenator-Object
     * @throws InvalidArgumentException when no parsed pattern file exists and
     *                                  none could be created
     */
    public static function getInstance($language = 'en')
    {
        if (isset(self::$_store[$language])
            && self::$_store[$language] instanceof self
        ) {
            return self::$_store[$language];
        }

        $baseDir = dirname(__FILE__)
                 . DIRECTORY_SEPARATOR
                 . 'Hyphenator'
                 . DIRECTORY_SEPARATOR;
        $parsedFile = $baseDir
                    . 'parsedFiles'
                    . DIRECTORY_SEPARATOR
                    . $language
                    . '.php';

        if (!file_exists($parsedFile)) {
            $texFile = $baseDir
                     . 'files'
                     . DIRECTORY_SEPARATOR
                     . self::getTexFile($language);
            self::parseTexFile($texFile, $parsedFile);
        }
        if (!file_exists($parsedFile)) {
            throw new InvalidArgumentException('file ' . $language . '.php does not exist');
        }

        self::$_store[$language] = new self($language);

        return self::$_store[$language];
    }

    /**
     * Parse a TeX hyphenation file and create the matching PHP pattern file.
     *
     * Each whitespace separated pattern inside \patterns{...} is written as
     * <code>$pattern['letters'] = 'digits';</code> where "letters" is the
     * pattern without digits and "digits" has one byte per byte of the
     * original pattern: the digit itself, or 0 for every non-digit.
     *
     * @param string $file       The original TeX file
     * @param string $parsedFile The PHP file to be created
     *
     * @return boolean false when the TeX file is unreadable, contains no
     *                 \patterns block or the target cannot be opened
     */
    public static function parseTexFile($file, $parsedFile)
    {
        if (!is_file($file) || !is_readable($file)) {
            return false;
        }
        $content = file_get_contents($file);
        if (false === $content) {
            return false;
        }

        $matches = array();
        if (!preg_match('/[\\n\\r]\\\\patterns\\{(.*)\\}\\s*\\\\/sim', $content, $matches)) {
            return false;
        }

        // Strip TeX comments, unwrap \n{...} and translate "a/"o/"u and '.'.
        $body = preg_replace('/%.*/', '', $matches[1]);
        $body = preg_replace('/\\\\n\\{(.+?)\\}/', '\1', $body);
        $body = preg_replace(
            array('/"a/', '/"o/', '/"u/', '/\\./'),
            array('ä', 'ö', 'ü', '_'),
            $body
        );

        $handle = fopen($parsedFile, 'w');
        if (false === $handle) {
            return false;
        }

        fwrite($handle, self::_buildFileHeader(basename($file)));
        foreach (preg_split('/\\s+/', $body) as $texPattern) {
            // Patterns that still contain TeX macros cannot be used.
            if (strpos($texPattern, '\\') !== false) {
                continue;
            }
            $letters = preg_replace('/[0-9]/', '', $texPattern);
            if ('' === $letters) {
                continue;
            }
            // Keep digits in place and turn every other byte into '0'.
            $digits = preg_replace('/[^0-9]/', '0', $texPattern);
            // var_export() quotes the key so that patterns containing an
            // apostrophe still produce a valid PHP file.
            fwrite(
                $handle,
                '$pattern[' . var_export($letters, true) . '] = \'' . $digits . '\';' . "\n"
            );
        }
        fwrite($handle, '?>');
        fclose($handle);

        return true;
    }

    /**
     * Build the comment header written at the top of a generated pattern file.
     *
     * @param string $sourceName Base name of the TeX file the patterns stem from
     *
     * @return string
     */
    private static function _buildFileHeader($sourceName)
    {
        // '$' . 'Id' . '$' is split up so that version control keyword
        // expansion does not touch this source file.
        return '<?php
/**
 * $' . 'Id' . '$
 *
 * Copyright (c) 2008-2010 Andreas Heigl<andreas@heigl.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 * This file has been automaticly created from the file ' . $sourceName . '
 * via the method Org_Heigl_Hyphenator::parseTexFile().
 *
 * DO NOT EDIT THIS FILE EXCEPT YOU KNOW WHAT YOU DO!!
 *
 * @category   Org_Heigl
 * @package    Org_Heigl_Hyphenator
 * @subpackage HyphenationFiles
 * @author     Org_Heigl_Hyphenator
 * @copyright  2008-2010 Andreas Heigl<andreas@heigl.org>
 * @license    http://www.opensource.org/licenses/mit-license.php MIT-License
 * @version    1.0
 * @since      ' . date('d.m.Y') . '
 */
';
    }

    /**
     * Return the name of the TeX hyphenation file for a language code.
     *
     * Unknown language codes fall back to the English file.
     *
     * @param string $language The language code to get the file name for
     *
     * @return string
     */
    public static function getTexFile($language)
    {
        // The former list contained several duplicate keys; PHP keeps only
        // the last value of a duplicated key, so exactly those effective
        // entries are listed here.
        // NOTE(review): confirm that 'dk', 'fi', 'hu' and 'sk' point at the
        // intended files - each had competing entries.
        $files = array(
            'ba'     => 'bahyph.tex',
            'ca'     => 'cahyph.tex',
            'cz'     => 'czhyph.tex',
            'de'     => 'dehyphn.tex',
            'de_OLD' => 'dehypht.tex',
            'dk'     => 'dkcommon.tex',
            'ee'     => 'eehyph.tex',
            'en'     => 'hyphen.tex',
            'eo'     => 'eohyph.tex',
            'es'     => 'eshyph.tex',
            'fi'     => 'fihyph.tex',
            'fr'     => 'frhyph.tex',
            'ga'     => 'gahyph.tex',
            'gr'     => 'grhyph.tex',
            'hr'     => 'hrhyph.tex',
            'hu'     => 'huhyphn.tex',
            'ic'     => 'icehyph.tex',
            'in'     => 'inhyph.tex',
            'it'     => 'ithyph.tex',
            'la'     => 'lahyph.tex',
            'mn'     => 'mnhyphen.tex',
            'ne'     => 'nehyph.tex',
            'no'     => 'nohyphbc.tex',
            'pl'     => 'plhyph.tex',
            'pt'     => 'pt8hyph.tex',
            'ro'     => 'rohyphen.tex',
            'se'     => 'sehyph.tex',
            'si'     => 'sihyph23.tex',
            'sk'     => 'skhyph2e.tex',
            'sr'     => 'srhyphc.tex',
            'tr'     => 'trhyph.tex',
            ' '      => 'zerohyph.tex',
        );

        if (array_key_exists($language, $files)) {
            return $files[$language];
        }

        return $files['en'];
    }

    /**
     * Set the caching backend shared by all hyphenator instances.
     *
     * NOTE(review): the type hint requires an instance of Zend_Cache; verify
     * that the cache objects actually in use satisfy it.
     *
     * @param Zend_Cache $cache The caching backend
     *
     * @uses   Zend_Cache
     * @link   http://framework.zend.com/zend.cache.html
     * @return boolean Always true
     */
    public static function setCache(Zend_Cache $cache)
    {
        self::$_cache = $cache;

        return true;
    }

    /**
     * Get the caching backend.
     *
     * @return Zend_Cache|null null when no backend has been set
     */
    public static function getCache()
    {
        return self::$_cache;
    }

    /**
     * Initialise the hyphenator for the language <var>$language</var>.
     *
     * The parsed pattern file of the language is loaded. For a code such as
     * "xx_YY" the file of "xx" is used when no file for the full code exists.
     * Instances are normally obtained via getInstance(), which creates
     * missing pattern files and shares one instance per language.
     *
     * @param string $language The language to use for hyphenating
     *
     * @throws Exception when no pattern file can be found
     */
    public function __construct($language = 'en')
    {
        $candidates = array($language);
        $separator  = strpos($language, '_');
        if (false !== $separator && $separator > 0) {
            $candidates[] = substr($language, 0, $separator);
        }

        $directory = dirname(__FILE__)
                   . DIRECTORY_SEPARATOR
                   . 'Hyphenator'
                   . DIRECTORY_SEPARATOR
                   . 'parsedFiles'
                   . DIRECTORY_SEPARATOR;

        // The included file fills $pattern in this scope.
        $pattern = array();
        $loaded  = false;
        foreach ($candidates as $candidate) {
            $parsedFile = $directory . $candidate . '.php';
            if (!is_file($parsedFile)) {
                continue;
            }
            // Plain include: include_once would return true without
            // defining $pattern when the file has been loaded before.
            include $parsedFile;
            $this->_language = $candidate;
            $loaded          = true;
            break;
        }
        if (!$loaded) {
            throw new Exception(
                'File \'' . $directory . $language . '.php\' could not be found'
            );
        }
        $this->_pattern = $pattern;

        if (null === $this->_hyphen) {
            $this->_hyphen = chr(173);
        }
    }

    /**
     * Hyphenate a complete string.
     *
     * The string is split at every blank, each chunk is passed to
     * hyphenateWord() and the results are joined with blanks again. Other
     * delimiters (line breaks, tabs) and markup are not taken into account.
     *
     * @param string $string The string to hyphenate
     *
     * @return string The hyphenated string
     */
    public function hyphenate($string)
    {
        $useCache = $this->isCachingEnabled();

        if ($useCache) {
            $cached = $this->cacheRead($string);
            if (false !== $cached) {
                return $cached;
            }
        }

        $words = explode(' ', $string);
        foreach ($words as $index => $word) {
            $words[$index] = $this->hyphenateWord($word);
        }
        $hyphenated = implode(' ', $words);

        if ($useCache) {
            $this->cacheWrite($string, $hyphenated);
        }

        return $hyphenated;
    }

    /**
     * Hyphenate a single word.
     *
     * The checks run in this order, the first matching one decides:
     * blank word, no-hyphenate marker, minimum word length, contained
     * soft hyphen, custom hyphen, already contained hyphen string,
     * compound word ('-/-' or '-'), pattern based hyphenation.
     *
     * @param string $word The word to hyphenate
     *
     * @return string The hyphenated word
     */
    public function hyphenateWord($word)
    {
        if ('' === trim($word)) {
            return '';
        }

        $noHyphenate  = (string) $this->_noHyphenateString;
        $customHyphen = (string) $this->_customHyphen;
        $hyphen       = (string) $this->_hyphen;

        // A word starting with the no-hyphenate marker is returned without
        // the marker and without custom hyphens, but otherwise untouched.
        if ('' !== $noHyphenate && 0 === strpos($word, $noHyphenate)) {
            $plain = str_replace($noHyphenate, '', $word);
            if ('' !== $customHyphen) {
                $plain = str_replace($customHyphen, '', $plain);
            }
            return $this->_applyCustomizationMarker($plain);
        }

        if ($this->_wordMin > strlen($word)) {
            return $word;
        }

        // "\xC2\xAD" is the UTF-8 encoded soft hyphen (U+00AD).
        // NOTE(review): the original comment talks about the HTML entity for
        // the soft hyphen - confirm whether '&shy;' was meant here.
        $softHyphen = "\xC2\xAD";
        if (false !== strpos($word, $softHyphen)) {
            return str_replace($softHyphen, $this->_hyphen, $word);
        }

        // Custom hyphens are the only hyphenation points of their word.
        if ('' !== $customHyphen && false !== strpos($word, $customHyphen)) {
            $custom = str_replace($customHyphen, $this->_hyphen, $word);
            return $this->_applyCustomizationMarker($custom);
        }

        // A word containing the hyphen string is taken as hyphenated already.
        if ('' !== $hyphen && false !== strpos($word, $hyphen)) {
            return $word;
        }

        // Compound words: hyphenate the parts and keep the delimiter.
        foreach (array('-/-', '-') as $delimiter) {
            if (false !== strpos($word, $delimiter)) {
                return $this->_hyphenateParts($word, $delimiter);
            }
        }

        $specials = '\.\:\-\,\;\!\?\/\\\(\)\[\]\{\}\"\'\+\*\#\§\$\%\&\=\@';

        // A special character in the middle of the word: return it AS IS.
        if (preg_match('/[^' . $specials . '][' . $specials . '][^' . $specials . ']/', $word)) {
            return $word;
        }

        // Separate leading and trailing special characters from the word.
        $prepend = '';
        $append  = '';
        $match   = array();
        if (preg_match('/([' . $specials . ']*)([^' . $specials . ']+)([' . $specials . ']*)/', $word, $match)) {
            $prepend = $match[1];
            $word    = $match[2];
            $append  = $match[3];
        }

        $positions = $this->_computePositions($word);
        $length    = strlen($word);
        $lastBreak = $length - $this->_rightMin;
        $syllables = array();
        $offset    = 0;
        for ($i = 1; $i < $length; $i++) {
            // Only odd values allow a break (values above the quality level
            // were never stored), and both margins have to be respected.
            if (1 !== $positions[$i] % 2
                || $i < $this->_leftMin
                || $i > $lastBreak
            ) {
                continue;
            }
            $syllables[] = substr($word, $offset, $i - $offset);
            $offset      = $i;
        }
        $syllables[] = substr($word, $offset);

        return $prepend . trim(implode($this->_hyphen, $syllables)) . $append;
    }

    /**
     * Prepend the customization marker when marking is enabled.
     *
     * @param string $string The customized word
     *
     * @return string
     */
    private function _applyCustomizationMarker($string)
    {
        if (null !== $this->_customizedMarker && true === $this->_markCustomized) {
            return $this->getCustomizationMarker() . $string;
        }

        return $string;
    }

    /**
     * Hyphenate every part of a compound word and re-join the parts.
     *
     * @param string $word      The compound word
     * @param string $delimiter The string separating the parts
     *
     * @return string
     */
    private function _hyphenateParts($word, $delimiter)
    {
        $parts = explode($delimiter, $word);
        foreach ($parts as $index => $part) {
            $parts[$index] = $this->hyphenateWord($part);
        }

        return implode($delimiter, $parts);
    }

    /**
     * Collect the highest applicable pattern value for every position.
     *
     * The word is lower-cased and wrapped in '_' (the word-boundary marker
     * used in the pattern files). Every substring between the shortest and
     * longest pattern length is looked up; the digits of a found pattern are
     * merged into the result, keeping the highest value per position that
     * does not exceed the configured quality.
     *
     * @param string $word The word without surrounding special characters
     *
     * @return array One integer for each byte of the wrapped word
     */
    private function _computePositions($word)
    {
        $wrapped   = '_' . strtolower($word) . '_';
        $total     = strlen($wrapped);
        $positions = array_fill(0, $total, 0);

        for ($start = 0; $start < $total - 1; $start++) {
            $maxLength = min($total - $start, $this->_longestPattern);
            for ($len = $this->_shortestPattern; $len <= $maxLength; $len++) {
                $part = substr($wrapped, $start, $len);
                if (!isset($this->_pattern[$part])) {
                    continue;
                }
                $values = (string) $this->_pattern[$part];
                $count  = strlen($values);
                // Every non-zero digit shifts the target index of all
                // following digits one position to the left.
                $corrector = 1;
                for ($p = 0; $p < $count; $p++) {
                    $value = (int) $values[$p];
                    $key   = $start + $p - $corrector;
                    if (isset($positions[$key])
                        && $value > $positions[$key]
                        && $value <= $this->_quality
                    ) {
                        $positions[$key] = $value;
                    }
                    if ($value > 0) {
                        $corrector++;
                    }
                }
            }
        }

        return $positions;
    }

    /**
     * Set the hyphenation character.
     *
     * @param string $char The hyphenation character
     *
     * @return Org_Heigl_Hyphenator Provides fluent interface
     */
    public function setHyphen($char)
    {
        $this->_hyphen = (string) $char;

        return $this;
    }

    /**
     * Get the hyphenation character.
     *
     * @return string
     */
    public function getHyphen()
    {
        return $this->_hyphen;
    }

    /**
     * Set the minimum number of characters that have to stay to the left of
     * a hyphenation.
     *
     * @param int $count The left minimum
     *
     * @return Org_Heigl_Hyphenator Provides fluent interface
     */
    public function setLeftMin($count)
    {
        $this->_leftMin = (int) $count;

        return $this;
    }

    /**
     * Set the minimum number of characters that have to stay to the right of
     * a hyphenation.
     *
     * @param int $count The right minimum
     *
     * @return Org_Heigl_Hyphenator Provides fluent interface
     */
    public function setRightMin($count)
    {
        $this->_rightMin = (int) $count;

        return $this;
    }

    /**
     * Set the minimum length a word needs to have to be hyphenated.
     *
     * @param int $count The minimum length
     *
     * @return Org_Heigl_Hyphenator Provides fluent interface
     */
    public function setWordMin($count)
    {
        $this->_wordMin = (int) $count;

        return $this;
    }

    /**
     * Set the special characters of a language.
     *
     * @param string $chars The special characters
     *
     * @return Org_Heigl_Hyphenator Provides fluent interface
     */
    public function setSpecialChars($chars)
    {
        $this->_specialChars = $chars;

        return $this;
    }

    /**
     * Enable or disable caching of hyphenated texts.
     *
     * @param boolean $caching Whether to enable caching. Defaults to
     *                         <var>true</var>
     *
     * @return Org_Heigl_Hyphenator
     */
    public function enableCaching($caching = true)
    {
        $this->_cachingEnabled = (bool) $caching;

        return $this;
    }

    /**
     * Check whether caching is enabled.
     *
     * @return boolean
     */
    public function isCachingEnabled()
    {
        return (bool) $this->_cachingEnabled;
    }

    /**
     * Write <var>$string</var> to the cache under <var>$key</var>.
     *
     * Nothing is written when no caching backend is set or when the key
     * already yields a cached value.
     *
     * @param string $key    The key under which the string can be found
     * @param string $string The string to cache
     *
     * @return Org_Heigl_Hyphenator
     */
    public function cacheWrite($key, $string)
    {
        $cache = self::getCache();

        if (null !== $cache && false === $this->cacheRead($key)) {
            $cache->save($string, $key);
        }

        return $this;
    }

    /**
     * Get the cached string for a key.
     *
     * @param string $key The key to return a string for
     *
     * @return string|false false when no backend is set or the backend
     *                      returns nothing (or an empty value) for the key
     */
    public function cacheRead($key)
    {
        $cache = self::getCache();
        if (null === $cache) {
            return false;
        }

        $result = $cache->load($key);
        if (!$result) {
            return false;
        }

        return $result;
    }

    /**
     * Set the quality the hyphenation needs to have at least.
     *
     * Pattern values above this level are ignored.
     *
     * @param int $quality The quality level to set
     *
     * @return Org_Heigl_Hyphenator
     */
    public function setQuality($quality = 5)
    {
        $this->_quality = (int) $quality;

        return $this;
    }

    /**
     * Set the string that marks a custom hyphenation point.
     *
     * A word containing this string is only hyphenated at the places where
     * the string occurs. Passing null disables custom hyphenation.
     *
     * @param string|null $customHyphen The custom hyphen to set
     *
     * @return Org_Heigl_Hyphenator
     */
    public function setCustomHyphen($customHyphen = null)
    {
        $this->_customHyphen = $customHyphen;

        return $this;
    }

    /**
     * Set the prefix that marks a word not to hyphenate.
     *
     * @param string|null $marker The marker that marks a word
     *
     * @return Org_Heigl_Hyphenator
     */
    public function setNoHyphenateMarker($marker = null)
    {
        $this->_noHyphenateString = $marker;

        return $this;
    }

    /**
     * Get the marker for custom hyphenations.
     *
     * @return string
     */
    public function getCustomMarker()
    {
        return (string) $this->_customHyphen;
    }

    /**
     * Get the marker for words not to hyphenate.
     *
     * @return string
     */
    public function getNoHyphenMarker()
    {
        return (string) $this->_noHyphenateString;
    }

    /**
     * Set and retrieve whether custom hyphenations are marked.
     *
     * The current setting is always returned, so the method can be used to
     * set AND to retrieve the value.
     *
     * @param null|boolean $mark Whether or not to mark; null leaves the
     *                           setting unchanged
     *
     * @return boolean
     */
    public function markCustomization($mark = null)
    {
        if (null !== $mark) {
            $this->_markCustomized = (bool) $mark;
        }

        return (bool) $this->_markCustomized;
    }

    /**
     * Set the string that is prepended to a customized word.
     *
     * @param string $marker The marker to set
     *
     * @return Org_Heigl_Hyphenator
     */
    public function setCustomizationMarker($marker)
    {
        $this->_customizedMarker = (string) $marker;

        return $this;
    }

    /**
     * Get the string that is prepended to a customized word.
     *
     * @return string
     */
    public function getCustomizationMarker()
    {
        return (string) $this->_customizedMarker;
    }
}
|