utf8_encoder.class.php 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  1. <?php
  2. /**
  3. * Encode from another encoding to UTF8:
  4. *
  5. * - add BOM
  6. * - change encoding
  7. * - convert html entities if turned on
  8. *
  9. * Note:
  10. *
  11. * Convert_html_entities cannot be turned on by default. This would be bad
  12. * for performance but, more than anything else, it may be perfectly valid to write
  13. * html entities without transformation - i.e. when writing html content.
  14. *
  15. * It may be better to move convert_html_entities to its own converter and to chain
  16. * converters together to achieve the same result.
  17. *
  18. * @copyright (c) 2012 University of Geneva
  19. * @license GNU General Public License - http://www.gnu.org/copyleft/gpl.html
  20. * @author Laurent Opprecht <laurent@opprecht.info>
  21. */
  class Utf8Encoder extends Converter
  {
      /**
       * True once convert() has emitted the BOM; cleared again by reset().
       *
       * @var bool
       */
      protected $started = false;

      /**
       * Name of the encoding the input strings are expected to be in.
       *
       * @var string
       */
      protected $from_encoding;

      /**
       * Converter built by EncodingConverter::create() for
       * $from_encoding -> Utf8::NAME.
       *
       * @var object
       */
      protected $encoding_converter;

      /**
       * When true, convert() also decodes HTML entities.
       *
       * @var bool
       */
      protected $convert_html_entities = false;

      /**
       * @param string|null $from_encoding         Source encoding. Any falsy value
       *                                           falls back to Encoding::system().
       * @param bool        $convert_html_entities Decode HTML entities while converting.
       */
      public function __construct($from_encoding = null, $convert_html_entities = false)
      {
          $this->from_encoding = $from_encoding ? $from_encoding : Encoding::system();
          $this->encoding_converter = EncodingConverter::create($this->from_encoding, Utf8::NAME);
          $this->convert_html_entities = $convert_html_entities;
          $this->reset();
      }

      /**
       * @return string The source encoding resolved in the constructor.
       */
      public function from_encoding()
      {
          return $this->from_encoding;
      }

      /**
       * @return string The target encoding, always Utf8::NAME.
       */
      public function to_encoding()
      {
          return Utf8::NAME;
      }

      /**
       * @return bool Whether convert() decodes HTML entities.
       */
      public function get_convert_html_entities()
      {
          return $this->convert_html_entities;
      }

      /**
       * Forget that the BOM has been written, so that the next call to
       * convert() prepends it again.
       */
      public function reset()
      {
          $this->started = false;
      }

      /**
       * Convert a chunk of text from the source encoding to UTF-8.
       *
       * The first chunk converted after construction or after reset() is
       * prefixed with Utf8::BOM; later chunks are not.
       *
       * @param string $string Text in the source encoding.
       * @return string The converted text, BOM-prefixed for the first chunk.
       */
      public function convert($string)
      {
          // Transcode before touching entities: html_entity_decode() is told the
          // text is UTF-8 and emits UTF-8 bytes for each entity. Doing it on the
          // not-yet-converted input would let the encoding converter re-encode
          // those bytes a second time (e.g. "&eacute;" ending up as "Ã©").
          // NOTE(review): assumes the encoding converter returns UTF-8 text, as
          // implied by the Utf8::NAME target passed to EncodingConverter::create().
          $string = $this->encoding_converter->convert($string);

          if ($this->convert_html_entities) {
              $string = html_entity_decode($string, ENT_COMPAT, Utf8::NAME);
          }

          if (!$this->started) {
              $this->started = true;
              $string = Utf8::BOM . $string;
          }

          return $string;
      }
  }