From 574ad003967853e43eb1512173c04e6759537d96 Mon Sep 17 00:00:00 2001
From: Andreas Schamberger
Date: Sat, 22 Oct 2011 11:25:26 +0200
Subject: [PATCH] mime parser

---
 src/BeSimple/SoapCommon/Mime/MultiPart.php  | 179 +++++++++++++++++++
 src/BeSimple/SoapCommon/Mime/Parser.php     | 193 ++++++++++++++++++++
 src/BeSimple/SoapCommon/Mime/Part.php       | 168 ++++++++++++++++++
 src/BeSimple/SoapCommon/Mime/PartHeader.php | 114 ++++++++++++
 4 files changed, 654 insertions(+)
 create mode 100644 src/BeSimple/SoapCommon/Mime/MultiPart.php
 create mode 100644 src/BeSimple/SoapCommon/Mime/Parser.php
 create mode 100644 src/BeSimple/SoapCommon/Mime/Part.php
 create mode 100644 src/BeSimple/SoapCommon/Mime/PartHeader.php

diff --git a/src/BeSimple/SoapCommon/Mime/MultiPart.php b/src/BeSimple/SoapCommon/Mime/MultiPart.php
new file mode 100644
index 0000000..6267b77
--- /dev/null
+++ b/src/BeSimple/SoapCommon/Mime/MultiPart.php
@@ -0,0 +1,179 @@
+<?php
+
+/*
+ * (c) Francis Besset
+ *
+ * This source file is subject to the MIT license that is bundled
+ * with this source code in the file LICENSE.
+ */
+
+namespace BeSimple\SoapCommon;
+
+/**
+ * Mime multi part container.
+ *
+ * Headers:
+ * - MIME-Version
+ * - Content-Type
+ * - Content-ID
+ * - Content-Location
+ * - Content-Description
+ *
+ * @author Andreas Schamberger
+ */
+class MultiPart extends PartHeader
+{
+    /**
+     * Content-ID of main part.
+     *
+     * @var string
+     */
+    protected $mainPartContentId;
+
+    /**
+     * Mime parts.
+     *
+     * @var array(\BeSimple\SoapCommon\Mime\Part)
+     */
+    protected $parts = array();
+
+    /**
+     * Construct new mime object.
+     *
+     * @param string $boundary Boundary string
+     * @return void
+     */
+    public function __construct($boundary = null)
+    {
+        $this->setHeader('MIME-Version', '1.0');
+        $this->setHeader('Content-Type', 'multipart/related');
+        $this->setHeader('Content-Type', 'type', 'text/xml');
+        $this->setHeader('Content-Type', 'charset', 'utf-8');
+        if (is_null($boundary)) {
+            $boundary = $this->generateBoundary();
+        }
+        $this->setHeader('Content-Type', 'boundary', $boundary);
+    }
+
+    /**
+     * Get mime message of this object, optionally prefixed with its headers.
+     *
+     * @param boolean $withHeaders Returned mime message contains headers
+     * @return string
+     */
+    public function getMimeMessage($withHeaders = false)
+    {
+        $message = ($withHeaders === true) ? $this->generateHeaders() : "";
+        // add parts
+        foreach ($this->parts as $part) {
+            $message .= "\r\n" . '--' . $this->getHeader('Content-Type', 'boundary') . "\r\n";
+            $message .= $part->getMessagePart();
+        }
+        $message .= "\r\n" . '--' . $this->getHeader('Content-Type', 'boundary') . '--';
+        return $message;
+    }
+
+    /**
+     * Get string array with MIME headers for usage in HTTP header (with CURL).
+     *
+     * @return array(string)
+     */
+    public function getHeadersForHttp()
+    {
+        $allowed = array(
+            'Content-Type',
+            'Content-Description',
+        );
+        $headers = array();
+        foreach ($this->headers as $fieldName => $value) {
+            if (in_array($fieldName, $allowed, true)) {
+                $fieldValue = '';
+                if (is_array($value)) {
+                    if (isset($value['@'])) {
+                        $fieldValue .= $value['@'];
+                    }
+                    foreach ($value as $subName => $subValue) {
+                        if ($subName !== '@') {
+                            $fieldValue .= '; ' . $subName . '="' . $subValue . '"';
+                        }
+                    }
+                } else {
+                    $fieldValue .= $value;
+                }
+                // for http only ISO-8859-1
+                $headers[] = $fieldName . ': '. iconv('utf-8', 'ISO-8859-1//TRANSLIT', $fieldValue);
+            }
+        }
+        return $headers;
+    }
+
+    /**
+     * Add new part to MIME message.
+     *
+     * @param \BeSimple\SoapCommon\Mime\Part $part Part that is added
+     * @param boolean $isMain Is the given part the main part of mime message
+     * @return void
+     */
+    public function addPart(Part $part, $isMain = false)
+    {
+        $contentId = trim($part->getHeader('Content-ID'), '<>');
+        if ($isMain === true) {
+            $this->mainPartContentId = $contentId;
+            $this->setHeader('Content-Type', 'start', $part->getHeader('Content-ID'));
+        }
+        $this->parts[$contentId] = $part;
+    }
+
+    /**
+     * Get part with given content id. If there is no content id given it
+     * returns the main part that is defined through the content-id start
+     * parameter.
+     *
+     * @param string $contentId Content id of desired part
+     * @return \BeSimple\SoapCommon\Mime\Part|null
+     */
+    public function getPart($contentId = null)
+    {
+        if (is_null($contentId)) {
+            $contentId = $this->mainPartContentId;
+        }
+        if (isset($this->parts[$contentId])) {
+            return $this->parts[$contentId];
+        }
+        return null;
+    }
+
+    /**
+     * Get all parts.
+     *
+     * @param boolean $includeMainPart Should main part be in result set
+     * @return array(\BeSimple\SoapCommon\Mime\Part)
+     */
+    public function getParts($includeMainPart = false)
+    {
+        if ($includeMainPart === true) {
+            return $this->parts;
+        }
+        $mainId = $this->mainPartContentId;
+        $parts = array();
+        foreach ($this->parts as $cid => $part) {
+            // compare as strings: numeric looking ids are cast to int array
+            // keys and a loose != would treat e.g. '1e3' and '1000' as equal
+            if (is_null($mainId) || (string) $cid !== (string) $mainId) {
+                $parts[$cid] = $part;
+            }
+        }
+        return $parts;
+    }
+
+    /**
+     * Returns a unique boundary string.
+     *
+     * @return string
+     */
+    protected function generateBoundary()
+    {
+        // TODO
+        return 'urn:uuid:' . \ass\Soap\Helper::generateUUID();
+    }
+}
\ No newline at end of file
diff --git a/src/BeSimple/SoapCommon/Mime/Parser.php b/src/BeSimple/SoapCommon/Mime/Parser.php
new file mode 100644
index 0000000..c7b815f
--- /dev/null
+++ b/src/BeSimple/SoapCommon/Mime/Parser.php
@@ -0,0 +1,193 @@
+<?php
+
+/*
+ * (c) Francis Besset
+ *
+ * This source file is subject to the MIT license that is bundled
+ * with this source code in the file LICENSE.
+ */
+
+namespace BeSimple\SoapCommon;
+
+/**
+ * Simple Multipart-Mime parser.
+ *
+ * @author Andreas Schamberger
+ */
+class Parser
+{
+    /**
+     * Parse the given Mime-Message and return a \BeSimple\SoapCommon\Mime\MultiPart object.
+     *
+     * @param string $mimeMessage Mime message string
+     * @param array(string=>string) $headers Array of header elements (e.g. coming from http request)
+     * @return \BeSimple\SoapCommon\Mime\MultiPart
+     */
+    public static function parseMimeMessage($mimeMessage, array $headers = array())
+    {
+        $boundary = null;
+        $start = null;
+        $multipart = new MultiPart();
+        // add given headers, e.g. coming from HTTP headers
+        if (count($headers) > 0) {
+            foreach ($headers as $name => $value) {
+                if ($name == 'Content-Type') {
+                    self::parseContentTypeHeader($multipart, $name, $value);
+                    $boundary = $multipart->getHeader('Content-Type', 'boundary');
+                    $start = $multipart->getHeader('Content-Type', 'start');
+                } else {
+                    $multipart->setHeader($name, $value);
+                }
+            }
+        }
+        $hitFirstBoundary = false;
+        $inHeader = true;
+        $content = '';
+        $currentPart = $multipart;
+        $lines = preg_split("/\r\n|\n/", $mimeMessage);
+        foreach ($lines as $line) {
+            // ignore http status code; only before the first boundary, as a body
+            // line (e.g. base64 data) may legitimately start with "HTTP/"
+            if ($hitFirstBoundary === false && substr($line, 0, 5) == 'HTTP/') {
+                continue;
+            }
+            if (isset($currentHeader)) {
+                // folded header: continuation lines start with whitespace
+                if (isset($line[0]) && ($line[0] === ' ' || $line[0] === "\t")) {
+                    $currentHeader .= $line;
+                    continue;
+                }
+                // a line without colon (e.g. preamble text) is not a header
+                if (strpos($currentHeader, ':') !== false) {
+                    list($headerName, $headerValue) = explode(':', $currentHeader, 2);
+                    $headerValue = iconv_mime_decode($headerValue, 0, 'utf-8');
+                    if (strpos($headerValue, ';') !== false) {
+                        self::parseContentTypeHeader($currentPart, $headerName, $headerValue);
+                        $boundary = $multipart->getHeader('Content-Type', 'boundary');
+                        $start = $multipart->getHeader('Content-Type', 'start');
+                    } else {
+                        $currentPart->setHeader($headerName, trim($headerValue));
+                    }
+                }
+                unset($currentHeader);
+            }
+            if ($inHeader) {
+                if ($line == '') {
+                    $inHeader = false;
+                    continue;
+                }
+                $currentHeader = $line;
+                continue;
+            }
+            // only lines starting with a dash can be boundary delimiters
+            $marker = (strlen($line) > 0 && $line[0] == "-") ? trim($line) : null;
+            if ($marker === '--' . $boundary) {
+                if ($currentPart instanceof Part) {
+                    $start = self::finishPart($multipart, $currentPart, $content, $start);
+                }
+                $currentPart = new Part();
+                $hitFirstBoundary = true;
+                $inHeader = true;
+                $content = '';
+            } elseif ($marker === '--' . $boundary . '--') {
+                // a closing delimiter that was not preceded by a part adds nothing
+                if ($currentPart instanceof Part) {
+                    $start = self::finishPart($multipart, $currentPart, $content, $start);
+                }
+                $content = '';
+            } else {
+                if ($hitFirstBoundary === false) {
+                    if ($line != '') {
+                        $inHeader = true;
+                        $currentHeader = $line;
+                        continue;
+                    }
+                }
+                // regular body line; this includes lines that merely start with "-"
+                $content .= $line . "\r\n";
+            }
+        }
+        return $multipart;
+    }
+
+    /**
+     * Decode the collected body of a part and add the part to the multipart.
+     *
+     * @param \BeSimple\SoapCommon\Mime\MultiPart $multipart Container the part is added to
+     * @param \BeSimple\SoapCommon\Mime\Part $part Part that is completed
+     * @param string $content Raw body lines collected for the part
+     * @param string|null $start Content-ID of the main part, null if not known yet
+     * @return string|null Content-ID of the main part after adding this part
+     */
+    private static function finishPart(MultiPart $multipart, Part $part, $content, $start)
+    {
+        // strip the CRLF that belongs to the boundary delimiter; done byte-wise
+        // because binary bodies are not valid utf-8 and the CRLF is two bytes
+        self::decodeContent($part, (string) substr($content, 0, -2));
+        // check if there is a start parameter given, if not set first part
+        $isMain = (is_null($start) || $start == $part->getHeader('Content-ID'));
+        $multipart->addPart($part, $isMain);
+        return ($isMain === true) ? $part->getHeader('Content-ID') : $start;
+    }
+
+    /**
+     * Parse a "Content-Type" header with multiple sub values.
+     * e.g. Content-Type: Multipart/Related; boundary=boundary; type=text/xml;
+     * start="<123@abc>"
+     *
+     * @see https://labs.omniti.com/alexandria/trunk/OmniTI/Mail/Parser.php
+     *
+     * @param \BeSimple\SoapCommon\Mime\PartHeader $part Header part
+     * @param string $headerName Header name
+     * @param string $headerValue Header value
+     * @return null
+     */
+    private static function parseContentTypeHeader(PartHeader $part, $headerName, $headerValue)
+    {
+        // the value may come without any parameters (no ";" at all)
+        list($value, $remainder) = array_pad(explode(';', $headerValue, 2), 2, '');
+        $value = trim($value);
+        $part->setHeader($headerName, $value);
+        $remainder = trim($remainder);
+        while (strlen($remainder) > 0) {
+            if (!preg_match('/^([a-zA-Z0-9_-]+)=(.)/', $remainder, $matches)) {
+                break;
+            }
+            $name = $matches[1];
+            $delimiter = $matches[2];
+            $pattern = '/(\S+)(\s|$)/';
+            $remainder = substr($remainder, strlen($name)+1);
+            if (!preg_match($pattern, $remainder, $matches)) {
+                break;
+            }
+            $value = rtrim($matches[1], ';');
+            if ($delimiter == '\'' || $delimiter == '"') {
+                $value = trim($value, $delimiter);
+            }
+            $part->setHeader($headerName, $name, $value);
+            $remainder = substr($remainder, strlen($matches[0]));
+        }
+    }
+
+    /**
+     * Decodes the content of a Mime part.
+     *
+     * @param \BeSimple\SoapCommon\Mime\Part $part Part to add content
+     * @param string $content Content to decode
+     * @return null
+     */
+    private static function decodeContent(Part $part, $content)
+    {
+        $encoding = strtolower($part->getHeader('Content-Transfer-Encoding'));
+        $charset = strtolower($part->getHeader('Content-Type', 'charset'));
+        if ($encoding == Part::ENCODING_BASE64) {
+            $content = base64_decode($content);
+        } elseif ($encoding == Part::ENCODING_QUOTED_PRINTABLE) {
+            $content = quoted_printable_decode($content);
+        }
+        if ($charset != 'utf-8') {
+            $content = iconv($charset, 'utf-8', $content);
+        }
+        $part->setContent($content);
+    }
+}
\ No newline at end of file
diff --git a/src/BeSimple/SoapCommon/Mime/Part.php b/src/BeSimple/SoapCommon/Mime/Part.php
new file mode 100644
index 0000000..45065ab
--- /dev/null
+++ b/src/BeSimple/SoapCommon/Mime/Part.php
@@ -0,0 +1,168 @@
+<?php
+
+/*
+ * (c) Francis Besset
+ *
+ * This source file is subject to the MIT license that is bundled
+ * with this source code in the file LICENSE.
+ */
+
+namespace BeSimple\SoapCommon;
+
+/**
+ * Mime part. Everything must be UTF-8. Default charset for text is UTF-8.
+ *
+ * Headers:
+ * - Content-Type
+ * - Content-Transfer-Encoding
+ * - Content-ID
+ * - Content-Location
+ * - Content-Description
+ *
+ * @author Andreas Schamberger
+ */
+class Part extends PartHeader
+{
+    /**
+     * Encoding type base 64
+     */
+    const ENCODING_BASE64 = 'base64';
+
+    /**
+     * Encoding type binary
+     */
+    const ENCODING_BINARY = 'binary';
+
+    /**
+     * Encoding type eight bit
+     */
+    const ENCODING_EIGHT_BIT = '8bit';
+
+    /**
+     * Encoding type seven bit
+     */
+    const ENCODING_SEVEN_BIT = '7bit';
+
+    /**
+     * Encoding type quoted printable
+     */
+    const ENCODING_QUOTED_PRINTABLE = 'quoted-printable';
+
+    /**
+     * Content.
+     *
+     * @var mixed
+     */
+    protected $content;
+
+    /**
+     * Construct new mime object.
+     *
+     * @param mixed $content Content
+     * @param string $contentType Content type
+     * @param string $charset Charset
+     * @param string $encoding Encoding
+     * @param string $contentId Content id
+     * @return void
+     */
+    public function __construct($content = null, $contentType = 'application/octet-stream', $charset = null, $encoding = self::ENCODING_BINARY, $contentId = null)
+    {
+        $this->content = $content;
+        $this->setHeader('Content-Type', $contentType);
+        if (!is_null($charset)) {
+            $this->setHeader('Content-Type', 'charset', $charset);
+        } else {
+            // the default charset is set for every content type, not only text/*
+            $this->setHeader('Content-Type', 'charset', 'utf-8');
+        }
+        $this->setHeader('Content-Transfer-Encoding', $encoding);
+        if (is_null($contentId)) {
+            $contentId = $this->generateContentId();
+        }
+        $this->setHeader('Content-ID', '<' . $contentId . '>');
+    }
+
+    /**
+     * Get mime content as string.
+     *
+     * @return string
+     */
+    public function __toString()
+    {
+        // __toString() has to return a string, the content may be null or mixed
+        return (string) $this->content;
+    }
+
+    /**
+     * Get mime content.
+     *
+     * @return mixed
+     */
+    public function getContent()
+    {
+        return $this->content;
+    }
+
+    /**
+     * Set mime content.
+     *
+     * @param mixed $content Content to set
+     * @return void
+     */
+    public function setContent($content)
+    {
+        $this->content = $content;
+    }
+
+    /**
+     * Get complete mime message of this object.
+     *
+     * @return string
+     */
+    public function getMessagePart()
+    {
+        return $this->generateHeaders() . "\r\n" . $this->generateBody();
+    }
+
+    /**
+     * Generate body, encoded according to the Content-Transfer-Encoding
+     * header and converted to the charset of the Content-Type header.
+     *
+     * @return string
+     */
+    protected function generateBody()
+    {
+        $encoding = strtolower($this->getHeader('Content-Transfer-Encoding'));
+        $charset = strtolower($this->getHeader('Content-Type', 'charset'));
+        if ($charset != 'utf-8') {
+            $content = iconv('utf-8', $charset . '//TRANSLIT', $this->content);
+        } else {
+            $content = $this->content;
+        }
+        switch ($encoding) {
+            case self::ENCODING_BASE64:
+                // drop only the trailing CRLF that chunk_split() appends
+                return substr(chunk_split(base64_encode($content), 76, "\r\n"), 0, -2);
+            case self::ENCODING_QUOTED_PRINTABLE:
+                return quoted_printable_encode($content);
+            case self::ENCODING_BINARY:
+                // binary data has to be passed through unmodified
+                return $content;
+            case self::ENCODING_SEVEN_BIT:
+            case self::ENCODING_EIGHT_BIT:
+            default:
+                return preg_replace("/\r\n|\r|\n/", "\r\n", $content);
+        }
+    }
+
+    /**
+     * Returns a unique ID to be used for the Content-ID header.
+     *
+     * @return string
+     */
+    protected function generateContentId()
+    {
+        // TODO
+        return 'urn:uuid:' . \ass\Soap\Helper::generateUUID();
+    }
+}
\ No newline at end of file
diff --git a/src/BeSimple/SoapCommon/Mime/PartHeader.php b/src/BeSimple/SoapCommon/Mime/PartHeader.php
new file mode 100644
index 0000000..4dc3d06
--- /dev/null
+++ b/src/BeSimple/SoapCommon/Mime/PartHeader.php
@@ -0,0 +1,114 @@
+<?php
+
+/*
+ * (c) Francis Besset
+ *
+ * This source file is subject to the MIT license that is bundled
+ * with this source code in the file LICENSE.
+ */
+
+namespace BeSimple\SoapCommon;
+
+/**
+ * Mime part base class.
+ *
+ * @author Andreas Schamberger
+ */
+abstract class PartHeader
+{
+    /**
+     * Mime headers.
+     *
+     * @var array(string=>mixed|array(mixed))
+     */
+    protected $headers = array();
+
+    /**
+     * Add a new header to the mime part.
+     *
+     * With two arguments the main value of the header is set. With three
+     * arguments a parameter of the header is set, e.g.
+     * setHeader('Content-Type', 'charset', 'utf-8').
+     *
+     * @param string $name Header name
+     * @param string $value Header value (parameter name if $subValue is given)
+     * @param string $subValue Parameter value
+     * @return void
+     */
+    public function setHeader($name, $value, $subValue = null)
+    {
+        if (!is_null($subValue)) {
+            if (!isset($this->headers[$name])) {
+                // no main value yet: keep the parameter instead of storing its
+                // name as the header value
+                $this->headers[$name] = array($value => $subValue);
+            } elseif (!is_array($this->headers[$name])) {
+                $this->headers[$name] = array(
+                    '@' => $this->headers[$name],
+                    $value => $subValue,
+                );
+            } else {
+                $this->headers[$name][$value] = $subValue;
+            }
+        } elseif (isset($this->headers[$name]) && is_array($this->headers[$name])) {
+            // keep already set parameters, only replace the main value
+            $this->headers[$name]['@'] = $value;
+        } else {
+            $this->headers[$name] = $value;
+        }
+    }
+
+    /**
+     * Get given mime header.
+     *
+     * @param string $name Header name
+     * @param string $subValue Sub value name
+     * @return mixed|array(mixed)
+     */
+    public function getHeader($name, $subValue = null)
+    {
+        if (isset($this->headers[$name])) {
+            if (!is_null($subValue)) {
+                if (is_array($this->headers[$name]) && isset($this->headers[$name][$subValue])) {
+                    return $this->headers[$name][$subValue];
+                } else {
+                    return null;
+                }
+            } elseif (is_array($this->headers[$name]) && isset($this->headers[$name]['@'])) {
+                return $this->headers[$name]['@'];
+            } else {
+                return $this->headers[$name];
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Generate headers.
+     *
+     * @return string
+     */
+    protected function generateHeaders()
+    {
+        $headers = '';
+        foreach ($this->headers as $fieldName => $value) {
+            $fieldValue = '';
+            if (is_array($value)) {
+                if (isset($value['@'])) {
+                    $fieldValue .= $value['@'];
+                }
+                foreach ($value as $subName => $subValue) {
+                    if ($subName !== '@') {
+                        $fieldValue .= '; ' . $subName . '=' . $subValue;
+                    }
+                }
+            } else {
+                $fieldValue .= $value;
+            }
+            // do not use proper encoding (iconv_mime_encode) as Apache Axis does
+            // not understand this
+            $headers .= $fieldName . ': ' . $fieldValue . "\r\n";
+        }
+        return $headers;
+    }
+}
\ No newline at end of file