mime parser

This commit is contained in:
Andreas Schamberger 2011-10-22 11:25:26 +02:00
parent f95a5177b1
commit 574ad00396
4 changed files with 642 additions and 0 deletions

View File

@ -0,0 +1,180 @@
<?php
/*
* This file is part of BeSimpleSoapCommon.
*
* (c) Christian Kerl <christian-kerl@web.de>
* (c) Francis Besset <francis.besset@gmail.com>
*
* This source file is subject to the MIT license that is bundled
* with this source code in the file LICENSE.
*/
namespace BeSimple\SoapCommon;
/**
* Mime multi part container.
*
* Headers:
* - MIME-Version
* - Content-Type
* - Content-ID
* - Content-Location
* - Content-Description
*
* @author Andreas Schamberger <mail@andreass.net>
*/
class MultiPart extends PartHeader
{
    /**
     * Content-ID of the main part (stored without the surrounding "<>").
     *
     * @var string
     */
    protected $mainPartContentId;

    /**
     * Mime parts, keyed by their Content-ID (without the surrounding "<>").
     *
     * @var array(string=>Part)
     */
    protected $parts = array();

    /**
     * Construct new mime object.
     *
     * Sets the MIME-Version and a multipart/related Content-Type header
     * (type=text/xml, charset=utf-8). When no boundary is passed, one is
     * obtained from generateBoundary().
     *
     * @param string $boundary Boundary string
     * @return void
     */
    public function __construct($boundary = null)
    {
        $this->setHeader('MIME-Version', '1.0');
        $this->setHeader('Content-Type', 'multipart/related');
        $this->setHeader('Content-Type', 'type', 'text/xml');
        $this->setHeader('Content-Type', 'charset', 'utf-8');
        if (is_null($boundary)) {
            $boundary = $this->generateBoundary();
        }
        $this->setHeader('Content-Type', 'boundary', $boundary);
    }

    /**
     * Get mime message of this object.
     *
     * Every part is preceded by a CRLF and the boundary delimiter; the
     * message ends with the closing delimiter ("--boundary--").
     *
     * @param boolean $withHeaders Prepend the generated headers only when exactly true
     * @return string
     */
    public function getMimeMessage($withHeaders = false)
    {
        $message = ($withHeaders === true) ? $this->generateHeaders() : '';
        // the boundary does not change while the parts are serialised
        $delimiter = "\r\n" . '--' . $this->getHeader('Content-Type', 'boundary');
        foreach ($this->parts as $part) {
            $message .= $delimiter . "\r\n";
            $message .= $part->getMessagePart();
        }
        $message .= $delimiter . '--';

        return $message;
    }

    /**
     * Get string array with MIME headers for usage in HTTP header (with CURL).
     *
     * Only Content-Type and Content-Description are exported. Header
     * parameters are appended as '; name="value"'.
     *
     * @return array(string)
     */
    public function getHeadersForHttp()
    {
        $allowed = array(
            'Content-Type',
            'Content-Description',
        );
        $headers = array();
        foreach ($this->headers as $fieldName => $value) {
            // strict check: a numeric header name must not match by type juggling
            if (!in_array($fieldName, $allowed, true)) {
                continue;
            }
            $fieldValue = '';
            if (is_array($value)) {
                if (isset($value['@'])) {
                    $fieldValue .= $value['@'];
                }
                foreach ($value as $subName => $subValue) {
                    // numeric parameter names are stored as integer keys, compare as string
                    if ((string) $subName !== '@') {
                        $fieldValue .= '; ' . $subName . '="' . $subValue . '"';
                    }
                }
            } else {
                $fieldValue .= $value;
            }
            // for http only ISO-8859-1
            $encoded = iconv('utf-8', 'ISO-8859-1//TRANSLIT', $fieldValue);
            if ($encoded === false) {
                // conversion failed: keep the original value instead of an empty header
                $encoded = $fieldValue;
            }
            $headers[] = $fieldName . ': ' . $encoded;
        }

        return $headers;
    }

    /**
     * Add new part to MIME message.
     *
     * The part is stored under its Content-ID with the surrounding "<>"
     * removed; a part added later with the same Content-ID replaces it.
     * For the main part the Content-Type "start" parameter is set to the
     * part's Content-ID header value.
     *
     * @param Part    $part   Part that is added
     * @param boolean $isMain Is the given part the main part of mime message
     * @return void
     */
    public function addPart(Part $part, $isMain = false)
    {
        $contentIdHeader = $part->getHeader('Content-ID');
        $contentId = trim((string) $contentIdHeader, '<>');
        if ($isMain === true) {
            $this->mainPartContentId = $contentId;
            $this->setHeader('Content-Type', 'start', $contentIdHeader);
        }
        $this->parts[$contentId] = $part;
    }

    /**
     * Get part with given content id. If there is no content id given it
     * returns the main part that is defined through the content-id start
     * parameter.
     *
     * @param string $contentId Content id of desired part (without "<>")
     * @return Part|null
     */
    public function getPart($contentId = null)
    {
        if (is_null($contentId)) {
            $contentId = $this->mainPartContentId;
        }
        if (isset($this->parts[$contentId])) {
            return $this->parts[$contentId];
        }

        return null;
    }

    /**
     * Get all parts.
     *
     * @param boolean $includeMainPart Should main part be in result set
     * @return array(string=>Part)
     */
    public function getParts($includeMainPart = false)
    {
        if ($includeMainPart === true) {
            return $this->parts;
        }
        // compare as strings: numeric content ids are stored as integer keys
        $mainId = (string) $this->mainPartContentId;
        $parts = array();
        foreach ($this->parts as $cid => $part) {
            if ((string) $cid !== $mainId) {
                $parts[$cid] = $part;
            }
        }

        return $parts;
    }

    /**
     * Returns a unique boundary string.
     *
     * @return string
     */
    protected function generateBoundary()
    {
        // TODO
        return 'urn:uuid:' . \ass\Soap\Helper::generateUUID();
    }
}

View File

@ -0,0 +1,183 @@
<?php
/*
* This file is part of BeSimpleSoapCommon.
*
* (c) Christian Kerl <christian-kerl@web.de>
* (c) Francis Besset <francis.besset@gmail.com>
*
* This source file is subject to the MIT license that is bundled
* with this source code in the file LICENSE.
*/
namespace BeSimple\SoapCommon;
/**
* Simple Multipart-Mime parser.
*
* @author Andreas Schamberger <mail@andreass.net>
*/
class Parser
{
    /**
     * Parse the given Mime-Message and return a MultiPart object.
     *
     * The message may carry its own header block (optionally preceded by
     * HTTP status lines) or start directly with the first boundary when the
     * headers are passed in separately.
     *
     * Note: the message is split into lines on CRLF/LF and part bodies are
     * re-joined with CRLF.
     *
     * @param string                $mimeMessage Mime message string
     * @param array(string=>string) $headers     Array of header elements (e.g. coming from http request)
     * @return MultiPart
     */
    public static function parseMimeMessage($mimeMessage, array $headers = array())
    {
        $boundary = null;
        $start = null;
        $multipart = new MultiPart();
        // add given headers, e.g. coming from HTTP headers
        foreach ($headers as $name => $value) {
            if ($name === 'Content-Type') {
                self::parseContentTypeHeader($multipart, $name, $value);
                $boundary = $multipart->getHeader('Content-Type', 'boundary');
                $start = $multipart->getHeader('Content-Type', 'start');
            } else {
                $multipart->setHeader($name, $value);
            }
        }
        $hitFirstBoundary = false;
        $inHeader = true;
        $content = '';
        $currentPart = $multipart;
        // raw header line that is completed once the next non-continuation line arrives
        $currentHeader = null;
        $lines = preg_split("/\r\n|\n/", $mimeMessage);
        foreach ($lines as $line) {
            // ignore http status code (only in front of the first part, a part
            // body may legitimately contain such a line)
            if ($hitFirstBoundary === false && substr($line, 0, 5) === 'HTTP/') {
                continue;
            }
            if ($currentHeader !== null) {
                // folded header: continuation lines start with whitespace
                if (isset($line[0]) && ($line[0] === ' ' || $line[0] === "\t")) {
                    $currentHeader .= $line;
                    continue;
                }
                if (self::applyHeaderLine($currentPart, $currentHeader)) {
                    $boundary = $multipart->getHeader('Content-Type', 'boundary');
                    $start = $multipart->getHeader('Content-Type', 'start');
                }
                $currentHeader = null;
            }
            $marker = self::matchBoundary($line, $boundary);
            // a boundary line is never a header, so it is honoured even while
            // headers are expected (e.g. a body that starts with "--boundary")
            if ($inHeader && $marker === null) {
                if ($line === '') {
                    $inHeader = false;
                } else {
                    $currentHeader = $line;
                }
                continue;
            }
            if ($marker !== null) {
                // only a real part can be finished; before the first boundary
                // the current "part" is the multipart container itself
                if ($currentPart instanceof Part) {
                    $start = self::finishPart($multipart, $currentPart, $content, $start);
                }
                $content = '';
                if ($marker === 'open') {
                    $currentPart = new Part();
                    $hitFirstBoundary = true;
                    $inHeader = true;
                } else {
                    $inHeader = false;
                }
                continue;
            }
            if ($hitFirstBoundary === false) {
                // in front of the first part: lines starting with "-" are
                // ignored, any other non-empty line starts a new header block
                if (isset($line[0]) && $line[0] === '-') {
                    continue;
                }
                if ($line !== '') {
                    $inHeader = true;
                    $currentHeader = $line;
                    continue;
                }
            }
            // regular body line (including lines starting with "-" that are not a boundary)
            $content .= $line . "\r\n";
        }

        return $multipart;
    }

    /**
     * Classify a line as opening or closing boundary delimiter.
     *
     * @param string      $line     Line of the mime message
     * @param string|null $boundary Boundary string, null if not known yet
     * @return string|null 'open', 'close' or null if the line is no delimiter
     */
    private static function matchBoundary($line, $boundary)
    {
        if (!is_string($boundary) || $boundary === '') {
            return null;
        }
        if (!isset($line[0]) || $line[0] !== '-') {
            return null;
        }
        $candidate = trim($line);
        if ($candidate === '--' . $boundary) {
            return 'open';
        }
        if ($candidate === '--' . $boundary . '--') {
            return 'close';
        }

        return null;
    }

    /**
     * Split a raw "Name: value" header line and store it on the given part.
     *
     * @param PartHeader $part      Part (or multipart container) receiving the header
     * @param string     $rawHeader Unfolded header line
     * @return boolean True if the value contained parameters (";"), i.e.
     *                 boundary/start may have changed
     */
    private static function applyHeaderLine(PartHeader $part, $rawHeader)
    {
        // pad so that a line without ":" yields an empty value instead of an undefined offset
        list($headerName, $headerValue) = array_pad(explode(':', $rawHeader, 2), 2, '');
        $decoded = iconv_mime_decode($headerValue, 0, 'utf-8');
        if ($decoded !== false) {
            $headerValue = $decoded;
        }
        if (strpos($headerValue, ';') !== false) {
            self::parseContentTypeHeader($part, $headerName, $headerValue);

            return true;
        }
        $part->setHeader($headerName, trim($headerValue));

        return false;
    }

    /**
     * Decode the collected body, determine whether the part is the main part
     * and add it to the multipart container.
     *
     * @param MultiPart   $multipart Container the part is added to
     * @param Part        $part      Part to finish
     * @param string      $content   Collected raw body (each line followed by CRLF)
     * @param string|null $start     Content-ID of the main part, null if not known yet
     * @return string|null Content-ID of the main part after adding this part
     */
    private static function finishPart(MultiPart $multipart, Part $part, $content, $start)
    {
        // drop the CRLF that belongs to the boundary delimiter; this must be
        // byte based as the body may be binary or in a non utf-8 charset
        $content = (strlen($content) >= 2) ? substr($content, 0, -2) : '';
        self::decodeContent($part, $content);
        // check if there is a start parameter given, if not set first part
        $contentId = $part->getHeader('Content-ID');
        $isMain = (is_null($start) || $start === $contentId);
        if ($isMain) {
            $start = $contentId;
        }
        $multipart->addPart($part, $isMain);

        return $start;
    }

    /**
     * Parse a "Content-Type" header with multiple sub values.
     * e.g. Content-Type: Multipart/Related; boundary=boundary; type=text/xml;
     * start="<123@abc>"
     *
     * Parameter values may be unquoted, or enclosed in single or double
     * quotes (quoted values may contain whitespace and ";"). Whitespace
     * after the separating ";" is optional. Parsing stops at the first
     * token that is not of the form name=value.
     *
     * @see https://labs.omniti.com/alexandria/trunk/OmniTI/Mail/Parser.php
     *
     * @param PartHeader $part        Header part
     * @param string     $headerName  Header name
     * @param string     $headerValue Header value
     * @return null
     */
    private static function parseContentTypeHeader(PartHeader $part, $headerName, $headerValue)
    {
        // pad so that a value without ";" yields an empty parameter list
        list($value, $remainder) = array_pad(explode(';', $headerValue, 2), 2, '');
        $part->setHeader($headerName, trim($value));
        // name=("double quoted"|'single quoted'|token) followed by an optional ";"
        $pattern = '/^([a-zA-Z0-9_-]+)=(?:"([^"]*)"|\'([^\']*)\'|([^;\s]*))\s*(?:;\s*)?/';
        $remainder = ltrim($remainder, "; \t\r\n");
        while ($remainder !== '' && preg_match($pattern, $remainder, $matches)) {
            if (isset($matches[4])) {
                $paramValue = $matches[4];
                // unterminated quote: strip the stray quote characters
                if ($paramValue !== '' && ($paramValue[0] === '"' || $paramValue[0] === '\'')) {
                    $paramValue = trim($paramValue, $paramValue[0]);
                }
            } elseif (isset($matches[3])) {
                $paramValue = $matches[3];
            } else {
                $paramValue = $matches[2];
            }
            $part->setHeader($headerName, $matches[1], $paramValue);
            // the pattern always consumes at least "name=", so the loop terminates
            $remainder = (string) substr($remainder, strlen($matches[0]));
        }
    }

    /**
     * Decodes the content of a Mime part.
     *
     * Applies the Content-Transfer-Encoding (base64 / quoted-printable) and
     * converts the result to utf-8 if the part declares another charset. If
     * the charset conversion fails the transfer-decoded content is kept.
     *
     * @param Part   $part    Part to add content
     * @param string $content Content to decode
     * @return null
     */
    private static function decodeContent(Part $part, $content)
    {
        $encoding = strtolower((string) $part->getHeader('Content-Transfer-Encoding'));
        $charset = strtolower((string) $part->getHeader('Content-Type', 'charset'));
        if ($encoding === Part::ENCODING_BASE64) {
            $content = base64_decode($content);
        } elseif ($encoding === Part::ENCODING_QUOTED_PRINTABLE) {
            $content = quoted_printable_decode($content);
        }
        if ($charset !== '' && $charset !== 'utf-8') {
            $converted = iconv($charset, 'utf-8', $content);
            if ($converted !== false) {
                $content = $converted;
            }
        }
        $part->setContent($content);
    }
}

View File

@ -0,0 +1,165 @@
<?php
/*
* This file is part of BeSimpleSoapCommon.
*
* (c) Christian Kerl <christian-kerl@web.de>
* (c) Francis Besset <francis.besset@gmail.com>
*
* This source file is subject to the MIT license that is bundled
* with this source code in the file LICENSE.
*/
namespace BeSimple\SoapCommon;
/**
* Mime part. Everything must be UTF-8. Default charset for text is UTF-8.
*
* Headers:
* - Content-Type
* - Content-Transfer-Encoding
* - Content-ID
* - Content-Location
* - Content-Description
*
* @author Andreas Schamberger <mail@andreass.net>
*/
class Part extends PartHeader
{
    /**
     * Encoding type base 64
     */
    const ENCODING_BASE64 = 'base64';

    /**
     * Encoding type binary
     */
    const ENCODING_BINARY = 'binary';

    /**
     * Encoding type eight bit
     */
    const ENCODING_EIGHT_BIT = '8bit';

    /**
     * Encoding type seven bit
     */
    const ENCODING_SEVEN_BIT = '7bit';

    /**
     * Encoding type quoted printable
     */
    const ENCODING_QUOTED_PRINTABLE = 'quoted-printable';

    /**
     * Content.
     *
     * @var mixed
     */
    protected $content;

    /**
     * Construct new mime object.
     *
     * A charset parameter is always set on the Content-Type header (utf-8 if
     * none is given). When no content id is passed, one is obtained from
     * generateContentId(). The Content-ID header is stored wrapped in "<>".
     *
     * @param mixed  $content     Content
     * @param string $contentType Content type
     * @param string $charset     Charset
     * @param string $encoding    Encoding
     * @param string $contentId   Content id (without "<>")
     * @return void
     */
    public function __construct($content = null, $contentType = 'application/octet-stream', $charset = null, $encoding = self::ENCODING_BINARY, $contentId = null)
    {
        $this->content = $content;
        $this->setHeader('Content-Type', $contentType);
        $this->setHeader('Content-Type', 'charset', is_null($charset) ? 'utf-8' : $charset);
        $this->setHeader('Content-Transfer-Encoding', $encoding);
        if (is_null($contentId)) {
            $contentId = $this->generateContentId();
        }
        $this->setHeader('Content-ID', '<' . $contentId . '>');
    }

    /**
     * __toString.
     *
     * @return string Content cast to string (__toString must not return a non-string)
     */
    public function __toString()
    {
        return (string) $this->content;
    }

    /**
     * Get mime content.
     *
     * @return mixed
     */
    public function getContent()
    {
        return $this->content;
    }

    /**
     * Set mime content.
     *
     * @param mixed $content Content to set
     * @return void
     */
    public function setContent($content)
    {
        $this->content = $content;
    }

    /**
     * Get complete mime message of this object.
     *
     * @return string Generated headers, an empty line and the encoded body
     */
    public function getMessagePart()
    {
        return $this->generateHeaders() . "\r\n" . $this->generateBody();
    }

    /**
     * Generate body.
     *
     * The content is converted from utf-8 to the charset of the Content-Type
     * header (if that is not utf-8) and then encoded according to the
     * Content-Transfer-Encoding header:
     * - base64: base64 in lines of 76 characters separated by CRLF, without
     *   a trailing CRLF
     * - quoted-printable: quoted printable encoding
     * - anything else: line breaks are normalised to CRLF
     *
     * If the charset conversion fails the unconverted content is used.
     *
     * @return string
     */
    protected function generateBody()
    {
        $encoding = strtolower((string) $this->getHeader('Content-Transfer-Encoding'));
        $charset = strtolower((string) $this->getHeader('Content-Type', 'charset'));
        $content = $this->content;
        if ($charset !== '' && $charset !== 'utf-8') {
            $converted = iconv('utf-8', $charset . '//TRANSLIT', $content);
            if ($converted !== false) {
                $content = $converted;
            }
        }
        switch ($encoding) {
            case self::ENCODING_BASE64:
                // chunk_split always appends the separator; strip only that
                // trailing CRLF (base64 output itself never contains CR or LF)
                return rtrim(chunk_split(base64_encode($content), 76, "\r\n"), "\r\n");
            case self::ENCODING_QUOTED_PRINTABLE:
                return quoted_printable_encode($content);
            case self::ENCODING_BINARY:
            case self::ENCODING_SEVEN_BIT:
            case self::ENCODING_EIGHT_BIT:
            default:
                return preg_replace("/\r\n|\r|\n/", "\r\n", $content);
        }
    }

    /**
     * Returns a unique ID to be used for the Content-ID header.
     *
     * @return string
     */
    protected function generateContentId()
    {
        // TODO
        return 'urn:uuid:' . \ass\Soap\Helper::generateUUID();
    }
}

View File

@ -0,0 +1,114 @@
<?php
/*
* This file is part of BeSimpleSoapCommon.
*
* (c) Christian Kerl <christian-kerl@web.de>
* (c) Francis Besset <francis.besset@gmail.com>
*
* This source file is subject to the MIT license that is bundled
* with this source code in the file LICENSE.
*/
namespace BeSimple\SoapCommon;
/**
* Mime part base class.
*
* @author Andreas Schamberger <mail@andreass.net>
*/
abstract class PartHeader
{
    /**
     * Mime headers.
     *
     * A header without parameters is stored as plain value. A header with
     * parameters is stored as array: the main value under the key '@', the
     * parameters under their names.
     *
     * @var array(string=>mixed|array(mixed))
     */
    protected $headers = array();

    /**
     * Add a new header to the mime part.
     *
     * With two arguments the main value of the header is set; existing
     * parameters of that header are kept. With three arguments $value is the
     * name of a header parameter and $subValue its value, e.g.
     * setHeader('Content-Type', 'charset', 'utf-8'). A parameter may be set
     * before the main value of the header.
     *
     * @param string $name     Header name
     * @param string $value    Header value, or parameter name if $subValue is given
     * @param string $subValue Parameter value
     * @return void
     */
    public function setHeader($name, $value, $subValue = null)
    {
        if (!is_null($subValue)) {
            if (!isset($this->headers[$name])) {
                // parameter for a header without main value (yet)
                $this->headers[$name] = array($value => $subValue);
            } elseif (!is_array($this->headers[$name])) {
                // promote the plain value to the array form
                $this->headers[$name] = array(
                    '@' => $this->headers[$name],
                    $value => $subValue,
                );
            } else {
                $this->headers[$name][$value] = $subValue;
            }
        } elseif (isset($this->headers[$name]) && is_array($this->headers[$name])) {
            // keep the parameters, only replace the main value
            $this->headers[$name]['@'] = $value;
        } else {
            $this->headers[$name] = $value;
        }
    }

    /**
     * Get given mime header.
     *
     * @param string $name     Header name
     * @param string $subValue Sub value name
     * @return mixed|null Main value of the header, or the value of the given
     *                    parameter; null if the header, its main value or
     *                    the parameter is not set
     */
    public function getHeader($name, $subValue = null)
    {
        if (!isset($this->headers[$name])) {
            return null;
        }
        $header = $this->headers[$name];
        if (!is_null($subValue)) {
            if (is_array($header) && isset($header[$subValue])) {
                return $header[$subValue];
            }

            return null;
        }
        if (is_array($header)) {
            return isset($header['@']) ? $header['@'] : null;
        }

        return $header;
    }

    /**
     * Generate headers.
     *
     * Every header is rendered as "Name: value" followed by its parameters
     * as "; name=value" (unquoted) and terminated by CRLF.
     *
     * @return string
     */
    protected function generateHeaders()
    {
        $headers = '';
        foreach ($this->headers as $fieldName => $value) {
            $fieldValue = '';
            if (is_array($value)) {
                if (isset($value['@'])) {
                    $fieldValue .= $value['@'];
                }
                foreach ($value as $subName => $subValue) {
                    // numeric parameter names are stored as integer keys, compare as string
                    if ((string) $subName !== '@') {
                        $fieldValue .= '; ' . $subName . '=' . $subValue;
                    }
                }
            } else {
                $fieldValue .= $value;
            }
            // headers are deliberately not MIME encoded (iconv_mime_encode)
            // as Apache Axis does not understand this
            $headers .= $fieldName . ': ' . $fieldValue . "\r\n";
        }

        return $headers;
    }
}