SoapServer/Client now handle binary files correctly & large tests/fixtures update

The SOAP server and client were corrupting binary files during transfer because of a faulty MIME message parser. Transfers now work without errors, but the message parser is still due to be rewritten in a cleaner form.
This commit is contained in:
Petr Bechyně
2017-04-04 18:36:18 +02:00
parent 311f9e6d08
commit 564005da93
42 changed files with 1135 additions and 250 deletions

View File

@ -0,0 +1,51 @@
<?php
namespace BeSimple\SoapCommon\Mime\Boundary;
/**
 * Stateless helpers that recognise MIME boundary lines in a message
 * that has already been split into individual lines.
 */
class MimeBoundaryAnalyser
{
    /**
     * Tell whether at least one of the given lines looks like a boundary line.
     *
     * @param string[] $mimeMessageLines message split into lines
     * @return bool true as soon as one line passes isMessageLineBoundary()
     */
    public static function hasMessageBoundary(array $mimeMessageLines)
    {
        $boundaryFound = false;
        foreach ($mimeMessageLines as $candidateLine) {
            if (self::isMessageLineBoundary($candidateLine)) {
                $boundaryFound = true;
                break;
            }
        }

        return $boundaryFound;
    }

    /**
     * Cheap pre-check: a line is treated as a potential boundary when it is
     * non-empty and its very first character is a dash.
     *
     * @param string $mimeMessageLine
     * @return bool
     */
    public static function isMessageLineBoundary($mimeMessageLine)
    {
        if (strlen($mimeMessageLine) === 0) {
            return false;
        }

        return $mimeMessageLine[0] === "-";
    }

    /**
     * Check for a delimiter between two parts: the trimmed line must equal
     * "--" followed by the boundary.
     *
     * @param string $mimeMessageLine
     * @param string $mimeTypeBoundary boundary value without the leading dashes
     * @return bool
     */
    public static function isMessageLineMiddleBoundary($mimeMessageLine, $mimeTypeBoundary)
    {
        $expectedDelimiter = '--'.$mimeTypeBoundary;

        return trim($mimeMessageLine) === $expectedDelimiter;
    }

    /**
     * Check for the closing delimiter: the trimmed line must equal
     * "--" followed by the boundary followed by "--".
     *
     * @param string $mimeMessageLine
     * @param string $mimeTypeBoundary boundary value without the surrounding dashes
     * @return bool
     */
    public static function isMessageLineLastBoundary($mimeMessageLine, $mimeTypeBoundary)
    {
        $expectedDelimiter = '--'.$mimeTypeBoundary.'--';

        return trim($mimeMessageLine) === $expectedDelimiter;
    }
}

View File

@ -12,9 +12,10 @@
namespace BeSimple\SoapCommon\Mime;
use BeSimple\SoapCommon\Mime\Boundary\MimeBoundaryAnalyser;
use BeSimple\SoapCommon\Mime\Parser\ContentTypeParser;
use BeSimple\SoapCommon\Mime\Parser\ParsedPart;
use BeSimple\SoapCommon\Mime\Parser\ParsedPartList;
use BeSimple\SoapCommon\Mime\Parser\ParsedPartsGetter;
use Exception;
/**
@ -25,9 +26,6 @@ use Exception;
*/
class Parser
{
const HAS_HTTP_REQUEST_HEADERS = true;
const HAS_NO_HTTP_REQUEST_HEADERS = false;
/**
* Parse the given Mime-Message and return a \BeSimple\SoapCommon\Mime\MultiPart object.
*
@ -39,16 +37,26 @@ class Parser
public static function parseMimeMessage($mimeMessage, array $headers = [])
{
$multiPart = new MultiPart();
$mimeMessageLines = preg_split("/(\n)/", $mimeMessage);
$mimeMessageLines = explode("\n", $mimeMessage);
$mimeMessageLineCount = count($mimeMessageLines);
if ($mimeMessageLineCount <= 1) {
throw new Exception(
sprintf(
'Cannot process message of %d characters: got unexpectable low number of lines: %s',
mb_strlen($mimeMessage),
(string)$mimeMessageLineCount
)
);
}
// add given headers, e.g. coming from HTTP headers
if (count($headers) > 0) {
self::setMultiPartHeaders($multiPart, $headers);
$hasHttpRequestHeaders = self::HAS_HTTP_REQUEST_HEADERS;
$hasHttpRequestHeaders = ParsedPartsGetter::HAS_HTTP_REQUEST_HEADERS;
} else {
$hasHttpRequestHeaders = self::HAS_NO_HTTP_REQUEST_HEADERS;
$hasHttpRequestHeaders = ParsedPartsGetter::HAS_NO_HTTP_REQUEST_HEADERS;
}
if (self::hasBoundary($mimeMessageLines)) {
$parsedPartList = self::getPartsFromMimeMessageLines(
if (MimeBoundaryAnalyser::hasMessageBoundary($mimeMessageLines) === true) {
$parsedPartList = ParsedPartsGetter::getPartsFromMimeMessageLines(
$multiPart,
$mimeMessageLines,
$hasHttpRequestHeaders
@ -79,115 +87,6 @@ class Parser
return $multiPart;
}
/**
* Walk the message line by line and collect every part found between MIME boundaries.
*
* The loop is a small state machine: lines are either header lines (while $inHeader
* is true, or before the first boundary was hit) or content lines of the current part.
* Parsed headers are written to the current part; content is accumulated and attached
* to the part when the next boundary line is reached.
*
* @param MultiPart $multiPart multipart that receives the top-level headers and supplies boundary/start
* @param string[] $mimeMessageLines message already split into lines
* @param bool $hasHttpHeaders = self::HAS_HTTP_REQUEST_HEADERS|self::HAS_NO_HTTP_REQUEST_HEADERS
* @return ParsedPartList list of parts, each flagged as main or not main
*/
private static function getPartsFromMimeMessageLines(
MultiPart $multiPart,
array $mimeMessageLines,
$hasHttpHeaders
) {
$parsedParts = [];
$contentTypeBoundary = $multiPart->getHeader('Content-Type', 'boundary');
$contentTypeContentIdStart = $multiPart->getHeader('Content-Type', 'start');
// until the first boundary is hit, headers are written to the multipart itself
$currentPart = $multiPart;
$messagePartStringContent = '';
$inHeader = $hasHttpHeaders;
$hitFirstBoundary = false;
foreach ($mimeMessageLines as $mimeMessageLine) {
// ignore http status code and POST *
if (substr($mimeMessageLine, 0, 5) == 'HTTP/' || substr($mimeMessageLine, 0, 4) == 'POST') {
continue;
}
// a header line was buffered on the previous iteration: either extend it or flush it
if (isset($currentHeader)) {
// folded header: a line starting with space or tab continues the buffered header
if (isset($mimeMessageLine[0]) && ($mimeMessageLine[0] === ' ' || $mimeMessageLine[0] === "\t")) {
$currentHeader .= $mimeMessageLine;
continue;
}
if (strpos($currentHeader, ':') !== false) {
// split at the first colon only, so colons inside the value are preserved
list($headerName, $headerValue) = explode(':', $currentHeader, 2);
$headerValueWithNoCrAtTheEnd = trim($headerValue);
$headerValue = iconv_mime_decode($headerValueWithNoCrAtTheEnd, 0, Part::CHARSET_UTF8);
$parsedMimeHeaders = ContentTypeParser::parseContentTypeHeader($headerName, $headerValue);
foreach ($parsedMimeHeaders as $parsedMimeHeader) {
$currentPart->setHeader(
$parsedMimeHeader->getName(),
$parsedMimeHeader->getValue(),
$parsedMimeHeader->getSubValue()
);
}
// the header just parsed may have changed the multipart's Content-Type, so re-read both values
$contentTypeBoundary = $multiPart->getHeader('Content-Type', 'boundary');
$contentTypeContentIdStart = $multiPart->getHeader('Content-Type', 'start');
}
unset($currentHeader);
}
if ($inHeader === true) {
// a blank line terminates the header section
if (trim($mimeMessageLine) == '') {
$inHeader = false;
continue;
}
// buffer the line; it is parsed on the next iteration so that folded lines can be appended first
$currentHeader = $mimeMessageLine;
continue;
} else {
if (self::isBoundary($mimeMessageLine)) {
if (self::isMiddleBoundary($mimeMessageLine, $contentTypeBoundary)) {
// nothing to finish when the current part is not a Part (the first boundary is reached with the multipart as current part)
if ($currentPart instanceof Part) {
// substr(..., 0, -1) drops the "\n" appended after the last content line
// NOTE(review): if the lines still carry a trailing CR (CRLF input split on LF), that CR
// appears to stay at the end of the content — TODO confirm against the caller's splitting
$currentPartContent = self::decodeContent(
$currentPart,
substr($messagePartStringContent, 0, -1)
);
$currentPart->setContent($currentPartContent);
// check if there is a start parameter given, if not set first part
if ($contentTypeContentIdStart === null || $currentPart->hasContentId($contentTypeContentIdStart) === true) {
$contentTypeContentIdStart = $currentPart->getHeader('Content-ID');
$parsedParts[] = new ParsedPart($currentPart, ParsedPart::PART_IS_MAIN);
} else {
$parsedParts[] = new ParsedPart($currentPart, ParsedPart::PART_IS_NOT_MAIN);
}
}
// start a fresh part; its own headers follow the boundary line
$currentPart = new Part();
$hitFirstBoundary = true;
$inHeader = true;
$messagePartStringContent = '';
} else if (self::isLastBoundary($mimeMessageLine, $contentTypeBoundary)) {
// closing boundary: finish the current part the same way, but do not open a new one
$currentPartContent = self::decodeContent(
$currentPart,
substr($messagePartStringContent, 0, -1)
);
$currentPart->setContent($currentPartContent);
// check if there is a start parameter given, if not set first part
if ($contentTypeContentIdStart === null || $currentPart->hasContentId($contentTypeContentIdStart) === true) {
$contentTypeContentIdStart = $currentPart->getHeader('Content-ID');
$parsedParts[] = new ParsedPart($currentPart, ParsedPart::PART_IS_MAIN);
} else {
$parsedParts[] = new ParsedPart($currentPart, ParsedPart::PART_IS_NOT_MAIN);
}
$messagePartStringContent = '';
} else {
// else block migrated from https://github.com/progmancod/BeSimpleSoap/commit/bf9437e3bcf35c98c6c2f26aca655ec3d3514694
// be careful to replace \r\n with \n
// the line starts with a dash but matches neither delimiter, so it is ordinary content
$messagePartStringContent .= $mimeMessageLine . "\n";
}
} else {
// before the first boundary any non-blank line is treated as a header line
if ($hitFirstBoundary === false) {
if (trim($mimeMessageLine) !== '') {
$inHeader = true;
$currentHeader = $mimeMessageLine;
continue;
}
}
$messagePartStringContent .= $mimeMessageLine . "\n";
}
}
}
return new ParsedPartList($parsedParts);
}
/**
* @param ParsedPartList $parsedPartList
* @param MultiPart $multiPart
@ -224,56 +123,4 @@ class Parser
}
}
}
/**
 * Undo the transfer encoding of a part's raw content and convert it to UTF-8.
 *
 * The transfer encoding is read from the part's Content-Transfer-Encoding header and
 * the source charset from the charset sub-value of its Content-Type header.
 *
 * @param Part $part Part whose headers describe the content
 * @param string $partStringContent Raw content as found in the message
 * @return string decoded content (whatever iconv() returns when a charset conversion is needed)
 */
private static function decodeContent(Part $part, $partStringContent)
{
    $transferEncoding = strtolower($part->getHeader('Content-Transfer-Encoding'));
    $sourceCharset = strtolower($part->getHeader('Content-Type', 'charset'));

    $decodedContent = $partStringContent;
    if (Part::ENCODING_BASE64 === $transferEncoding) {
        $decodedContent = base64_decode($partStringContent);
    } elseif (Part::ENCODING_QUOTED_PRINTABLE === $transferEncoding) {
        $decodedContent = quoted_printable_decode($partStringContent);
    }

    // content already declared as UTF-8 is passed through untouched
    if (Part::CHARSET_UTF8 === $sourceCharset) {
        return $decodedContent;
    }

    return iconv($sourceCharset, Part::CHARSET_UTF8, $decodedContent);
}
/**
 * Tell whether at least one of the given lines passes the isBoundary() check.
 *
 * @param string[] $lines
 * @return bool
 */
private static function hasBoundary(array $lines)
{
    $boundaryFound = false;
    foreach ($lines as $candidateLine) {
        if (self::isBoundary($candidateLine)) {
            $boundaryFound = true;
            break;
        }
    }

    return $boundaryFound;
}
/**
 * Cheap pre-check: a non-empty line whose first character is a dash.
 *
 * @param string $mimeMessageLine
 * @return bool
 */
private static function isBoundary($mimeMessageLine)
{
    if (strlen($mimeMessageLine) === 0) {
        return false;
    }

    return $mimeMessageLine[0] === "-";
}
/**
 * Check whether the trimmed line equals "--" followed by the boundary.
 *
 * @param string $mimeMessageLine
 * @param string $contentTypeBoundary boundary value without the leading dashes
 * @return bool
 */
private static function isMiddleBoundary($mimeMessageLine, $contentTypeBoundary)
{
    $expectedDelimiter = '--'.$contentTypeBoundary;

    return trim($mimeMessageLine) === $expectedDelimiter;
}
/**
 * Check whether the trimmed line equals "--" + boundary + "--" (the closing delimiter).
 *
 * @param string $mimeMessageLine
 * @param string $contentTypeBoundary boundary value without the surrounding dashes
 * @return bool
 */
private static function isLastBoundary($mimeMessageLine, $contentTypeBoundary)
{
    $expectedDelimiter = '--'.$contentTypeBoundary.'--';

    return trim($mimeMessageLine) === $expectedDelimiter;
}
}
}

View File

@ -0,0 +1,179 @@
<?php
namespace BeSimple\SoapCommon\Mime\Parser;
use BeSimple\SoapCommon\Mime\Boundary\MimeBoundaryAnalyser;
use BeSimple\SoapCommon\Mime\MultiPart;
use BeSimple\SoapCommon\Mime\Part;
use Exception;
/**
 * Splits the lines of a MIME multipart message into its individual parts.
 */
class ParsedPartsGetter
{
    const HAS_HTTP_REQUEST_HEADERS = true;
    const HAS_NO_HTTP_REQUEST_HEADERS = false;

    /**
     * Walk the message line by line and collect every part found between MIME boundaries.
     *
     * The loop is a small state machine: a line is either a header line (while $inHeader is
     * true, or before the first boundary was hit) or a content line of the current part.
     * Parsed headers are written to the current part; content is accumulated and attached to
     * the part when the next boundary line is reached.
     *
     * @param MultiPart $multiPart multipart that receives the top-level headers and supplies boundary/start
     * @param string[] $mimeMessageLines message already split into lines
     * @param bool $hasHttpHeaders = self::HAS_HTTP_REQUEST_HEADERS|self::HAS_NO_HTTP_REQUEST_HEADERS
     * @return ParsedPartList list of parts, each flagged as main or not main
     * @throws Exception when the multipart has no boundary/start or a header value cannot be decoded
     */
    public static function getPartsFromMimeMessageLines(
        MultiPart $multiPart,
        array $mimeMessageLines,
        $hasHttpHeaders
    ) {
        $parsedParts = [];
        $contentTypeBoundary = $multiPart->getHeader('Content-Type', 'boundary');
        if ($contentTypeBoundary === null) {
            throw new Exception(
                'Unable to get Content-Type boundary from given MultiPart: ' . var_export($multiPart->getHeaders(), true)
            );
        }
        $contentTypeContentIdStart = $multiPart->getHeader('Content-Type', 'start');
        if ($contentTypeContentIdStart === null) {
            throw new Exception(
                'Unable to get Content-Type start from given MultiPart: ' . var_export($multiPart->getHeaders(), true)
            );
        }
        // until the first boundary is hit, headers are written to the multipart itself
        $currentPart = $multiPart;
        $messagePartStringContent = '';
        $inHeader = $hasHttpHeaders;
        $hitFirstBoundary = false;
        foreach ($mimeMessageLines as $mimeMessageLine) {
            // ignore the HTTP status line and the POST request line
            if (substr($mimeMessageLine, 0, 5) === 'HTTP/' || substr($mimeMessageLine, 0, 4) === 'POST') {
                continue;
            }
            // a header line was buffered on the previous iteration: either extend it or flush it
            if (isset($currentHeader)) {
                // folded header: a line starting with space or tab continues the buffered header
                if (isset($mimeMessageLine[0]) && ($mimeMessageLine[0] === ' ' || $mimeMessageLine[0] === "\t")) {
                    $currentHeader .= $mimeMessageLine;
                    continue;
                }
                if (strpos($currentHeader, ':') !== false) {
                    // split at the first colon only, so colons inside the value are preserved
                    list($headerName, $headerValue) = explode(':', $currentHeader, 2);
                    $headerValueWithNoCrAtTheEnd = trim($headerValue);
                    try {
                        // NOTE(review): iconv_mime_decode() reports failures via a notice and a false
                        // return value; this catch only fires when an error handler converts notices
                        // into exceptions — confirm that is the case in production
                        $headerValue = iconv_mime_decode($headerValueWithNoCrAtTheEnd, 0, Part::CHARSET_UTF8);
                    } catch (Exception $e) {
                        if ($hitFirstBoundary === false) {
                            throw new Exception(
                                'Unable to parse message: cannot parse headers before hitting the first boundary',
                                0,
                                $e
                            );
                        }
                        throw new Exception(
                            sprintf(
                                'Unable to get header value: possible parsing message contents of %s characters in header parser: %s',
                                mb_strlen($headerValueWithNoCrAtTheEnd),
                                $e->getMessage()
                            ),
                            0,
                            $e
                        );
                    }
                    $parsedMimeHeaders = ContentTypeParser::parseContentTypeHeader($headerName, $headerValue);
                    foreach ($parsedMimeHeaders as $parsedMimeHeader) {
                        $currentPart->setHeader(
                            $parsedMimeHeader->getName(),
                            $parsedMimeHeader->getValue(),
                            $parsedMimeHeader->getSubValue()
                        );
                    }
                    // the header just parsed may have changed the multipart's Content-Type, so re-read both values
                    $contentTypeBoundary = $multiPart->getHeader('Content-Type', 'boundary');
                    $contentTypeContentIdStart = $multiPart->getHeader('Content-Type', 'start');
                }
                unset($currentHeader);
            }
            if ($inHeader === true) {
                // a blank line terminates the header section
                if (trim($mimeMessageLine) === '') {
                    $inHeader = false;
                    continue;
                }
                // buffer the line; it is parsed on the next iteration so that folded lines can be appended first
                $currentHeader = $mimeMessageLine;
                continue;
            }
            if (MimeBoundaryAnalyser::isMessageLineBoundary($mimeMessageLine)) {
                if (MimeBoundaryAnalyser::isMessageLineMiddleBoundary($mimeMessageLine, $contentTypeBoundary)) {
                    // nothing to finish while the current part is still the multipart itself
                    if ($currentPart instanceof Part) {
                        $parsedParts[] = self::finishPart(
                            $currentPart,
                            $messagePartStringContent,
                            $contentTypeContentIdStart
                        );
                    }
                    // start a fresh part; its own headers follow the boundary line
                    $currentPart = new Part();
                    $hitFirstBoundary = true;
                    $inHeader = true;
                    $messagePartStringContent = '';
                } elseif (MimeBoundaryAnalyser::isMessageLineLastBoundary($mimeMessageLine, $contentTypeBoundary)) {
                    // closing boundary: finish the current part, but do not open a new one
                    $parsedParts[] = self::finishPart(
                        $currentPart,
                        $messagePartStringContent,
                        $contentTypeContentIdStart
                    );
                    $messagePartStringContent = '';
                } else {
                    // else block migrated from https://github.com/progmancod/BeSimpleSoap/commit/bf9437e3bcf35c98c6c2f26aca655ec3d3514694
                    // the line starts with a dash but matches neither delimiter, so it is ordinary content
                    $messagePartStringContent .= $mimeMessageLine . "\n";
                }
                continue;
            }
            // before the first boundary any non-blank line is treated as a header line
            if ($hitFirstBoundary === false && trim($mimeMessageLine) !== '') {
                $inHeader = true;
                $currentHeader = $mimeMessageLine;
                continue;
            }
            $messagePartStringContent .= $mimeMessageLine . "\n";
        }

        return new ParsedPartList($parsedParts);
    }

    /**
     * Attach the accumulated content to a part and wrap it as main or not-main ParsedPart.
     *
     * The part is the main one when no start Content-ID is known or when the part carries
     * that Content-ID; in that case the start value is replaced by the part's own Content-ID.
     *
     * @param Part $part part that has just been terminated by a boundary line
     * @param string $messagePartStringContent content lines, each followed by "\n"
     * @param string|null $contentTypeContentIdStart start Content-ID, updated in place for a main part
     * @return ParsedPart
     */
    private static function finishPart(Part $part, $messagePartStringContent, &$contentTypeContentIdStart)
    {
        $rawContent = self::stripDelimiterLineBreak($messagePartStringContent);
        $part->setContent(self::decodeContent($part, $rawContent));
        if ($contentTypeContentIdStart === null || $part->hasContentId($contentTypeContentIdStart) === true) {
            $contentTypeContentIdStart = $part->getHeader('Content-ID');

            return new ParsedPart($part, ParsedPart::PART_IS_MAIN);
        }

        return new ParsedPart($part, ParsedPart::PART_IS_NOT_MAIN);
    }

    /**
     * Remove the line break that precedes a boundary line from the raw (still encoded) content.
     *
     * The "\n" was appended while joining the lines; a "\r" in front of it is the other half
     * of a CRLF. Both belong to the delimiter, not to the payload. This is done before decoding
     * so that a decoded payload whose real last byte is 0x0D keeps that byte. Empty content is
     * returned as an empty string without touching any string offset.
     *
     * @param string $content
     * @return string
     */
    private static function stripDelimiterLineBreak($content)
    {
        if (substr($content, -1) === "\n") {
            $content = (string) substr($content, 0, -1);
        }
        if (substr($content, -1) === "\r") {
            $content = (string) substr($content, 0, -1);
        }

        return $content;
    }

    /**
     * Decodes the content of a Mime part
     *
     * The transfer encoding is read from the part's Content-Transfer-Encoding header and the
     * source charset from the charset sub-value of its Content-Type header.
     *
     * @param Part $part Part to add content
     * @param string $partStringContent Content to decode
     * @return string $partStringContent decodedContent
     */
    private static function decodeContent(Part $part, $partStringContent)
    {
        // cast so that a missing header does not pass null to strtolower()
        $encoding = strtolower((string) $part->getHeader('Content-Transfer-Encoding'));
        $charset = strtolower((string) $part->getHeader('Content-Type', 'charset'));
        if ($encoding === Part::ENCODING_BASE64) {
            $partStringContent = base64_decode($partStringContent);
        } elseif ($encoding === Part::ENCODING_QUOTED_PRINTABLE) {
            $partStringContent = quoted_printable_decode($partStringContent);
        }
        if ($charset !== Part::CHARSET_UTF8) {
            return iconv($charset, Part::CHARSET_UTF8, $partStringContent);
        }

        return $partStringContent;
    }
}

View File

@ -75,6 +75,11 @@ abstract class PartHeader
return null;
}
/**
* Return the headers currently held by this object.
*
* @return mixed the value of $this->headers, returned as-is (presumably an array of header data — confirm against the property declaration)
*/
public function getHeaders()
{
return $this->headers;
}
/**
* Generate headers.
*