Skip to content

Commit fcc91ca

Browse files
committed
Fix #92: simplexml should not throw error on string parsing
1 parent b653b5c commit fcc91ca

File tree

3 files changed

+191
-1
lines changed

3 files changed

+191
-1
lines changed

src/Parser/AbstractXmlOutputParser.php

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,17 @@ protected function transformXmlToArray(string $xmlString): array
1515
$xmlString = utf8_encode($xmlString);
1616
}
1717

18-
$xml = simplexml_load_string($xmlString);
18+
libxml_use_internal_errors(true);
19+
$dom = new \DOMDocument('1.0', 'UTF-8');
20+
$dom->strictErrorChecking = false;
21+
$dom->validateOnParse = false;
22+
$dom->recover = true;
23+
$dom->loadXML($xmlString);
24+
$xml = simplexml_import_dom($dom);
25+
26+
libxml_clear_errors();
27+
libxml_use_internal_errors(false);
28+
1929
$json = json_encode($xml);
2030

2131
return json_decode($json, true);

tests/Parser/MediaInfoOutputParserTest.php

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,17 @@ class MediaInfoOutputParserTest extends TestCase
2525
*/
2626
private $outputMediainfo1710Path;
2727

28+
/**
29+
* @var string
30+
*/
31+
private $invalidEncodingOutputPath;
32+
2833
protected function setUp(): void
2934
{
// Fixture paths are resolved relative to this test file's directory (__DIR__).
3035
$this->outputPath = __DIR__.'/../fixtures/mediainfo-output.xml';
3136
$this->outputMediainfo1710Path = __DIR__.'/../fixtures/mediainfo-17.10-output.xml';
3237
$this->invalidOutputPath = __DIR__.'/../fixtures/mediainfo-output-invalid-types.xml';
38+
$this->invalidEncodingOutputPath = __DIR__.'/../fixtures/mediainfo-output-invalid-encoding.xml';
3339
}
3440

3541
public function testGetMediaInfoContainerBeforeCallParse(): void
@@ -98,4 +104,23 @@ public function testThrowInvalidTrackType(): void
98104
// will throw exception here as default behavior
99105
$mediaInfoContainer = $mediaInfoOutputParser->getMediaInfoContainer();
100106
}
107+
108+
public function testIgnoreInvalidEncodingErrors(): void
109+
{
110+
$mediaInfoOutputParser = new MediaInfoOutputParser();
111+
$mediaInfoOutputParser->parse(file_get_contents($this->invalidEncodingOutputPath));
112+
// The fixture XML contains badly encoded characters; parsing it must not make
113+
// simplexml throw an error.
114+
$mediaInfoContainer = $mediaInfoOutputParser->getMediaInfoContainer(true);
115+
116+
$this->assertSame('Mhor\MediaInfo\Type\General', get_class($mediaInfoContainer->getGeneral()));
117+
118+
$this->assertCount(1, $mediaInfoContainer->getAudios());
119+
120+
$general = $mediaInfoContainer->getGeneral();
121+
122+
$this->assertTrue($general->has('copyright'));
123+
$this->assertIsArray($general->get('copyright'));
124+
$this->assertEquals('Invalid Char', $general->get('copyright2'));
125+
}
101126
}
tests/fixtures/mediainfo-output-invalid-encoding.xml

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<Mediainfo version="19.09">
3+
<File>
4+
<track type="General">
5+
<Count>331</Count>
6+
<Count_of_stream_of_this_kind>1</Count_of_stream_of_this_kind>
7+
<Kind_of_stream>General</Kind_of_stream>
8+
<Kind_of_stream>General</Kind_of_stream>
9+
<Stream_identifier>0</Stream_identifier>
10+
<Count_of_video_streams>1</Count_of_video_streams>
11+
<Count_of_audio_streams>1</Count_of_audio_streams>
12+
<Video_Format_List>VC-1</Video_Format_List>
13+
<Video_Format_WithHint_List>VC-1 (WMV3)</Video_Format_WithHint_List>
14+
<Codecs_Video>VC-1</Codecs_Video>
15+
<Audio_Format_List>WMA</Audio_Format_List>
16+
<Audio_Format_WithHint_List>WMA</Audio_Format_WithHint_List>
17+
<Audio_codecs>WMA</Audio_codecs>
18+
<Complete_name>/mnt/ramdisk/5/54c9f93b-8550-4100-8eeb-328841dc00d6/782247_ohrly-rh131aaso.wmv</Complete_name>
19+
<Folder_name>/mnt/ramdisk/5/54c9f93b-8550-4100-8eeb-328841dc00d6</Folder_name>
20+
<File_name_extension>782247_ohrly-rh131aaso.wmv</File_name_extension>
21+
<File_name>782247_ohrly-rh131aaso</File_name>
22+
<File_extension>wmv</File_extension>
23+
<Format>Windows Media</Format>
24+
<Format>Windows Media</Format>
25+
<Format_Extensions_usually_used>asf dvr-ms wma wmv</Format_Extensions_usually_used>
26+
<Commercial_name>Windows Media</Commercial_name>
27+
<Internet_media_type>video/x-ms-wmv</Internet_media_type>
28+
<File_size>760169</File_size>
29+
<File_size>742 KiB</File_size>
30+
<File_size>742 KiB</File_size>
31+
<File_size>742 KiB</File_size>
32+
<File_size>742 KiB</File_size>
33+
<File_size>742.4 KiB</File_size>
34+
<Duration>489056</Duration>
35+
<Duration>8 min 9 s</Duration>
36+
<Duration>8 min 9 s 56 ms</Duration>
37+
<Duration>8 min 9 s</Duration>
38+
<Duration>00:08:09.056</Duration>
39+
<Duration>00:08:09;03</Duration>
40+
<Duration>00:08:09.056 (00:08:09;03)</Duration>
41+
<Overall_bit_rate>12435</Overall_bit_rate>
42+
<Overall_bit_rate>12.4 kb/s</Overall_bit_rate>
43+
<Maximum_Overall_bit_rate>5136894</Maximum_Overall_bit_rate>
44+
<Maximum_Overall_bit_rate>5 137 kb/s</Maximum_Overall_bit_rate>
45+
<Frame_rate>29.970</Frame_rate>
46+
<Frame_rate>29.970 FPS</Frame_rate>
47+
<Frame_count>14657</Frame_count>
48+
<HeaderSize>1046</HeaderSize>
49+
<DataSize>759123</DataSize>
50+
<Performer>Ron Harris</Performer>
51+
<Encoded_date>UTC 2012-05-14 00:53:44.000</Encoded_date>
52+
<File_last_modification_date>UTC 2019-12-17 17:20:55</File_last_modification_date>
53+
<File_last_modification_date__local_>2019-12-17 18:20:55</File_last_modification_date__local_>
54+
<Copyright> ￾ </Copyright>
55+
<Copyright2>Invalid ￾ Char</Copyright>
56+
<Comment>HD Videos</Comment>
57+
</track>
58+
<track type="Video">
59+
<Count>377</Count>
60+
<Count_of_stream_of_this_kind>1</Count_of_stream_of_this_kind>
61+
<Kind_of_stream>Video</Kind_of_stream>
62+
<Kind_of_stream>Video</Kind_of_stream>
63+
<Stream_identifier>0</Stream_identifier>
64+
<StreamOrder>0</StreamOrder>
65+
<ID>1</ID>
66+
<ID>1</ID>
67+
<Format>VC-1</Format>
68+
<Format>VC-1</Format>
69+
<Commercial_name>VC-1</Commercial_name>
70+
<Format_profile>Main</Format_profile>
71+
<Internet_media_type>video/vc1</Internet_media_type>
72+
<Codec_ID>WMV3</Codec_ID>
73+
<Codec_ID_Info>Windows Media Video 9</Codec_ID_Info>
74+
<Codec_ID_Hint>WMV3</Codec_ID_Hint>
75+
<Codec_ID_Url>http://www.microsoft.com/windows/windowsmedia/format/codecdownload.aspx</Codec_ID_Url>
76+
<Description_of_the_codec>Windows Media Video 9 - 2-pass VBR</Description_of_the_codec>
77+
<Duration>489056</Duration>
78+
<Duration>8 min 9 s</Duration>
79+
<Duration>8 min 9 s 56 ms</Duration>
80+
<Duration>8 min 9 s</Duration>
81+
<Duration>00:08:09.056</Duration>
82+
<Duration>00:08:09;03</Duration>
83+
<Duration>00:08:09.056 (00:08:09;03)</Duration>
84+
<Bit_rate>5000000</Bit_rate>
85+
<Bit_rate>5 000 kb/s</Bit_rate>
86+
<Width>1920</Width>
87+
<Width>1 920 pixels</Width>
88+
<Height>1080</Height>
89+
<Height>1 080 pixels</Height>
90+
<Pixel_aspect_ratio>1.000</Pixel_aspect_ratio>
91+
<Display_aspect_ratio>1.778</Display_aspect_ratio>
92+
<Display_aspect_ratio>16:9</Display_aspect_ratio>
93+
<Frame_rate>29.970</Frame_rate>
94+
<Frame_rate>29.970 (29970/1000) FPS</Frame_rate>
95+
<FrameRate_Num>29970</FrameRate_Num>
96+
<FrameRate_Den>1000</FrameRate_Den>
97+
<Frame_count>14657</Frame_count>
98+
<Color_space>YUV</Color_space>
99+
<Chroma_subsampling>4:2:0</Chroma_subsampling>
100+
<Chroma_subsampling>4:2:0</Chroma_subsampling>
101+
<Bit_depth>8</Bit_depth>
102+
<Bit_depth>8 bits</Bit_depth>
103+
<Scan_type>Progressive</Scan_type>
104+
<Scan_type>Progressive</Scan_type>
105+
<Compression_mode>Lossy</Compression_mode>
106+
<Compression_mode>Lossy</Compression_mode>
107+
<Bits__Pixel_Frame_>0.080</Bits__Pixel_Frame_>
108+
<Stream_size>305660000</Stream_size>
109+
<Stream_size>292 MiB</Stream_size>
110+
<Stream_size>292 MiB</Stream_size>
111+
<Stream_size>292 MiB</Stream_size>
112+
<Stream_size>292 MiB</Stream_size>
113+
<Stream_size>291.5 MiB</Stream_size>
114+
</track>
115+
<track type="Audio">
116+
<Count>280</Count>
117+
<Count_of_stream_of_this_kind>1</Count_of_stream_of_this_kind>
118+
<Kind_of_stream>Audio</Kind_of_stream>
119+
<Kind_of_stream>Audio</Kind_of_stream>
120+
<Stream_identifier>0</Stream_identifier>
121+
<StreamOrder>1</StreamOrder>
122+
<ID>2</ID>
123+
<ID>2</ID>
124+
<Format>WMA</Format>
125+
<Format>WMA</Format>
126+
<Commercial_name>WMA</Commercial_name>
127+
<Format_version>Version 2</Format_version>
128+
<Codec_ID>161</Codec_ID>
129+
<Codec_ID_Info>Windows Media Audio</Codec_ID_Info>
130+
<Codec_ID_Url>http://www.microsoft.com/windows/windowsmedia/format/codecdownload.aspx</Codec_ID_Url>
131+
<Description_of_the_codec>Windows Media Audio 9 - 128 kbps, 44 kHz, stereo CBR</Description_of_the_codec>
132+
<Duration>489056</Duration>
133+
<Duration>8 min 9 s</Duration>
134+
<Duration>8 min 9 s 56 ms</Duration>
135+
<Duration>8 min 9 s</Duration>
136+
<Duration>00:08:09.056</Duration>
137+
<Duration>00:08:09.056</Duration>
138+
<Bit_rate>128000</Bit_rate>
139+
<Bit_rate>128 kb/s</Bit_rate>
140+
<Channel_s_>2</Channel_s_>
141+
<Channel_s_>2 channels</Channel_s_>
142+
<Sampling_rate>44100</Sampling_rate>
143+
<Sampling_rate>44.1 kHz</Sampling_rate>
144+
<Samples_count>21567370</Samples_count>
145+
<Bit_depth>16</Bit_depth>
146+
<Bit_depth>16 bits</Bit_depth>
147+
<Stream_size>7824896</Stream_size>
148+
<Stream_size>7.46 MiB</Stream_size>
149+
<Stream_size>7 MiB</Stream_size>
150+
<Stream_size>7.5 MiB</Stream_size>
151+
<Stream_size>7.46 MiB</Stream_size>
152+
<Stream_size>7.462 MiB</Stream_size>
153+
</track>
154+
</File>
155+
</Mediainfo>

0 commit comments

Comments
 (0)